/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 515 - (show annotations)
Tue May 4 09:12:25 2010 UTC (5 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 70154 byte(s)
Error occurred while calculating annotation data.
Avoid warnings about fwrite in pcregrep.
1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2010 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
/* Boolean type and its two values. */

typedef int BOOL;

#define TRUE  1
#define FALSE 0

/* Fixed limits. OFFSET_SIZE is the number of ints in the offsets vector that
is handed to pcre_exec(). */

#define MAX_PATTERN_COUNT 100
#define OFFSET_SIZE       99

/* One third of the main file buffer (the buffer itself is 3*MBUFTHIRD bytes).
It is BUFSIZ when that is larger than 8192, and 8192 otherwise. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif

/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum {
  FN_NONE         = 0,
  FN_DEFAULT      = 1,
  FN_MATCH_ONLY   = 2,
  FN_NOMATCH_ONLY = 3,
  FN_FORCE        = 4
};

/* File reading styles */

enum {
  FR_PLAIN  = 0,
  FR_LIBZ   = 1,
  FR_LIBBZ2 = 2
};

/* Actions for the -d option (lower case names) and the -D option (upper case
names) */

enum {
  dee_READ    = 0,
  dee_SKIP    = 1,
  dee_RECURSE = 2
};

enum {
  DEE_READ = 0,
  DEE_SKIP = 1
};

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH    0x0001
#define PO_LINE_MATCH    0x0002
#define PO_FIXED_STRINGS 0x0004

/* Line ending types */

enum {
  EL_LF      = 0,
  EL_CR      = 1,
  EL_CRLF    = 2,
  EL_ANY     = 3,
  EL_ANYCRLF = 4
};
106
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge: the result of fwrite() is consumed
as the condition of an empty "if". Oddly, this does not also seem to apply to
fprintf().

The fudge is wrapped in do { } while (0) so that "FWRITE(...);" is exactly one
statement. Without the wrapper, using the macro as the unbraced body of an
if/else would either fail to compile or attach the "else" to the hidden "if"
inside the macro. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114
115
116
117 /*************************************************
118 * Global variables *
119 *************************************************/
120
121 /* Jeffrey Friedl has some debugging requirements that are not part of the
122 regular code. */
123
124 #ifdef JFRIEDL_DEBUG
125 static int S_arg = -1;
126 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128 static const char *jfriedl_prefix = "";
129 static const char *jfriedl_postfix = "";
130 #endif
131
132 static int endlinetype;
133
134 static char *colour_string = (char *)"1;31";
135 static char *colour_option = NULL;
136 static char *dee_option = NULL;
137 static char *DEE_option = NULL;
138 static char *newline = NULL;
139 static char *pattern_filename = NULL;
140 static char *stdin_name = (char *)"(standard input)";
141 static char *locale = NULL;
142
143 static const unsigned char *pcretables = NULL;
144
145 static int pattern_count = 0;
146 static pcre **pattern_list = NULL;
147 static pcre_extra **hints_list = NULL;
148
149 static char *include_pattern = NULL;
150 static char *exclude_pattern = NULL;
151 static char *include_dir_pattern = NULL;
152 static char *exclude_dir_pattern = NULL;
153
154 static pcre *include_compiled = NULL;
155 static pcre *exclude_compiled = NULL;
156 static pcre *include_dir_compiled = NULL;
157 static pcre *exclude_dir_compiled = NULL;
158
159 static int after_context = 0;
160 static int before_context = 0;
161 static int both_context = 0;
162 static int dee_action = dee_READ;
163 static int DEE_action = DEE_READ;
164 static int error_count = 0;
165 static int filenames = FN_DEFAULT;
166 static int process_options = 0;
167
168 static BOOL count_only = FALSE;
169 static BOOL do_colour = FALSE;
170 static BOOL file_offsets = FALSE;
171 static BOOL hyphenpending = FALSE;
172 static BOOL invert = FALSE;
173 static BOOL line_offsets = FALSE;
174 static BOOL multiline = FALSE;
175 static BOOL number = FALSE;
176 static BOOL omit_zero_count = FALSE;
177 static BOOL only_matching = FALSE;
178 static BOOL quiet = FALSE;
179 static BOOL silent = FALSE;
180 static BOOL utf8 = FALSE;
181
/* Structure for options and list of them */

/* The kind of data an option takes. The names suggest: no data, a string, an
optional string, a number, an optional number, and a pattern to be added to
the pattern list - NOTE(review): confirm against the option-handling code,
which is not visible in this part of the file. */

enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
OP_PATLIST };

/* One entry in the option table:

  type       one of the OP_xxx values above
  one_char   the single-letter form of the option, or one of the negative
               N_xxx values below when there is no single-letter form
  dataptr    address of the variable associated with the option, or NULL
  long_name  the long option name; where data is taken, the name is followed
               by "=" and a description of the data
  help_text  one-line description of the option
*/

typedef struct option_item {
  int type;
  int one_char;
  void *dataptr;
  const char *long_name;
  const char *help_text;
} option_item;

/* Options without a single-letter equivalent get a negative value. This can be
used to identify them. */

#define N_COLOUR (-1)
#define N_EXCLUDE (-2)
#define N_EXCLUDE_DIR (-3)
#define N_HELP (-4)
#define N_INCLUDE (-5)
#define N_INCLUDE_DIR (-6)
#define N_LABEL (-7)
#define N_LOCALE (-8)
#define N_NULL (-9)
#define N_LOFFSETS (-10)
#define N_FOFFSETS (-11)

/* The option table. Both spellings "color" and "colour" are present, sharing
N_COLOUR and the same variable. The final entry has a zero one_char and NULL
names; presumably it is the end-of-table marker - TODO confirm against the
code that scans the table. */

static option_item optionlist[] = {
  { OP_NODATA, N_NULL, NULL, "", " terminate options" },
  { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
  { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
  { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
  { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
  { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
  { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
  { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
  { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
  { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
  { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
  { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
  { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
  { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
  { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
  { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
  { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
  { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
  { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
  { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
  { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
  { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
  { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
  { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
  { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
  { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
  { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
  { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
  { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
  { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
  { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
  { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
#ifdef JFRIEDL_DEBUG
  { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
#endif
  { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
  { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
  { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
  { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
  { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
  { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
  { OP_NODATA, 0, NULL, NULL, NULL }
};
254
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively, and
that value is the index into both tables. Note that the combination of -w and
-x has the same effect as -x on its own, so the entries for 3 and 7 repeat
those for 2 and 6. */

static const char *prefix[] = {
  "",          /* 0: none     */
  "\\b",       /* 1: -w       */
  "^(?:",      /* 2: -x       */
  "^(?:",      /* 3: -w -x    */
  "\\Q",       /* 4: -F       */
  "\\b\\Q",    /* 5: -F -w    */
  "^(?:\\Q",   /* 6: -F -x    */
  "^(?:\\Q"    /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",          /* 0: none     */
  "\\b",       /* 1: -w       */
  ")$",        /* 2: -x       */
  ")$",        /* 3: -w -x    */
  "\\E",       /* 4: -F       */
  "\\E\\b",    /* 5: -F -w    */
  "\\E)$",     /* 6: -F -x    */
  "\\E)$"      /* 7: -F -w -x */
};

/* UTF-8 tables - used only when the newline setting is "any" or "anycrlf".

utf8_table3 is indexed by the number of additional bytes in a character and
gives the mask for the data bits held in the character's first byte. */

const int utf8_table3[] = {
  0xff,   /* 0 additional bytes */
  0x1f,   /* 1 */
  0x0f,   /* 2 */
  0x07,   /* 3 */
  0x03,   /* 4 */
  0x01    /* 5 */
};

/* utf8_table4 is indexed by the bottom six bits of a first byte that is 0xc0
or greater, and gives the number of additional bytes in the character. */

const char utf8_table4[] = {
  /* first byte 0xc0 - 0xcf */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* first byte 0xd0 - 0xdf */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* first byte 0xe0 - 0xef */
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  /* first byte 0xf0 - 0xf7 */
  3,3,3,3,3,3,3,3,
  /* first byte 0xf8 - 0xfb */
  4,4,4,4,
  /* first byte 0xfc - 0xff */
  5,5,5,5
};
275
276
277
278 /*************************************************
279 * OS-specific functions *
280 *************************************************/
281
282 /* These functions are defined so that they can be made system specific,
283 although at present the only ones are for Unix, Win32, and for "no support". */
284
285
286 /************* Directory scanning in Unix ***********/
287
288 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
289 #include <sys/types.h>
290 #include <sys/stat.h>
291 #include <dirent.h>
292
293 typedef DIR directory_type;
294
/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory
            0 otherwise, including when stat() fails

The test uses S_ISDIR(), which is part of base POSIX, rather than masking with
S_IFMT and comparing with S_IFDIR, because those mask constants are an XSI
extension and are not visible in every compilation environment. */

static int
isdirectory(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 0;        /* In the expectation that opening as a file will fail */
return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
303
304 static directory_type *
305 opendirectory(char *filename)
306 {
307 return opendir(filename);
308 }
309
310 static char *
311 readdirectory(directory_type *dir)
312 {
313 for (;;)
314 {
315 struct dirent *dent = readdir(dir);
316 if (dent == NULL) return NULL;
317 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
318 return dent->d_name;
319 }
320 /* Control never reaches here */
321 }
322
323 static void
324 closedirectory(directory_type *dir)
325 {
326 closedir(dir);
327 }
328
329
330 /************* Test for regular file in Unix **********/
331
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    non-zero if stat() reports a regular file, or if stat() fails
            0 if stat() succeeds and reports anything else

The test uses S_ISREG(), which is part of base POSIX, rather than masking with
S_IFMT and comparing with S_IFREG, because those mask constants are an XSI
extension and are not visible in every compilation environment. */

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */
return S_ISREG(statbuf.st_mode) != 0;
}
340
341
342 /************* Test stdout for being a terminal in Unix **********/
343
344 static BOOL
345 is_stdout_tty(void)
346 {
347 return isatty(fileno(stdout));
348 }
349
350
351 /************* Directory scanning in Win32 ***********/
352
353 /* I (Philip Hazel) have no means of testing this code. It was contributed by
354 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
355 when it did not exist. David Byron added a patch that moved the #include of
356 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
357 */
358
359 #elif HAVE_WINDOWS_H
360
361 #ifndef STRICT
362 # define STRICT
363 #endif
364 #ifndef WIN32_LEAN_AND_MEAN
365 # define WIN32_LEAN_AND_MEAN
366 #endif
367
368 #include <windows.h>
369
370 #ifndef INVALID_FILE_ATTRIBUTES
371 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
372 #endif
373
/* State of a directory scan in progress under Win32. */

typedef struct directory_type
{
HANDLE handle;           /* search handle obtained from FindFirstFile() */
BOOL first;              /* TRUE until the entry that FindFirstFile() found
                            has been handed out by readdirectory() */
WIN32_FIND_DATA data;    /* the most recently found entry */
} directory_type;
380
381 int
382 isdirectory(char *filename)
383 {
384 DWORD attr = GetFileAttributes(filename);
385 if (attr == INVALID_FILE_ATTRIBUTES)
386 return 0;
387 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
388 }
389
390 directory_type *
391 opendirectory(char *filename)
392 {
393 size_t len;
394 char *pattern;
395 directory_type *dir;
396 DWORD err;
397 len = strlen(filename);
398 pattern = (char *) malloc(len + 3);
399 dir = (directory_type *) malloc(sizeof(*dir));
400 if ((pattern == NULL) || (dir == NULL))
401 {
402 fprintf(stderr, "pcregrep: malloc failed\n");
403 exit(2);
404 }
405 memcpy(pattern, filename, len);
406 memcpy(&(pattern[len]), "\\*", 3);
407 dir->handle = FindFirstFile(pattern, &(dir->data));
408 if (dir->handle != INVALID_HANDLE_VALUE)
409 {
410 free(pattern);
411 dir->first = TRUE;
412 return dir;
413 }
414 err = GetLastError();
415 free(pattern);
416 free(dir);
417 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
418 return NULL;
419 }
420
421 char *
422 readdirectory(directory_type *dir)
423 {
424 for (;;)
425 {
426 if (!dir->first)
427 {
428 if (!FindNextFile(dir->handle, &(dir->data)))
429 return NULL;
430 }
431 else
432 {
433 dir->first = FALSE;
434 }
435 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
436 return dir->data.cFileName;
437 }
438 #ifndef _MSC_VER
439 return NULL; /* Keep compiler happy; never executed */
440 #endif
441 }
442
443 void
444 closedirectory(directory_type *dir)
445 {
446 FindClose(dir->handle);
447 free(dir);
448 }
449
450
451 /************* Test for regular file in Win32 **********/
452
453 /* I don't know how to do this, or if it can be done; assume all paths are
454 regular if they are not directories. */
455
/* Anything for which isdirectory() gives zero is taken to be a regular
file. */

int
isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
460
461
462 /************* Test stdout for being a terminal in Win32 **********/
463
464 /* I don't know how to do this; assume never */
465
466 static BOOL
467 is_stdout_tty(void)
468 {
469 return FALSE;
470 }
471
472
473 /************* Directory scanning when we can't do it ***********/
474
475 /* The type is void, and apart from isdirectory(), the functions do nothing. */
476
477 #else
478
/* With no directory support the scan type is just void. */

typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* A scan can never be started. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type *)0;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}

/* Nothing to release. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
485
486
487 /************* Test for regular when we can't do it **********/
488
489 /* Assume all files are regular. */
490
int
isregfile(char *filename)
{
(void)filename;    /* Not examined: every path counts as a regular file */
return 1;
}
492
493
494 /************* Test stdout for being a terminal when we can't do it **********/
495
496 static BOOL
497 is_stdout_tty(void)
498 {
499 return FALSE;
500 }
501
502
503 #endif
504
505
506
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
in their libraries, but can provide the same facility by this simple
alternative function, which looks the number up in the system's error list.

Argument:   n   an error number
Returns:    sys_errlist[n] when n is in the range 0 to sys_nerr-1
            a fixed "unknown" text otherwise
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
526
527
528
529 /*************************************************
530 * Find end of line *
531 *************************************************/
532
533 /* The length of the endline sequence that is found is set via lenptr. This may
534 be zero at the very end of the file if there is no line-ending sequence there.
535
536 Arguments:
537 p current position in line
538 endptr end of available data
539 lenptr where to put the length of the eol sequence
540
541 Returns: pointer to the last byte of the line
542 */
543
544 static char *
545 end_of_line(char *p, char *endptr, int *lenptr)
546 {
547 switch(endlinetype)
548 {
549 default: /* Just in case */
550 case EL_LF:
551 while (p < endptr && *p != '\n') p++;
552 if (p < endptr)
553 {
554 *lenptr = 1;
555 return p + 1;
556 }
557 *lenptr = 0;
558 return endptr;
559
560 case EL_CR:
561 while (p < endptr && *p != '\r') p++;
562 if (p < endptr)
563 {
564 *lenptr = 1;
565 return p + 1;
566 }
567 *lenptr = 0;
568 return endptr;
569
570 case EL_CRLF:
571 for (;;)
572 {
573 while (p < endptr && *p != '\r') p++;
574 if (++p >= endptr)
575 {
576 *lenptr = 0;
577 return endptr;
578 }
579 if (*p == '\n')
580 {
581 *lenptr = 2;
582 return p + 1;
583 }
584 }
585 break;
586
587 case EL_ANYCRLF:
588 while (p < endptr)
589 {
590 int extra = 0;
591 register int c = *((unsigned char *)p);
592
593 if (utf8 && c >= 0xc0)
594 {
595 int gcii, gcss;
596 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
597 gcss = 6*extra;
598 c = (c & utf8_table3[extra]) << gcss;
599 for (gcii = 1; gcii <= extra; gcii++)
600 {
601 gcss -= 6;
602 c |= (p[gcii] & 0x3f) << gcss;
603 }
604 }
605
606 p += 1 + extra;
607
608 switch (c)
609 {
610 case 0x0a: /* LF */
611 *lenptr = 1;
612 return p;
613
614 case 0x0d: /* CR */
615 if (p < endptr && *p == 0x0a)
616 {
617 *lenptr = 2;
618 p++;
619 }
620 else *lenptr = 1;
621 return p;
622
623 default:
624 break;
625 }
626 } /* End of loop for ANYCRLF case */
627
628 *lenptr = 0; /* Must have hit the end */
629 return endptr;
630
631 case EL_ANY:
632 while (p < endptr)
633 {
634 int extra = 0;
635 register int c = *((unsigned char *)p);
636
637 if (utf8 && c >= 0xc0)
638 {
639 int gcii, gcss;
640 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
641 gcss = 6*extra;
642 c = (c & utf8_table3[extra]) << gcss;
643 for (gcii = 1; gcii <= extra; gcii++)
644 {
645 gcss -= 6;
646 c |= (p[gcii] & 0x3f) << gcss;
647 }
648 }
649
650 p += 1 + extra;
651
652 switch (c)
653 {
654 case 0x0a: /* LF */
655 case 0x0b: /* VT */
656 case 0x0c: /* FF */
657 *lenptr = 1;
658 return p;
659
660 case 0x0d: /* CR */
661 if (p < endptr && *p == 0x0a)
662 {
663 *lenptr = 2;
664 p++;
665 }
666 else *lenptr = 1;
667 return p;
668
669 case 0x85: /* NEL */
670 *lenptr = utf8? 2 : 1;
671 return p;
672
673 case 0x2028: /* LS */
674 case 0x2029: /* PS */
675 *lenptr = 3;
676 return p;
677
678 default:
679 break;
680 }
681 } /* End of loop for ANY case */
682
683 *lenptr = 0; /* Must have hit the end */
684 return endptr;
685 } /* End of overall switch */
686 }
687
688
689
690 /*************************************************
691 * Find start of previous line *
692 *************************************************/
693
694 /* This is called when looking back for before lines to print.
695
696 Arguments:
697 p start of the subsequent line
698 startptr start of available data
699
700 Returns: pointer to the start of the previous line
701 */
702
703 static char *
704 previous_line(char *p, char *startptr)
705 {
706 switch(endlinetype)
707 {
708 default: /* Just in case */
709 case EL_LF:
710 p--;
711 while (p > startptr && p[-1] != '\n') p--;
712 return p;
713
714 case EL_CR:
715 p--;
716 while (p > startptr && p[-1] != '\n') p--;
717 return p;
718
719 case EL_CRLF:
720 for (;;)
721 {
722 p -= 2;
723 while (p > startptr && p[-1] != '\n') p--;
724 if (p <= startptr + 1 || p[-2] == '\r') return p;
725 }
726 return p; /* But control should never get here */
727
728 case EL_ANY:
729 case EL_ANYCRLF:
730 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
731 if (utf8) while ((*p & 0xc0) == 0x80) p--;
732
733 while (p > startptr)
734 {
735 register int c;
736 char *pp = p - 1;
737
738 if (utf8)
739 {
740 int extra = 0;
741 while ((*pp & 0xc0) == 0x80) pp--;
742 c = *((unsigned char *)pp);
743 if (c >= 0xc0)
744 {
745 int gcii, gcss;
746 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
747 gcss = 6*extra;
748 c = (c & utf8_table3[extra]) << gcss;
749 for (gcii = 1; gcii <= extra; gcii++)
750 {
751 gcss -= 6;
752 c |= (pp[gcii] & 0x3f) << gcss;
753 }
754 }
755 }
756 else c = *((unsigned char *)pp);
757
758 if (endlinetype == EL_ANYCRLF) switch (c)
759 {
760 case 0x0a: /* LF */
761 case 0x0d: /* CR */
762 return p;
763
764 default:
765 break;
766 }
767
768 else switch (c)
769 {
770 case 0x0a: /* LF */
771 case 0x0b: /* VT */
772 case 0x0c: /* FF */
773 case 0x0d: /* CR */
774 case 0x85: /* NEL */
775 case 0x2028: /* LS */
776 case 0x2029: /* PS */
777 return p;
778
779 default:
780 break;
781 }
782
783 p = pp; /* Back one character */
784 } /* End of loop for ANY case */
785
786 return startptr; /* Hit start of data */
787 } /* End of overall switch */
788 }
789
790
791
792
793
794 /*************************************************
795 * Print the previous "after" lines *
796 *************************************************/
797
798 /* This is called if we are about to lose said lines because of buffer filling,
799 and at the end of the file. The data in the line is written using fwrite() so
800 that a binary zero does not terminate it.
801
802 Arguments:
803 lastmatchnumber the number of the last matching line, plus one
804 lastmatchrestart where we restarted after the last match
805 endptr end of available data
806 printname filename for printing
807
808 Returns: nothing
809 */
810
811 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
812 char *endptr, char *printname)
813 {
814 if (after_context > 0 && lastmatchnumber > 0)
815 {
816 int count = 0;
817 while (lastmatchrestart < endptr && count++ < after_context)
818 {
819 int ellength;
820 char *pp = lastmatchrestart;
821 if (printname != NULL) fprintf(stdout, "%s-", printname);
822 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
823 pp = end_of_line(pp, endptr, &ellength);
824 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
825 lastmatchrestart = pp;
826 }
827 hyphenpending = TRUE;
828 }
829 }
830
831
832
833 /*************************************************
834 * Apply patterns to subject till one matches *
835 *************************************************/
836
837 /* This function is called to run through all patterns, looking for a match. It
838 is used multiple times for the same subject when colouring is enabled, in order
839 to find all possible matches.
840
841 Arguments:
842 matchptr the start of the subject
843 length the length of the subject to match
844 offsets the offets vector to fill in
845 mrc address of where to put the result of pcre_exec()
846
847 Returns: TRUE if there was a match
848 FALSE if there was no match
849 invert if there was a non-fatal error
850 */
851
852 static BOOL
853 match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
854 {
855 int i;
856 for (i = 0; i < pattern_count; i++)
857 {
858 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,
859 PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
860 if (*mrc >= 0) return TRUE;
861 if (*mrc == PCRE_ERROR_NOMATCH) continue;
862 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
863 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
864 fprintf(stderr, "this text:\n");
865 FWRITE(matchptr, 1, length, stderr); /* In case binary zero included */
866 fprintf(stderr, "\n");
867 if (error_count == 0 &&
868 (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
869 {
870 fprintf(stderr, "pcregrep: error %d means that a resource limit "
871 "was exceeded\n", *mrc);
872 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
873 }
874 if (error_count++ > 20)
875 {
876 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
877 exit(2);
878 }
879 return invert; /* No more matching; don't show the line again */
880 }
881
882 return FALSE; /* No match, no errors */
883 }
884
885
886
887 /*************************************************
888 * Grep an individual file *
889 *************************************************/
890
891 /* This is called from grep_or_recurse() below. It uses a buffer that is three
892 times the value of MBUFTHIRD. The matching point is never allowed to stray into
893 the top third of the buffer, thus keeping more of the file available for
894 context printing or for multiline scanning. For large files, the pointer will
895 be in the middle third most of the time, so the bottom third is available for
896 "before" context printing.
897
898 Arguments:
899 handle the fopened FILE stream for a normal file
900 the gzFile pointer when reading is via libz
901 the BZFILE pointer when reading is via libbz2
902 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
903 printname the file name if it is to be printed for each match
904 or NULL if the file name is not to be printed
905 it cannot be NULL if filenames[_nomatch]_only is set
906
907 Returns: 0 if there was at least one match
908 1 otherwise (no matches)
909 2 if there is a read error on a .bz2 file
910 */
911
912 static int
913 pcregrep(void *handle, int frtype, char *printname)
914 {
915 int rc = 1;
916 int linenumber = 1;
917 int lastmatchnumber = 0;
918 int count = 0;
919 int filepos = 0;
920 int offsets[OFFSET_SIZE];
921 char *lastmatchrestart = NULL;
922 char buffer[3*MBUFTHIRD];
923 char *ptr = buffer;
924 char *endptr;
925 size_t bufflength;
926 BOOL endhyphenpending = FALSE;
927 FILE *in = NULL; /* Ensure initialized */
928
929 #ifdef SUPPORT_LIBZ
930 gzFile ingz = NULL;
931 #endif
932
933 #ifdef SUPPORT_LIBBZ2
934 BZFILE *inbz2 = NULL;
935 #endif
936
937
938 /* Do the first read into the start of the buffer and set up the pointer to end
939 of what we have. In the case of libz, a non-zipped .gz file will be read as a
940 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
941 fail. */
942
943 #ifdef SUPPORT_LIBZ
944 if (frtype == FR_LIBZ)
945 {
946 ingz = (gzFile)handle;
947 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
948 }
949 else
950 #endif
951
952 #ifdef SUPPORT_LIBBZ2
953 if (frtype == FR_LIBBZ2)
954 {
955 inbz2 = (BZFILE *)handle;
956 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
957 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
958 } /* without the cast it is unsigned. */
959 else
960 #endif
961
962 {
963 in = (FILE *)handle;
964 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
965 }
966
967 endptr = buffer + bufflength;
968
969 /* Loop while the current pointer is not at the end of the file. For large
970 files, endptr will be at the end of the buffer when we are in the middle of the
971 file, but ptr will never get there, because as soon as it gets over 2/3 of the
972 way, the buffer is shifted left and re-filled. */
973
974 while (ptr < endptr)
975 {
976 int endlinelength;
977 int mrc = 0;
978 BOOL match;
979 char *matchptr = ptr;
980 char *t = ptr;
981 size_t length, linelength;
982
983 /* At this point, ptr is at the start of a line. We need to find the length
984 of the subject string to pass to pcre_exec(). In multiline mode, it is the
985 length remainder of the data in the buffer. Otherwise, it is the length of
986 the next line, excluding the terminating newline. After matching, we always
987 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
988 option is used for compiling, so that any match is constrained to be in the
989 first line. */
990
991 t = end_of_line(t, endptr, &endlinelength);
992 linelength = t - ptr - endlinelength;
993 length = multiline? (size_t)(endptr - ptr) : linelength;
994
995 /* Extra processing for Jeffrey Friedl's debugging. */
996
997 #ifdef JFRIEDL_DEBUG
998 if (jfriedl_XT || jfriedl_XR)
999 {
1000 #include <sys/time.h>
1001 #include <time.h>
1002 struct timeval start_time, end_time;
1003 struct timezone dummy;
1004 int i;
1005
1006 if (jfriedl_XT)
1007 {
1008 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1009 const char *orig = ptr;
1010 ptr = malloc(newlen + 1);
1011 if (!ptr) {
1012 printf("out of memory");
1013 exit(2);
1014 }
1015 endptr = ptr;
1016 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1017 for (i = 0; i < jfriedl_XT; i++) {
1018 strncpy(endptr, orig, length);
1019 endptr += length;
1020 }
1021 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1022 length = newlen;
1023 }
1024
1025 if (gettimeofday(&start_time, &dummy) != 0)
1026 perror("bad gettimeofday");
1027
1028
1029 for (i = 0; i < jfriedl_XR; i++)
1030 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1031 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1032
1033 if (gettimeofday(&end_time, &dummy) != 0)
1034 perror("bad gettimeofday");
1035
1036 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1037 -
1038 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1039
1040 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1041 return 0;
1042 }
1043 #endif
1044
1045 /* We come back here after a match when the -o option (only_matching) is set,
1046 in order to find any further matches in the same line. */
1047
1048 ONLY_MATCHING_RESTART:
1049
1050 /* Run through all the patterns until one matches or there is an error other
1051 than NOMATCH. This code is in a subroutine so that it can be re-used for
1052 finding subsequent matches when colouring matched lines. */
1053
1054 match = match_patterns(matchptr, length, offsets, &mrc);
1055
1056 /* If it's a match or a not-match (as required), do what's wanted. */
1057
1058 if (match != invert)
1059 {
1060 BOOL hyphenprinted = FALSE;
1061
1062 /* We've failed if we want a file that doesn't have any matches. */
1063
1064 if (filenames == FN_NOMATCH_ONLY) return 1;
1065
1066 /* Just count if just counting is wanted. */
1067
1068 if (count_only) count++;
1069
1070 /* If all we want is a file name, there is no need to scan any more lines
1071 in the file. */
1072
1073 else if (filenames == FN_MATCH_ONLY)
1074 {
1075 fprintf(stdout, "%s\n", printname);
1076 return 0;
1077 }
1078
1079 /* Likewise, if all we want is a yes/no answer. */
1080
1081 else if (quiet) return 0;
1082
1083 /* The --only-matching option prints just the substring that matched, and
1084 the --file-offsets and --line-offsets options output offsets for the
1085 matching substring (they both force --only-matching). None of these options
1086 prints any context. Afterwards, adjust the start and length, and then jump
1087 back to look for further matches in the same line. If we are in invert
1088 mode, however, nothing is printed - this could be still useful because the
1089 return code is set. */
1090
1091 else if (only_matching)
1092 {
1093 if (!invert)
1094 {
1095 if (printname != NULL) fprintf(stdout, "%s:", printname);
1096 if (number) fprintf(stdout, "%d:", linenumber);
1097 if (line_offsets)
1098 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1099 offsets[1] - offsets[0]);
1100 else if (file_offsets)
1101 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1102 offsets[1] - offsets[0]);
1103 else
1104 {
1105 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1106 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1107 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1108 }
1109 fprintf(stdout, "\n");
1110 matchptr += offsets[1];
1111 length -= offsets[1];
1112 match = FALSE;
1113 goto ONLY_MATCHING_RESTART;
1114 }
1115 }
1116
1117 /* This is the default case when none of the above options is set. We print
1118 the matching lines(s), possibly preceded and/or followed by other lines of
1119 context. */
1120
1121 else
1122 {
1123 /* See if there is a requirement to print some "after" lines from a
1124 previous match. We never print any overlaps. */
1125
1126 if (after_context > 0 && lastmatchnumber > 0)
1127 {
1128 int ellength;
1129 int linecount = 0;
1130 char *p = lastmatchrestart;
1131
1132 while (p < ptr && linecount < after_context)
1133 {
1134 p = end_of_line(p, ptr, &ellength);
1135 linecount++;
1136 }
1137
1138 /* It is important to advance lastmatchrestart during this printing so
1139 that it interacts correctly with any "before" printing below. Print
1140 each line's data using fwrite() in case there are binary zeroes. */
1141
1142 while (lastmatchrestart < p)
1143 {
1144 char *pp = lastmatchrestart;
1145 if (printname != NULL) fprintf(stdout, "%s-", printname);
1146 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1147 pp = end_of_line(pp, endptr, &ellength);
1148 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1149 lastmatchrestart = pp;
1150 }
1151 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1152 }
1153
1154 /* If there were non-contiguous lines printed above, insert hyphens. */
1155
1156 if (hyphenpending)
1157 {
1158 fprintf(stdout, "--\n");
1159 hyphenpending = FALSE;
1160 hyphenprinted = TRUE;
1161 }
1162
1163 /* See if there is a requirement to print some "before" lines for this
1164 match. Again, don't print overlaps. */
1165
1166 if (before_context > 0)
1167 {
1168 int linecount = 0;
1169 char *p = ptr;
1170
1171 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1172 linecount < before_context)
1173 {
1174 linecount++;
1175 p = previous_line(p, buffer);
1176 }
1177
1178 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1179 fprintf(stdout, "--\n");
1180
1181 while (p < ptr)
1182 {
1183 int ellength;
1184 char *pp = p;
1185 if (printname != NULL) fprintf(stdout, "%s-", printname);
1186 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1187 pp = end_of_line(pp, endptr, &ellength);
1188 FWRITE(p, 1, pp - p, stdout);
1189 p = pp;
1190 }
1191 }
1192
1193 /* Now print the matching line(s); ensure we set hyphenpending at the end
1194 of the file if any context lines are being output. */
1195
1196 if (after_context > 0 || before_context > 0)
1197 endhyphenpending = TRUE;
1198
1199 if (printname != NULL) fprintf(stdout, "%s:", printname);
1200 if (number) fprintf(stdout, "%d:", linenumber);
1201
1202 /* In multiline mode, we want to print to the end of the line in which
1203 the end of the matched string is found, so we adjust linelength and the
1204 line number appropriately, but only when there actually was a match
1205 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1206 the match will always be before the first newline sequence. */
1207
1208 if (multiline)
1209 {
1210 int ellength;
1211 char *endmatch = ptr;
1212 if (!invert)
1213 {
1214 endmatch += offsets[1];
1215 t = ptr;
1216 while (t < endmatch)
1217 {
1218 t = end_of_line(t, endptr, &ellength);
1219 if (t <= endmatch) linenumber++; else break;
1220 }
1221 }
1222 endmatch = end_of_line(endmatch, endptr, &ellength);
1223 linelength = endmatch - ptr - ellength;
1224 }
1225
1226 /*** NOTE: Use only fwrite() to output the data line, so that binary
1227 zeroes are treated as just another data character. */
1228
1229 /* This extra option, for Jeffrey Friedl's debugging requirements,
1230 replaces the matched string, or a specific captured string if it exists,
1231 with X. When this happens, colouring is ignored. */
1232
1233 #ifdef JFRIEDL_DEBUG
1234 if (S_arg >= 0 && S_arg < mrc)
1235 {
1236 int first = S_arg * 2;
1237 int last = first + 1;
1238 FWRITE(ptr, 1, offsets[first], stdout);
1239 fprintf(stdout, "X");
1240 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1241 }
1242 else
1243 #endif
1244
1245 /* We have to split the line(s) up if colouring, and search for further
1246 matches. */
1247
1248 if (do_colour)
1249 {
1250 int last_offset = 0;
1251 FWRITE(ptr, 1, offsets[0], stdout);
1252 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1253 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1254 fprintf(stdout, "%c[00m", 0x1b);
1255 for (;;)
1256 {
1257 last_offset += offsets[1];
1258 matchptr += offsets[1];
1259 length -= offsets[1];
1260 if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1261 FWRITE(matchptr, 1, offsets[0], stdout);
1262 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1263 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1264 fprintf(stdout, "%c[00m", 0x1b);
1265 }
1266 FWRITE(ptr + last_offset, 1,
1267 (linelength + endlinelength) - last_offset, stdout);
1268 }
1269
1270 /* Not colouring; no need to search for further matches */
1271
1272 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1273 }
1274
1275 /* End of doing what has to be done for a match */
1276
1277 rc = 0; /* Had some success */
1278
1279 /* Remember where the last match happened for after_context. We remember
1280 where we are about to restart, and that line's number. */
1281
1282 lastmatchrestart = ptr + linelength + endlinelength;
1283 lastmatchnumber = linenumber + 1;
1284 }
1285
1286 /* For a match in multiline inverted mode (which of course did not cause
1287 anything to be printed), we have to move on to the end of the match before
1288 proceeding. */
1289
1290 if (multiline && invert && match)
1291 {
1292 int ellength;
1293 char *endmatch = ptr + offsets[1];
1294 t = ptr;
1295 while (t < endmatch)
1296 {
1297 t = end_of_line(t, endptr, &ellength);
1298 if (t <= endmatch) linenumber++; else break;
1299 }
1300 endmatch = end_of_line(endmatch, endptr, &ellength);
1301 linelength = endmatch - ptr - ellength;
1302 }
1303
1304 /* Advance to after the newline and increment the line number. The file
1305 offset to the current line is maintained in filepos. */
1306
1307 ptr += linelength + endlinelength;
1308 filepos += linelength + endlinelength;
1309 linenumber++;
1310
1311 /* If we haven't yet reached the end of the file (the buffer is full), and
1312 the current point is in the top 1/3 of the buffer, slide the buffer down by
1313 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1314 about to be lost, print them. */
1315
1316 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1317 {
1318 if (after_context > 0 &&
1319 lastmatchnumber > 0 &&
1320 lastmatchrestart < buffer + MBUFTHIRD)
1321 {
1322 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1323 lastmatchnumber = 0;
1324 }
1325
1326 /* Now do the shuffle */
1327
1328 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1329 ptr -= MBUFTHIRD;
1330
1331 #ifdef SUPPORT_LIBZ
1332 if (frtype == FR_LIBZ)
1333 bufflength = 2*MBUFTHIRD +
1334 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1335 else
1336 #endif
1337
1338 #ifdef SUPPORT_LIBBZ2
1339 if (frtype == FR_LIBBZ2)
1340 bufflength = 2*MBUFTHIRD +
1341 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1342 else
1343 #endif
1344
1345 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1346
1347 endptr = buffer + bufflength;
1348
1349 /* Adjust any last match point */
1350
1351 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1352 }
1353 } /* Loop through the whole file */
1354
1355 /* End of file; print final "after" lines if wanted; do_after_lines sets
1356 hyphenpending if it prints something. */
1357
1358 if (!only_matching && !count_only)
1359 {
1360 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1361 hyphenpending |= endhyphenpending;
1362 }
1363
1364 /* Print the file name if we are looking for those without matches and there
1365 were none. If we found a match, we won't have got this far. */
1366
1367 if (filenames == FN_NOMATCH_ONLY)
1368 {
1369 fprintf(stdout, "%s\n", printname);
1370 return 0;
1371 }
1372
1373 /* Print the match count if wanted */
1374
1375 if (count_only)
1376 {
1377 if (count > 0 || !omit_zero_count)
1378 {
1379 if (printname != NULL && filenames != FN_NONE)
1380 fprintf(stdout, "%s:", printname);
1381 fprintf(stdout, "%d\n", count);
1382 }
1383 }
1384
1385 return rc;
1386 }
1387
1388
1389
1390 /*************************************************
1391 * Grep a file or recurse into a directory *
1392 *************************************************/
1393
1394 /* Given a path name, if it's a directory, scan all the files if we are
1395 recursing; if it's a file, grep it.
1396
1397 Arguments:
1398 pathname the path to investigate
1399 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1400 only_one_at_top TRUE if the path is the only one at toplevel
1401
1402 Returns: 0 if there was at least one match
1403 1 if there were no matches
1404 2 there was some kind of error
1405
1406 However, file opening failures are suppressed if "silent" is set.
1407 */
1408
1409 static int
1410 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1411 {
1412 int rc = 1;
1413 int sep;
1414 int frtype;
1415 int pathlen;
1416 void *handle;
1417 FILE *in = NULL; /* Ensure initialized */
1418
1419 #ifdef SUPPORT_LIBZ
1420 gzFile ingz = NULL;
1421 #endif
1422
1423 #ifdef SUPPORT_LIBBZ2
1424 BZFILE *inbz2 = NULL;
1425 #endif
1426
1427 /* If the file name is "-" we scan stdin */
1428
1429 if (strcmp(pathname, "-") == 0)
1430 {
1431 return pcregrep(stdin, FR_PLAIN,
1432 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1433 stdin_name : NULL);
1434 }
1435
1436 /* If the file is a directory, skip if skipping or if we are recursing, scan
1437 each file and directory within it, subject to any include or exclude patterns
1438 that were set. The scanning code is localized so it can be made
1439 system-specific. */
1440
1441 if ((sep = isdirectory(pathname)) != 0)
1442 {
1443 if (dee_action == dee_SKIP) return 1;
1444 if (dee_action == dee_RECURSE)
1445 {
1446 char buffer[1024];
1447 char *nextfile;
1448 directory_type *dir = opendirectory(pathname);
1449
1450 if (dir == NULL)
1451 {
1452 if (!silent)
1453 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1454 strerror(errno));
1455 return 2;
1456 }
1457
1458 while ((nextfile = readdirectory(dir)) != NULL)
1459 {
1460 int frc, nflen;
1461 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1462 nflen = strlen(nextfile);
1463
1464 if (isdirectory(buffer))
1465 {
1466 if (exclude_dir_compiled != NULL &&
1467 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1468 continue;
1469
1470 if (include_dir_compiled != NULL &&
1471 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1472 continue;
1473 }
1474 else
1475 {
1476 if (exclude_compiled != NULL &&
1477 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1478 continue;
1479
1480 if (include_compiled != NULL &&
1481 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1482 continue;
1483 }
1484
1485 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1486 if (frc > 1) rc = frc;
1487 else if (frc == 0 && rc == 1) rc = 0;
1488 }
1489
1490 closedirectory(dir);
1491 return rc;
1492 }
1493 }
1494
1495 /* If the file is not a directory and not a regular file, skip it if that's
1496 been requested. */
1497
1498 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1499
1500 /* Control reaches here if we have a regular file, or if we have a directory
1501 and recursion or skipping was not requested, or if we have anything else and
1502 skipping was not requested. The scan proceeds. If this is the first and only
1503 argument at top level, we don't show the file name, unless we are only showing
1504 the file name, or the filename was forced (-H). */
1505
1506 pathlen = strlen(pathname);
1507
1508 /* Open using zlib if it is supported and the file name ends with .gz. */
1509
1510 #ifdef SUPPORT_LIBZ
1511 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1512 {
1513 ingz = gzopen(pathname, "rb");
1514 if (ingz == NULL)
1515 {
1516 if (!silent)
1517 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1518 strerror(errno));
1519 return 2;
1520 }
1521 handle = (void *)ingz;
1522 frtype = FR_LIBZ;
1523 }
1524 else
1525 #endif
1526
1527 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1528
1529 #ifdef SUPPORT_LIBBZ2
1530 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1531 {
1532 inbz2 = BZ2_bzopen(pathname, "rb");
1533 handle = (void *)inbz2;
1534 frtype = FR_LIBBZ2;
1535 }
1536 else
1537 #endif
1538
1539 /* Otherwise use plain fopen(). The label is so that we can come back here if
1540 an attempt to read a .bz2 file indicates that it really is a plain file. */
1541
1542 #ifdef SUPPORT_LIBBZ2
1543 PLAIN_FILE:
1544 #endif
1545 {
1546 in = fopen(pathname, "rb");
1547 handle = (void *)in;
1548 frtype = FR_PLAIN;
1549 }
1550
1551 /* All the opening methods return errno when they fail. */
1552
1553 if (handle == NULL)
1554 {
1555 if (!silent)
1556 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1557 strerror(errno));
1558 return 2;
1559 }
1560
1561 /* Now grep the file */
1562
1563 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1564 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1565
1566 /* Close in an appropriate manner. */
1567
1568 #ifdef SUPPORT_LIBZ
1569 if (frtype == FR_LIBZ)
1570 gzclose(ingz);
1571 else
1572 #endif
1573
1574 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1575 read failed. If the error indicates that the file isn't in fact bzipped, try
1576 again as a normal file. */
1577
1578 #ifdef SUPPORT_LIBBZ2
1579 if (frtype == FR_LIBBZ2)
1580 {
1581 if (rc == 2)
1582 {
1583 int errnum;
1584 const char *err = BZ2_bzerror(inbz2, &errnum);
1585 if (errnum == BZ_DATA_ERROR_MAGIC)
1586 {
1587 BZ2_bzclose(inbz2);
1588 goto PLAIN_FILE;
1589 }
1590 else if (!silent)
1591 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1592 pathname, err);
1593 }
1594 BZ2_bzclose(inbz2);
1595 }
1596 else
1597 #endif
1598
1599 /* Normal file close */
1600
1601 fclose(in);
1602
1603 /* Pass back the yield from pcregrep(). */
1604
1605 return rc;
1606 }
1607
1608
1609
1610
1611 /*************************************************
1612 * Usage function *
1613 *************************************************/
1614
1615 static int
1616 usage(int rc)
1617 {
1618 option_item *op;
1619 fprintf(stderr, "Usage: pcregrep [-");
1620 for (op = optionlist; op->one_char != 0; op++)
1621 {
1622 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1623 }
1624 fprintf(stderr, "] [long options] [pattern] [files]\n");
1625 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1626 "options.\n");
1627 return rc;
1628 }
1629
1630
1631
1632
1633 /*************************************************
1634 * Help function *
1635 *************************************************/
1636
1637 static void
1638 help(void)
1639 {
1640 option_item *op;
1641
1642 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1643 printf("Search for PATTERN in each FILE or standard input.\n");
1644 printf("PATTERN must be present if neither -e nor -f is used.\n");
1645 printf("\"-\" can be used as a file name to mean STDIN.\n");
1646
1647 #ifdef SUPPORT_LIBZ
1648 printf("Files whose names end in .gz are read using zlib.\n");
1649 #endif
1650
1651 #ifdef SUPPORT_LIBBZ2
1652 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1653 #endif
1654
1655 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1656 printf("Other files and the standard input are read as plain files.\n\n");
1657 #else
1658 printf("All files are read as plain files, without any interpretation.\n\n");
1659 #endif
1660
1661 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1662 printf("Options:\n");
1663
1664 for (op = optionlist; op->one_char != 0; op++)
1665 {
1666 int n;
1667 char s[4];
1668 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1669 n = 30 - printf(" %s --%s", s, op->long_name);
1670 if (n < 1) n = 1;
1671 printf("%.*s%s\n", n, " ", op->help_text);
1672 }
1673
1674 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1675 printf("trailing white space is removed and blank lines are ignored.\n");
1676 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1677
1678 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1679 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1680 }
1681
1682
1683
1684
1685 /*************************************************
1686 * Handle a single-letter, no data option *
1687 *************************************************/
1688
1689 static int
1690 handle_option(int letter, int options)
1691 {
1692 switch(letter)
1693 {
1694 case N_FOFFSETS: file_offsets = TRUE; break;
1695 case N_HELP: help(); exit(0);
1696 case N_LOFFSETS: line_offsets = number = TRUE; break;
1697 case 'c': count_only = TRUE; break;
1698 case 'F': process_options |= PO_FIXED_STRINGS; break;
1699 case 'H': filenames = FN_FORCE; break;
1700 case 'h': filenames = FN_NONE; break;
1701 case 'i': options |= PCRE_CASELESS; break;
1702 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1703 case 'L': filenames = FN_NOMATCH_ONLY; break;
1704 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1705 case 'n': number = TRUE; break;
1706 case 'o': only_matching = TRUE; break;
1707 case 'q': quiet = TRUE; break;
1708 case 'r': dee_action = dee_RECURSE; break;
1709 case 's': silent = TRUE; break;
1710 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1711 case 'v': invert = TRUE; break;
1712 case 'w': process_options |= PO_WORD_MATCH; break;
1713 case 'x': process_options |= PO_LINE_MATCH; break;
1714
1715 case 'V':
1716 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1717 exit(0);
1718 break;
1719
1720 default:
1721 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1722 exit(usage(2));
1723 }
1724
1725 return options;
1726 }
1727
1728
1729
1730
1731 /*************************************************
1732 * Construct printed ordinal *
1733 *************************************************/
1734
/* This turns a number into its English ordinal form: "1st", "2nd", "3rd",
"4th", and so on.

Argument:  n   the number
Returns:   a pointer to a static buffer holding the text; the contents are
           overwritten by the next call
*/

static char *
ordin(int n)
{
  /* Large enough for the digits of any int (including 64-bit ones), a sign,
  the two-letter suffix and the terminating zero. */

  static char buffer[32];
  const char *suffix = "th";
  int last_two = n % 100;

  /* 11, 12 and 13 (and 111, 212, ...) take "th" even though they end in 1, 2
  or 3, so the last digit is consulted only outside that range. */

  if (last_two < 11 || last_two > 13)
    {
    switch (n % 10)
      {
      case 1: suffix = "st"; break;
      case 2: suffix = "nd"; break;
      case 3: suffix = "rd"; break;
      default: break;
      }
    }

  sprintf(buffer, "%d%s", n, suffix);
  return buffer;
}
1753
1754
1755
1756 /*************************************************
1757 * Compile a single pattern *
1758 *************************************************/
1759
1760 /* When the -F option has been used, this is called for each substring.
1761 Otherwise it's called for each supplied pattern.
1762
1763 Arguments:
1764 pattern the pattern string
1765 options the PCRE options
1766 filename the file name, or NULL for a command-line pattern
1767 count 0 if this is the only command line pattern, or
1768 number of the command line pattern, or
1769 linenumber for a pattern from a file
1770
1771 Returns: TRUE on success, FALSE after an error
1772 */
1773
1774 static BOOL
1775 compile_single_pattern(char *pattern, int options, char *filename, int count)
1776 {
1777 char buffer[MBUFTHIRD + 16];
1778 const char *error;
1779 int errptr;
1780
1781 if (pattern_count >= MAX_PATTERN_COUNT)
1782 {
1783 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1784 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1785 return FALSE;
1786 }
1787
1788 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1789 suffix[process_options]);
1790 pattern_list[pattern_count] =
1791 pcre_compile(buffer, options, &error, &errptr, pcretables);
1792 if (pattern_list[pattern_count] != NULL)
1793 {
1794 pattern_count++;
1795 return TRUE;
1796 }
1797
1798 /* Handle compile errors */
1799
1800 errptr -= (int)strlen(prefix[process_options]);
1801 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1802
1803 if (filename == NULL)
1804 {
1805 if (count == 0)
1806 fprintf(stderr, "pcregrep: Error in command-line regex "
1807 "at offset %d: %s\n", errptr, error);
1808 else
1809 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1810 "at offset %d: %s\n", ordin(count), errptr, error);
1811 }
1812 else
1813 {
1814 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1815 "at offset %d: %s\n", count, filename, errptr, error);
1816 }
1817
1818 return FALSE;
1819 }
1820
1821
1822
1823 /*************************************************
1824 * Compile one supplied pattern *
1825 *************************************************/
1826
1827 /* When the -F option has been used, each string may be a list of strings,
1828 separated by line breaks. They will be matched literally.
1829
1830 Arguments:
1831 pattern the pattern string
1832 options the PCRE options
1833 filename the file name, or NULL for a command-line pattern
1834 count 0 if this is the only command line pattern, or
1835 number of the command line pattern, or
1836 linenumber for a pattern from a file
1837
1838 Returns: TRUE on success, FALSE after an error
1839 */
1840
1841 static BOOL
1842 compile_pattern(char *pattern, int options, char *filename, int count)
1843 {
1844 if ((process_options & PO_FIXED_STRINGS) != 0)
1845 {
1846 char *eop = pattern + strlen(pattern);
1847 char buffer[MBUFTHIRD];
1848 for(;;)
1849 {
1850 int ellength;
1851 char *p = end_of_line(pattern, eop, &ellength);
1852 if (ellength == 0)
1853 return compile_single_pattern(pattern, options, filename, count);
1854 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1855 pattern = p;
1856 if (!compile_single_pattern(buffer, options, filename, count))
1857 return FALSE;
1858 }
1859 }
1860 else return compile_single_pattern(pattern, options, filename, count);
1861 }
1862
1863
1864
1865 /*************************************************
1866 * Main program *
1867 *************************************************/
1868
1869 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1870
1871 int
1872 main(int argc, char **argv)
1873 {
1874 int i, j;
1875 int rc = 1;
1876 int pcre_options = 0;
1877 int cmd_pattern_count = 0;
1878 int hint_count = 0;
1879 int errptr;
1880 BOOL only_one_at_top;
1881 char *patterns[MAX_PATTERN_COUNT];
1882 const char *locale_from = "--locale";
1883 const char *error;
1884
1885 /* Set the default line ending value from the default in the PCRE library;
1886 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1887 Note that the return values from pcre_config(), though derived from the ASCII
1888 codes, are the same in EBCDIC environments, so we must use the actual values
1889 rather than escapes such as as '\r'. */
1890
1891 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1892 switch(i)
1893 {
1894 default: newline = (char *)"lf"; break;
1895 case 13: newline = (char *)"cr"; break;
1896 case (13 << 8) | 10: newline = (char *)"crlf"; break;
1897 case -1: newline = (char *)"any"; break;
1898 case -2: newline = (char *)"anycrlf"; break;
1899 }
1900
1901 /* Process the options */
1902
1903 for (i = 1; i < argc; i++)
1904 {
1905 option_item *op = NULL;
1906 char *option_data = (char *)""; /* default to keep compiler happy */
1907 BOOL longop;
1908 BOOL longopwasequals = FALSE;
1909
1910 if (argv[i][0] != '-') break;
1911
1912 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1913 but only if we have previously had -e or -f to define the patterns. */
1914
1915 if (argv[i][1] == 0)
1916 {
1917 if (pattern_filename != NULL || pattern_count > 0) break;
1918 else exit(usage(2));
1919 }
1920
1921 /* Handle a long name option, or -- to terminate the options */
1922
1923 if (argv[i][1] == '-')
1924 {
1925 char *arg = argv[i] + 2;
1926 char *argequals = strchr(arg, '=');
1927
1928 if (*arg == 0) /* -- terminates options */
1929 {
1930 i++;
1931 break; /* out of the options-handling loop */
1932 }
1933
1934 longop = TRUE;
1935
1936 /* Some long options have data that follows after =, for example file=name.
1937 Some options have variations in the long name spelling: specifically, we
1938 allow "regexp" because GNU grep allows it, though I personally go along
1939 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1940 These options are entered in the table as "regex(p)". Options can be in
1941 both these categories. */
1942
1943 for (op = optionlist; op->one_char != 0; op++)
1944 {
1945 char *opbra = strchr(op->long_name, '(');
1946 char *equals = strchr(op->long_name, '=');
1947
1948 /* Handle options with only one spelling of the name */
1949
1950 if (opbra == NULL) /* Does not contain '(' */
1951 {
1952 if (equals == NULL) /* Not thing=data case */
1953 {
1954 if (strcmp(arg, op->long_name) == 0) break;
1955 }
1956 else /* Special case xxx=data */
1957 {
1958 int oplen = equals - op->long_name;
1959 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1960 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1961 {
1962 option_data = arg + arglen;
1963 if (*option_data == '=')
1964 {
1965 option_data++;
1966 longopwasequals = TRUE;
1967 }
1968 break;
1969 }
1970 }
1971 }
1972
1973 /* Handle options with an alternate spelling of the name */
1974
1975 else
1976 {
1977 char buff1[24];
1978 char buff2[24];
1979
1980 int baselen = opbra - op->long_name;
1981 int fulllen = strchr(op->long_name, ')') - op->long_name + 1;
1982 int arglen = (argequals == NULL || equals == NULL)?
1983 (int)strlen(arg) : argequals - arg;
1984
1985 sprintf(buff1, "%.*s", baselen, op->long_name);
1986 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
1987
1988 if (strncmp(arg, buff1, arglen) == 0 ||
1989 strncmp(arg, buff2, arglen) == 0)
1990 {
1991 if (equals != NULL && argequals != NULL)
1992 {
1993 option_data = argequals;
1994 if (*option_data == '=')
1995 {
1996 option_data++;
1997 longopwasequals = TRUE;
1998 }
1999 }
2000 break;
2001 }
2002 }
2003 }
2004
2005 if (op->one_char == 0)
2006 {
2007 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2008 exit(usage(2));
2009 }
2010 }
2011
2012 /* Jeffrey Friedl's debugging harness uses these additional options which
2013 are not in the right form for putting in the option table because they use
2014 only one hyphen, yet are more than one character long. By putting them
2015 separately here, they will not get displayed as part of the help() output,
2016 but I don't think Jeffrey will care about that. */
2017
2018 #ifdef JFRIEDL_DEBUG
2019 else if (strcmp(argv[i], "-pre") == 0) {
2020 jfriedl_prefix = argv[++i];
2021 continue;
2022 } else if (strcmp(argv[i], "-post") == 0) {
2023 jfriedl_postfix = argv[++i];
2024 continue;
2025 } else if (strcmp(argv[i], "-XT") == 0) {
2026 sscanf(argv[++i], "%d", &jfriedl_XT);
2027 continue;
2028 } else if (strcmp(argv[i], "-XR") == 0) {
2029 sscanf(argv[++i], "%d", &jfriedl_XR);
2030 continue;
2031 }
2032 #endif
2033
2034
2035 /* One-char options; many that have no data may be in a single argument; we
2036 continue till we hit the last one or one that needs data. */
2037
2038 else
2039 {
2040 char *s = argv[i] + 1;
2041 longop = FALSE;
2042 while (*s != 0)
2043 {
2044 for (op = optionlist; op->one_char != 0; op++)
2045 { if (*s == op->one_char) break; }
2046 if (op->one_char == 0)
2047 {
2048 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2049 *s, argv[i]);
2050 exit(usage(2));
2051 }
2052 if (op->type != OP_NODATA || s[1] == 0)
2053 {
2054 option_data = s+1;
2055 break;
2056 }
2057 pcre_options = handle_option(*s++, pcre_options);
2058 }
2059 }
2060
2061 /* At this point we should have op pointing to a matched option. If the type
2062 is NO_DATA, it means that there is no data, and the option might set
2063 something in the PCRE options. */
2064
2065 if (op->type == OP_NODATA)
2066 {
2067 pcre_options = handle_option(op->one_char, pcre_options);
2068 continue;
2069 }
2070
2071 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2072 either has a value or defaults to something. It cannot have data in a
2073 separate item. At the moment, the only such options are "colo(u)r" and
2074 Jeffrey Friedl's special -S debugging option. */
2075
2076 if (*option_data == 0 &&
2077 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2078 {
2079 switch (op->one_char)
2080 {
2081 case N_COLOUR:
2082 colour_option = (char *)"auto";
2083 break;
2084 #ifdef JFRIEDL_DEBUG
2085 case 'S':
2086 S_arg = 0;
2087 break;
2088 #endif
2089 }
2090 continue;
2091 }
2092
2093 /* Otherwise, find the data string for the option. */
2094
2095 if (*option_data == 0)
2096 {
2097 if (i >= argc - 1 || longopwasequals)
2098 {
2099 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2100 exit(usage(2));
2101 }
2102 option_data = argv[++i];
2103 }
2104
2105 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2106 multiple times to create a list of patterns. */
2107
2108 if (op->type == OP_PATLIST)
2109 {
2110 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2111 {
2112 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2113 MAX_PATTERN_COUNT);
2114 return 2;
2115 }
2116 patterns[cmd_pattern_count++] = option_data;
2117 }
2118
2119 /* Otherwise, deal with single string or numeric data values. */
2120
2121 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2122 {
2123 *((char **)op->dataptr) = option_data;
2124 }
2125 else
2126 {
2127 char *endptr;
2128 int n = strtoul(option_data, &endptr, 10);
2129 if (*endptr != 0)
2130 {
2131 if (longop)
2132 {
2133 char *equals = strchr(op->long_name, '=');
2134 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2135 equals - op->long_name;
2136 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2137 option_data, nlen, op->long_name);
2138 }
2139 else
2140 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2141 option_data, op->one_char);
2142 exit(usage(2));
2143 }
2144 *((int *)op->dataptr) = n;
2145 }
2146 }
2147
2148 /* Options have been decoded. If -C was used, its value is used as a default
2149 for -A and -B. */
2150
2151 if (both_context > 0)
2152 {
2153 if (after_context == 0) after_context = both_context;
2154 if (before_context == 0) before_context = both_context;
2155 }
2156
2157 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2158 However, the latter two set the only_matching flag. */
2159
2160 if ((only_matching && (file_offsets || line_offsets)) ||
2161 (file_offsets && line_offsets))
2162 {
2163 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2164 "and/or --line-offsets\n");
2165 exit(usage(2));
2166 }
2167
2168 if (file_offsets || line_offsets) only_matching = TRUE;
2169
2170 /* If a locale has not been provided as an option, use the LC_ALL environment
2171 variable if it is set or, failing that, the LC_CTYPE environment variable. */
2172
2173 if (locale == NULL)
2174 {
2175 locale = getenv("LC_ALL");
2176 locale_from = "LCC_ALL";
2177 }
2178
2179 if (locale == NULL)
2180 {
2181 locale = getenv("LC_CTYPE");
2182 locale_from = "LC_CTYPE";
2183 }
2184
2185 /* If a locale has been provided, set it, and generate the tables that PCRE
2186 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2187
2188 if (locale != NULL)
2189 {
2190 if (setlocale(LC_CTYPE, locale) == NULL)
2191 {
2192 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2193 locale, locale_from);
2194 return 2;
2195 }
2196 pcretables = pcre_maketables();
2197 }
2198
2199 /* Sort out colouring */
2200
2201 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2202 {
2203 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2204 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2205 else
2206 {
2207 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2208 colour_option);
2209 return 2;
2210 }
2211 if (do_colour)
2212 {
2213 char *cs = getenv("PCREGREP_COLOUR");
2214 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2215 if (cs != NULL) colour_string = cs;
2216 }
2217 }
2218
2219 /* Interpret the newline type; the default settings are Unix-like. */
2220
2221 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2222 {
2223 pcre_options |= PCRE_NEWLINE_CR;
2224 endlinetype = EL_CR;
2225 }
2226 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2227 {
2228 pcre_options |= PCRE_NEWLINE_LF;
2229 endlinetype = EL_LF;
2230 }
2231 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2232 {
2233 pcre_options |= PCRE_NEWLINE_CRLF;
2234 endlinetype = EL_CRLF;
2235 }
2236 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2237 {
2238 pcre_options |= PCRE_NEWLINE_ANY;
2239 endlinetype = EL_ANY;
2240 }
2241 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2242 {
2243 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2244 endlinetype = EL_ANYCRLF;
2245 }
2246 else
2247 {
2248 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2249 return 2;
2250 }
2251
2252 /* Interpret the text values for -d and -D */
2253
2254 if (dee_option != NULL)
2255 {
2256 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2257 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2258 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2259 else
2260 {
2261 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2262 return 2;
2263 }
2264 }
2265
2266 if (DEE_option != NULL)
2267 {
2268 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2269 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2270 else
2271 {
2272 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2273 return 2;
2274 }
2275 }
2276
2277 /* Check the values for Jeffrey Friedl's debugging options. */
2278
2279 #ifdef JFRIEDL_DEBUG
2280 if (S_arg > 9)
2281 {
2282 fprintf(stderr, "pcregrep: bad value for -S option\n");
2283 return 2;
2284 }
2285 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2286 {
2287 if (jfriedl_XT == 0) jfriedl_XT = 1;
2288 if (jfriedl_XR == 0) jfriedl_XR = 1;
2289 }
2290 #endif
2291
2292 /* Get memory to store the pattern and hints lists. */
2293
2294 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2295 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2296
2297 if (pattern_list == NULL || hints_list == NULL)
2298 {
2299 fprintf(stderr, "pcregrep: malloc failed\n");
2300 goto EXIT2;
2301 }
2302
2303 /* If no patterns were provided by -e, and there is no file provided by -f,
2304 the first argument is the one and only pattern, and it must exist. */
2305
2306 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2307 {
2308 if (i >= argc) return usage(2);
2309 patterns[cmd_pattern_count++] = argv[i++];
2310 }
2311
2312 /* Compile the patterns that were provided on the command line, either by
2313 multiple uses of -e or as a single unkeyed pattern. */
2314
2315 for (j = 0; j < cmd_pattern_count; j++)
2316 {
2317 if (!compile_pattern(patterns[j], pcre_options, NULL,
2318 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2319 goto EXIT2;
2320 }
2321
2322 /* Compile the regular expressions that are provided in a file. */
2323
2324 if (pattern_filename != NULL)
2325 {
2326 int linenumber = 0;
2327 FILE *f;
2328 char *filename;
2329 char buffer[MBUFTHIRD];
2330
2331 if (strcmp(pattern_filename, "-") == 0)
2332 {
2333 f = stdin;
2334 filename = stdin_name;
2335 }
2336 else
2337 {
2338 f = fopen(pattern_filename, "r");
2339 if (f == NULL)
2340 {
2341 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2342 strerror(errno));
2343 goto EXIT2;
2344 }
2345 filename = pattern_filename;
2346 }
2347
2348 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2349 {
2350 char *s = buffer + (int)strlen(buffer);
2351 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2352 *s = 0;
2353 linenumber++;
2354 if (buffer[0] == 0) continue; /* Skip blank lines */
2355 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2356 goto EXIT2;
2357 }
2358
2359 if (f != stdin) fclose(f);
2360 }
2361
2362 /* Study the regular expressions, as we will be running them many times */
2363
2364 for (j = 0; j < pattern_count; j++)
2365 {
2366 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2367 if (error != NULL)
2368 {
2369 char s[16];
2370 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2371 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2372 goto EXIT2;
2373 }
2374 hint_count++;
2375 }
2376
2377 /* If there are include or exclude patterns, compile them. */
2378
2379 if (exclude_pattern != NULL)
2380 {
2381 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2382 pcretables);
2383 if (exclude_compiled == NULL)
2384 {
2385 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2386 errptr, error);
2387 goto EXIT2;
2388 }
2389 }
2390
2391 if (include_pattern != NULL)
2392 {
2393 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2394 pcretables);
2395 if (include_compiled == NULL)
2396 {
2397 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2398 errptr, error);
2399 goto EXIT2;
2400 }
2401 }
2402
2403 if (exclude_dir_pattern != NULL)
2404 {
2405 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2406 pcretables);
2407 if (exclude_dir_compiled == NULL)
2408 {
2409 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2410 errptr, error);
2411 goto EXIT2;
2412 }
2413 }
2414
2415 if (include_dir_pattern != NULL)
2416 {
2417 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2418 pcretables);
2419 if (include_dir_compiled == NULL)
2420 {
2421 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2422 errptr, error);
2423 goto EXIT2;
2424 }
2425 }
2426
2427 /* If there are no further arguments, do the business on stdin and exit. */
2428
2429 if (i >= argc)
2430 {
2431 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2432 goto EXIT;
2433 }
2434
2435 /* Otherwise, work through the remaining arguments as files or directories.
2436 Pass in the fact that there is only one argument at top level - this suppresses
2437 the file name if the argument is not a directory and filenames are not
2438 otherwise forced. */
2439
2440 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2441
2442 for (; i < argc; i++)
2443 {
2444 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2445 only_one_at_top);
2446 if (frc > 1) rc = frc;
2447 else if (frc == 0 && rc == 1) rc = 0;
2448 }
2449
2450 EXIT:
2451 if (pattern_list != NULL)
2452 {
2453 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2454 free(pattern_list);
2455 }
2456 if (hints_list != NULL)
2457 {
2458 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2459 free(hints_list);
2460 }
2461 return rc;
2462
2463 EXIT2:
2464 rc = 2;
2465 goto EXIT;
2466 }
2467
2468 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5