/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 378 - (show annotations)
Sun Mar 1 14:13:34 2009 UTC (6 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 68901 byte(s)
Error occurred while calculating annotation data.
Make pcregrep with --colour show all matches in a line in colour.
1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2009 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
/* Home-grown boolean type and its two values. */

typedef int BOOL;

#define TRUE 1
#define FALSE 0

/* Fixed limits. OFFSET_SIZE is the number of ints in the offsets vector that
is handed to pcre_exec(). MAX_PATTERN_COUNT is presumably the upper limit on
the number of patterns; its use is not visible in this part of the file. */

#define OFFSET_SIZE 99
#define MAX_PATTERN_COUNT 100

/* The main input buffer is three times MBUFTHIRD bytes long. MBUFTHIRD is
BUFSIZ when that is bigger than 8192, and 8192 otherwise. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif

/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* File reading styles */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};

/* Actions for the -d and -D options */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004

/* Line ending types */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};
106
107
108
109 /*************************************************
110 * Global variables *
111 *************************************************/
112
113 /* Jeffrey Friedl has some debugging requirements that are not part of the
114 regular code. */
115
116 #ifdef JFRIEDL_DEBUG
117 static int S_arg = -1;
118 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
119 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
120 static const char *jfriedl_prefix = "";
121 static const char *jfriedl_postfix = "";
122 #endif
123
124 static int endlinetype;
125
126 static char *colour_string = (char *)"1;31";
127 static char *colour_option = NULL;
128 static char *dee_option = NULL;
129 static char *DEE_option = NULL;
130 static char *newline = NULL;
131 static char *pattern_filename = NULL;
132 static char *stdin_name = (char *)"(standard input)";
133 static char *locale = NULL;
134
135 static const unsigned char *pcretables = NULL;
136
137 static int pattern_count = 0;
138 static pcre **pattern_list = NULL;
139 static pcre_extra **hints_list = NULL;
140
141 static char *include_pattern = NULL;
142 static char *exclude_pattern = NULL;
143 static char *include_dir_pattern = NULL;
144 static char *exclude_dir_pattern = NULL;
145
146 static pcre *include_compiled = NULL;
147 static pcre *exclude_compiled = NULL;
148 static pcre *include_dir_compiled = NULL;
149 static pcre *exclude_dir_compiled = NULL;
150
151 static int after_context = 0;
152 static int before_context = 0;
153 static int both_context = 0;
154 static int dee_action = dee_READ;
155 static int DEE_action = DEE_READ;
156 static int error_count = 0;
157 static int filenames = FN_DEFAULT;
158 static int process_options = 0;
159
160 static BOOL count_only = FALSE;
161 static BOOL do_colour = FALSE;
162 static BOOL file_offsets = FALSE;
163 static BOOL hyphenpending = FALSE;
164 static BOOL invert = FALSE;
165 static BOOL line_offsets = FALSE;
166 static BOOL multiline = FALSE;
167 static BOOL number = FALSE;
168 static BOOL only_matching = FALSE;
169 static BOOL quiet = FALSE;
170 static BOOL silent = FALSE;
171 static BOOL utf8 = FALSE;
172
/* The kinds of option that can appear in the option list. This value is held
in the "type" field of an option_item. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

/* Description of one command line option */

typedef struct option_item {
  int type;                 /* one of the OP_xxx values above */
  int one_char;             /* single-letter form, or a negative N_xxx code */
  void *dataptr;            /* variable associated with the option, or NULL */
  const char *long_name;    /* long option name */
  const char *help_text;    /* description of the option */
} option_item;
185
/* An option that has no single-letter form is given a negative code in place
of the letter, which also serves to identify it. The codes are listed here in
alphabetical order of name. */

#define N_COLOUR (-1)
#define N_EXCLUDE (-2)
#define N_EXCLUDE_DIR (-3)
#define N_FOFFSETS (-11)
#define N_HELP (-4)
#define N_INCLUDE (-5)
#define N_INCLUDE_DIR (-6)
#define N_LABEL (-7)
#define N_LOCALE (-8)
#define N_LOFFSETS (-10)
#define N_NULL (-9)
200
201 static option_item optionlist[] = {
202 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
203 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
204 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
205 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
206 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
207 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
208 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
209 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
210 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
211 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
212 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
213 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
214 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
215 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
216 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
217 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
218 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
219 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
220 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
221 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
222 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
223 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
224 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
225 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
226 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
227 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
228 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
229 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
230 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
231 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
232 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
233 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
234 #ifdef JFRIEDL_DEBUG
235 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
236 #endif
237 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
238 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
239 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
240 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
241 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
242 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
243 { OP_NODATA, 0, NULL, NULL, NULL }
244 };
245
/* Text to put before and after each pattern, selected by the -w, -x, and -F
options. Those options set the 1, 2, and 4 bits in process_options,
respectively, and the resulting value is the index into these tables. Because
-w together with -x has the same effect as -x alone, entries 3 and 7 simply
repeat entries 2 and 6. */

static const char *prefix[] = {
  "",           /* 0: none */
  "\\b",        /* 1: -w */
  "^(?:",       /* 2: -x */
  "^(?:",       /* 3: -w -x */
  "\\Q",        /* 4: -F */
  "\\b\\Q",     /* 5: -F -w */
  "^(?:\\Q",    /* 6: -F -x */
  "^(?:\\Q"     /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",           /* 0: none */
  "\\b",        /* 1: -w */
  ")$",         /* 2: -x */
  ")$",         /* 3: -w -x */
  "\\E",        /* 4: -F */
  "\\E\\b",     /* 5: -F -w */
  "\\E)$",      /* 6: -F -x */
  "\\E)$"       /* 7: -F -w -x */
};
256
/* UTF-8 tables. They are used by end_of_line() and previous_line() to decode
multi-byte characters when the newline setting is "any" or "anycrlf" and UTF-8
mode is on. */

/* Mask for the data bits in the first byte of a character, indexed by the
number of additional bytes. */

const int utf8_table3[] = {
  0xff,
  0x1f,
  0x0f,
  0x07,
  0x03,
  0x01
};

/* Number of additional bytes in a character, indexed by the bottom 6 bits of
its first byte (which is 0xc0 or greater). */

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,
  2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,
  4,4,4,4,5,5,5,5
};
266
267
268
269 /*************************************************
270 * OS-specific functions *
271 *************************************************/
272
273 /* These functions are defined so that they can be made system specific,
274 although at present the only ones are for Unix, Win32, and for "no support". */
275
276
277 /************* Directory scanning in Unix ***********/
278
279 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
280 #include <sys/types.h>
281 #include <sys/stat.h>
282 #include <dirent.h>
283
284 typedef DIR directory_type;
285
/* Test whether a path names a directory. The result is '/' for a directory
and 0 for anything else. If stat() fails the result is also 0, in the
expectation that opening the path as a file will then fail. */

static int
isdirectory(char *filename)
{
struct stat statbuf;
int rc = stat(filename, &statbuf);
if (rc < 0) return 0;
if (S_ISDIR(statbuf.st_mode)) return '/';
return 0;
}
294
295 static directory_type *
296 opendirectory(char *filename)
297 {
298 return opendir(filename);
299 }
300
301 static char *
302 readdirectory(directory_type *dir)
303 {
304 for (;;)
305 {
306 struct dirent *dent = readdir(dir);
307 if (dent == NULL) return NULL;
308 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
309 return dent->d_name;
310 }
311 /* Control never reaches here */
312 }
313
314 static void
315 closedirectory(directory_type *dir)
316 {
317 closedir(dir);
318 }
319
320
321 /************* Test for regular file in Unix **********/
322
/* Test whether a path names a regular file. The result is 1 for a regular
file and 0 for any other kind of file. If stat() fails the result is 1, in the
expectation that opening the path as a file will then fail. */

static int
isregfile(char *filename)
{
struct stat statbuf;
int rc = stat(filename, &statbuf);
if (rc < 0) return 1;
return S_ISREG(statbuf.st_mode) != 0;
}
331
332
333 /************* Test stdout for being a terminal in Unix **********/
334
335 static BOOL
336 is_stdout_tty(void)
337 {
338 return isatty(fileno(stdout));
339 }
340
341
342 /************* Directory scanning in Win32 ***********/
343
344 /* I (Philip Hazel) have no means of testing this code. It was contributed by
345 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
346 when it did not exist. David Byron added a patch that moved the #include of
347 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
348 */
349
350 #elif HAVE_WINDOWS_H
351
352 #ifndef STRICT
353 # define STRICT
354 #endif
355 #ifndef WIN32_LEAN_AND_MEAN
356 # define WIN32_LEAN_AND_MEAN
357 #endif
358
359 #include <windows.h>
360
361 #ifndef INVALID_FILE_ATTRIBUTES
362 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
363 #endif
364
365 typedef struct directory_type
366 {
367 HANDLE handle;
368 BOOL first;
369 WIN32_FIND_DATA data;
370 } directory_type;
371
372 int
373 isdirectory(char *filename)
374 {
375 DWORD attr = GetFileAttributes(filename);
376 if (attr == INVALID_FILE_ATTRIBUTES)
377 return 0;
378 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
379 }
380
381 directory_type *
382 opendirectory(char *filename)
383 {
384 size_t len;
385 char *pattern;
386 directory_type *dir;
387 DWORD err;
388 len = strlen(filename);
389 pattern = (char *) malloc(len + 3);
390 dir = (directory_type *) malloc(sizeof(*dir));
391 if ((pattern == NULL) || (dir == NULL))
392 {
393 fprintf(stderr, "pcregrep: malloc failed\n");
394 exit(2);
395 }
396 memcpy(pattern, filename, len);
397 memcpy(&(pattern[len]), "\\*", 3);
398 dir->handle = FindFirstFile(pattern, &(dir->data));
399 if (dir->handle != INVALID_HANDLE_VALUE)
400 {
401 free(pattern);
402 dir->first = TRUE;
403 return dir;
404 }
405 err = GetLastError();
406 free(pattern);
407 free(dir);
408 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
409 return NULL;
410 }
411
412 char *
413 readdirectory(directory_type *dir)
414 {
415 for (;;)
416 {
417 if (!dir->first)
418 {
419 if (!FindNextFile(dir->handle, &(dir->data)))
420 return NULL;
421 }
422 else
423 {
424 dir->first = FALSE;
425 }
426 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
427 return dir->data.cFileName;
428 }
429 #ifndef _MSC_VER
430 return NULL; /* Keep compiler happy; never executed */
431 #endif
432 }
433
434 void
435 closedirectory(directory_type *dir)
436 {
437 FindClose(dir->handle);
438 free(dir);
439 }
440
441
442 /************* Test for regular file in Win32 **********/
443
/* There is no known way of doing a proper test here, so every path that is
not a directory is taken to be a regular file. The result is 1 for "regular"
and 0 for a directory. */

int isregfile(char *filename)
{
if (isdirectory(filename)) return 0;
return 1;
}
451
452
453 /************* Test stdout for being a terminal in Win32 **********/
454
455 /* I don't know how to do this; assume never */
456
457 static BOOL
458 is_stdout_tty(void)
459 {
460 return FALSE;
461 }
462
463
464 /************* Directory scanning when we can't do it ***********/
465
466 /* The type is void, and apart from isdirectory(), the functions do nothing. */
467
468 #else
469
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* A directory can never be opened; the result is always a null pointer. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type*)0;
}

/* There are never any entries to read; the result is always a null pointer. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char*)0;
}

/* There is nothing to close. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
476
477
478 /************* Test for regular when we can't do it **********/
479
/* With no means of testing, every file is taken to be a regular file. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
483
484
485 /************* Test stdout for being a terminal when we can't do it **********/
486
487 static BOOL
488 is_stdout_tty(void)
489 {
490 return FALSE;
491 }
492
493
494 #endif
495
496
497
498 #ifndef HAVE_STRERROR
499 /*************************************************
500 * Provide strerror() for non-ANSI libraries *
501 *************************************************/
502
503 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
504 in their libraries, but can provide the same facility by this simple
505 alternative function. */
506
507 extern int sys_nerr;
508 extern char *sys_errlist[];
509
510 char *
511 strerror(int n)
512 {
513 if (n < 0 || n >= sys_nerr) return "unknown error number";
514 return sys_errlist[n];
515 }
516 #endif /* HAVE_STRERROR */
517
518
519
520 /*************************************************
521 * Find end of line *
522 *************************************************/
523
524 /* The length of the endline sequence that is found is set via lenptr. This may
525 be zero at the very end of the file if there is no line-ending sequence there.
526
527 Arguments:
528 p current position in line
529 endptr end of available data
530 lenptr where to put the length of the eol sequence
531
532 Returns: pointer to the last byte of the line
533 */
534
535 static char *
536 end_of_line(char *p, char *endptr, int *lenptr)
537 {
538 switch(endlinetype)
539 {
540 default: /* Just in case */
541 case EL_LF:
542 while (p < endptr && *p != '\n') p++;
543 if (p < endptr)
544 {
545 *lenptr = 1;
546 return p + 1;
547 }
548 *lenptr = 0;
549 return endptr;
550
551 case EL_CR:
552 while (p < endptr && *p != '\r') p++;
553 if (p < endptr)
554 {
555 *lenptr = 1;
556 return p + 1;
557 }
558 *lenptr = 0;
559 return endptr;
560
561 case EL_CRLF:
562 for (;;)
563 {
564 while (p < endptr && *p != '\r') p++;
565 if (++p >= endptr)
566 {
567 *lenptr = 0;
568 return endptr;
569 }
570 if (*p == '\n')
571 {
572 *lenptr = 2;
573 return p + 1;
574 }
575 }
576 break;
577
578 case EL_ANYCRLF:
579 while (p < endptr)
580 {
581 int extra = 0;
582 register int c = *((unsigned char *)p);
583
584 if (utf8 && c >= 0xc0)
585 {
586 int gcii, gcss;
587 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
588 gcss = 6*extra;
589 c = (c & utf8_table3[extra]) << gcss;
590 for (gcii = 1; gcii <= extra; gcii++)
591 {
592 gcss -= 6;
593 c |= (p[gcii] & 0x3f) << gcss;
594 }
595 }
596
597 p += 1 + extra;
598
599 switch (c)
600 {
601 case 0x0a: /* LF */
602 *lenptr = 1;
603 return p;
604
605 case 0x0d: /* CR */
606 if (p < endptr && *p == 0x0a)
607 {
608 *lenptr = 2;
609 p++;
610 }
611 else *lenptr = 1;
612 return p;
613
614 default:
615 break;
616 }
617 } /* End of loop for ANYCRLF case */
618
619 *lenptr = 0; /* Must have hit the end */
620 return endptr;
621
622 case EL_ANY:
623 while (p < endptr)
624 {
625 int extra = 0;
626 register int c = *((unsigned char *)p);
627
628 if (utf8 && c >= 0xc0)
629 {
630 int gcii, gcss;
631 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
632 gcss = 6*extra;
633 c = (c & utf8_table3[extra]) << gcss;
634 for (gcii = 1; gcii <= extra; gcii++)
635 {
636 gcss -= 6;
637 c |= (p[gcii] & 0x3f) << gcss;
638 }
639 }
640
641 p += 1 + extra;
642
643 switch (c)
644 {
645 case 0x0a: /* LF */
646 case 0x0b: /* VT */
647 case 0x0c: /* FF */
648 *lenptr = 1;
649 return p;
650
651 case 0x0d: /* CR */
652 if (p < endptr && *p == 0x0a)
653 {
654 *lenptr = 2;
655 p++;
656 }
657 else *lenptr = 1;
658 return p;
659
660 case 0x85: /* NEL */
661 *lenptr = utf8? 2 : 1;
662 return p;
663
664 case 0x2028: /* LS */
665 case 0x2029: /* PS */
666 *lenptr = 3;
667 return p;
668
669 default:
670 break;
671 }
672 } /* End of loop for ANY case */
673
674 *lenptr = 0; /* Must have hit the end */
675 return endptr;
676 } /* End of overall switch */
677 }
678
679
680
681 /*************************************************
682 * Find start of previous line *
683 *************************************************/
684
685 /* This is called when looking back for before lines to print.
686
687 Arguments:
688 p start of the subsequent line
689 startptr start of available data
690
691 Returns: pointer to the start of the previous line
692 */
693
694 static char *
695 previous_line(char *p, char *startptr)
696 {
697 switch(endlinetype)
698 {
699 default: /* Just in case */
700 case EL_LF:
701 p--;
702 while (p > startptr && p[-1] != '\n') p--;
703 return p;
704
705 case EL_CR:
706 p--;
707 while (p > startptr && p[-1] != '\n') p--;
708 return p;
709
710 case EL_CRLF:
711 for (;;)
712 {
713 p -= 2;
714 while (p > startptr && p[-1] != '\n') p--;
715 if (p <= startptr + 1 || p[-2] == '\r') return p;
716 }
717 return p; /* But control should never get here */
718
719 case EL_ANY:
720 case EL_ANYCRLF:
721 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
722 if (utf8) while ((*p & 0xc0) == 0x80) p--;
723
724 while (p > startptr)
725 {
726 register int c;
727 char *pp = p - 1;
728
729 if (utf8)
730 {
731 int extra = 0;
732 while ((*pp & 0xc0) == 0x80) pp--;
733 c = *((unsigned char *)pp);
734 if (c >= 0xc0)
735 {
736 int gcii, gcss;
737 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
738 gcss = 6*extra;
739 c = (c & utf8_table3[extra]) << gcss;
740 for (gcii = 1; gcii <= extra; gcii++)
741 {
742 gcss -= 6;
743 c |= (pp[gcii] & 0x3f) << gcss;
744 }
745 }
746 }
747 else c = *((unsigned char *)pp);
748
749 if (endlinetype == EL_ANYCRLF) switch (c)
750 {
751 case 0x0a: /* LF */
752 case 0x0d: /* CR */
753 return p;
754
755 default:
756 break;
757 }
758
759 else switch (c)
760 {
761 case 0x0a: /* LF */
762 case 0x0b: /* VT */
763 case 0x0c: /* FF */
764 case 0x0d: /* CR */
765 case 0x85: /* NEL */
766 case 0x2028: /* LS */
767 case 0x2029: /* PS */
768 return p;
769
770 default:
771 break;
772 }
773
774 p = pp; /* Back one character */
775 } /* End of loop for ANY case */
776
777 return startptr; /* Hit start of data */
778 } /* End of overall switch */
779 }
780
781
782
783
784
785 /*************************************************
786 * Print the previous "after" lines *
787 *************************************************/
788
789 /* This is called if we are about to lose said lines because of buffer filling,
790 and at the end of the file. The data in the line is written using fwrite() so
791 that a binary zero does not terminate it.
792
793 Arguments:
794 lastmatchnumber the number of the last matching line, plus one
795 lastmatchrestart where we restarted after the last match
796 endptr end of available data
797 printname filename for printing
798
799 Returns: nothing
800 */
801
802 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
803 char *endptr, char *printname)
804 {
805 if (after_context > 0 && lastmatchnumber > 0)
806 {
807 int count = 0;
808 while (lastmatchrestart < endptr && count++ < after_context)
809 {
810 int ellength;
811 char *pp = lastmatchrestart;
812 if (printname != NULL) fprintf(stdout, "%s-", printname);
813 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
814 pp = end_of_line(pp, endptr, &ellength);
815 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
816 lastmatchrestart = pp;
817 }
818 hyphenpending = TRUE;
819 }
820 }
821
822
823
824 /*************************************************
825 * Apply patterns to subject till one matches *
826 *************************************************/
827
828 /* This function is called to run through all patterns, looking for a match. It
829 is used multiple times for the same subject when colouring is enabled, in order
830 to find all possible matches.
831
832 Arguments:
833 matchptr the start of the subject
834 length the length of the subject to match
835 offsets the offets vector to fill in
836 mrc address of where to put the result of pcre_exec()
837
838 Returns: TRUE if there was a match
839 FALSE if there was no match
840 invert if there was a non-fatal error
841 */
842
843 static BOOL
844 match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
845 {
846 int i;
847 for (i = 0; i < pattern_count; i++)
848 {
849 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
850 offsets, OFFSET_SIZE);
851 if (*mrc >= 0) return TRUE;
852 if (*mrc == PCRE_ERROR_NOMATCH) continue;
853 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
854 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
855 fprintf(stderr, "this text:\n");
856 fwrite(matchptr, 1, length, stderr); /* In case binary zero included */
857 fprintf(stderr, "\n");
858 if (error_count == 0 &&
859 (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
860 {
861 fprintf(stderr, "pcregrep: error %d means that a resource limit "
862 "was exceeded\n", *mrc);
863 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
864 }
865 if (error_count++ > 20)
866 {
867 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
868 exit(2);
869 }
870 return invert; /* No more matching; don't show the line again */
871 }
872
873 return FALSE; /* No match, no errors */
874 }
875
876
877
878 /*************************************************
879 * Grep an individual file *
880 *************************************************/
881
882 /* This is called from grep_or_recurse() below. It uses a buffer that is three
883 times the value of MBUFTHIRD. The matching point is never allowed to stray into
884 the top third of the buffer, thus keeping more of the file available for
885 context printing or for multiline scanning. For large files, the pointer will
886 be in the middle third most of the time, so the bottom third is available for
887 "before" context printing.
888
889 Arguments:
890 handle the fopened FILE stream for a normal file
891 the gzFile pointer when reading is via libz
892 the BZFILE pointer when reading is via libbz2
893 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
894 printname the file name if it is to be printed for each match
895 or NULL if the file name is not to be printed
896 it cannot be NULL if filenames[_nomatch]_only is set
897
898 Returns: 0 if there was at least one match
899 1 otherwise (no matches)
900 2 if there is a read error on a .bz2 file
901 */
902
903 static int
904 pcregrep(void *handle, int frtype, char *printname)
905 {
906 int rc = 1;
907 int linenumber = 1;
908 int lastmatchnumber = 0;
909 int count = 0;
910 int filepos = 0;
911 int offsets[OFFSET_SIZE];
912 char *lastmatchrestart = NULL;
913 char buffer[3*MBUFTHIRD];
914 char *ptr = buffer;
915 char *endptr;
916 size_t bufflength;
917 BOOL endhyphenpending = FALSE;
918 FILE *in = NULL; /* Ensure initialized */
919
920 #ifdef SUPPORT_LIBZ
921 gzFile ingz = NULL;
922 #endif
923
924 #ifdef SUPPORT_LIBBZ2
925 BZFILE *inbz2 = NULL;
926 #endif
927
928
929 /* Do the first read into the start of the buffer and set up the pointer to end
930 of what we have. In the case of libz, a non-zipped .gz file will be read as a
931 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
932 fail. */
933
934 #ifdef SUPPORT_LIBZ
935 if (frtype == FR_LIBZ)
936 {
937 ingz = (gzFile)handle;
938 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
939 }
940 else
941 #endif
942
943 #ifdef SUPPORT_LIBBZ2
944 if (frtype == FR_LIBBZ2)
945 {
946 inbz2 = (BZFILE *)handle;
947 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
948 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
949 } /* without the cast it is unsigned. */
950 else
951 #endif
952
953 {
954 in = (FILE *)handle;
955 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
956 }
957
958 endptr = buffer + bufflength;
959
960 /* Loop while the current pointer is not at the end of the file. For large
961 files, endptr will be at the end of the buffer when we are in the middle of the
962 file, but ptr will never get there, because as soon as it gets over 2/3 of the
963 way, the buffer is shifted left and re-filled. */
964
965 while (ptr < endptr)
966 {
967 int endlinelength;
968 int mrc = 0;
969 BOOL match;
970 char *matchptr = ptr;
971 char *t = ptr;
972 size_t length, linelength;
973
974 /* At this point, ptr is at the start of a line. We need to find the length
975 of the subject string to pass to pcre_exec(). In multiline mode, it is the
976 length remainder of the data in the buffer. Otherwise, it is the length of
977 the next line, excluding the terminating newline. After matching, we always
978 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
979 option is used for compiling, so that any match is constrained to be in the
980 first line. */
981
982 t = end_of_line(t, endptr, &endlinelength);
983 linelength = t - ptr - endlinelength;
984 length = multiline? (size_t)(endptr - ptr) : linelength;
985
986 /* Extra processing for Jeffrey Friedl's debugging. */
987
988 #ifdef JFRIEDL_DEBUG
989 if (jfriedl_XT || jfriedl_XR)
990 {
991 #include <sys/time.h>
992 #include <time.h>
993 struct timeval start_time, end_time;
994 struct timezone dummy;
995 int i;
996
997 if (jfriedl_XT)
998 {
999 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1000 const char *orig = ptr;
1001 ptr = malloc(newlen + 1);
1002 if (!ptr) {
1003 printf("out of memory");
1004 exit(2);
1005 }
1006 endptr = ptr;
1007 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1008 for (i = 0; i < jfriedl_XT; i++) {
1009 strncpy(endptr, orig, length);
1010 endptr += length;
1011 }
1012 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1013 length = newlen;
1014 }
1015
1016 if (gettimeofday(&start_time, &dummy) != 0)
1017 perror("bad gettimeofday");
1018
1019
1020 for (i = 0; i < jfriedl_XR; i++)
1021 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, OFFSET_SIZE) >= 0);
1022
1023 if (gettimeofday(&end_time, &dummy) != 0)
1024 perror("bad gettimeofday");
1025
1026 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1027 -
1028 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1029
1030 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1031 return 0;
1032 }
1033 #endif
1034
1035 /* We come back here after a match when the -o option (only_matching) is set,
1036 in order to find any further matches in the same line. */
1037
1038 ONLY_MATCHING_RESTART:
1039
1040 /* Run through all the patterns until one matches or there is an error other
1041 than NOMATCH. This code is in a subroutine so that it can be re-used for
1042 finding subsequent matches when colouring matched lines. */
1043
1044 match = match_patterns(matchptr, length, offsets, &mrc);
1045
1046 /* If it's a match or a not-match (as required), do what's wanted. */
1047
1048 if (match != invert)
1049 {
1050 BOOL hyphenprinted = FALSE;
1051
1052 /* We've failed if we want a file that doesn't have any matches. */
1053
1054 if (filenames == FN_NOMATCH_ONLY) return 1;
1055
1056 /* Just count if just counting is wanted. */
1057
1058 if (count_only) count++;
1059
1060 /* If all we want is a file name, there is no need to scan any more lines
1061 in the file. */
1062
1063 else if (filenames == FN_ONLY)
1064 {
1065 fprintf(stdout, "%s\n", printname);
1066 return 0;
1067 }
1068
1069 /* Likewise, if all we want is a yes/no answer. */
1070
1071 else if (quiet) return 0;
1072
1073 /* The --only-matching option prints just the substring that matched, and
1074 the --file-offsets and --line-offsets options output offsets for the
1075 matching substring (they both force --only-matching). None of these options
1076 prints any context. Afterwards, adjust the start and length, and then jump
1077 back to look for further matches in the same line. If we are in invert
1078 mode, however, nothing is printed - this could be still useful because the
1079 return code is set. */
1080
1081 else if (only_matching)
1082 {
1083 if (!invert)
1084 {
1085 if (printname != NULL) fprintf(stdout, "%s:", printname);
1086 if (number) fprintf(stdout, "%d:", linenumber);
1087 if (line_offsets)
1088 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1089 offsets[1] - offsets[0]);
1090 else if (file_offsets)
1091 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1092 offsets[1] - offsets[0]);
1093 else
1094 {
1095 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1096 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1097 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1098 }
1099 fprintf(stdout, "\n");
1100 matchptr += offsets[1];
1101 length -= offsets[1];
1102 match = FALSE;
1103 goto ONLY_MATCHING_RESTART;
1104 }
1105 }
1106
1107 /* This is the default case when none of the above options is set. We print
1108 the matching line(s), possibly preceded and/or followed by other lines of
1109 context. */
1110
1111 else
1112 {
1113 /* See if there is a requirement to print some "after" lines from a
1114 previous match. We never print any overlaps. */
1115
1116 if (after_context > 0 && lastmatchnumber > 0)
1117 {
1118 int ellength;
1119 int linecount = 0;
1120 char *p = lastmatchrestart;
1121
1122 while (p < ptr && linecount < after_context)
1123 {
1124 p = end_of_line(p, ptr, &ellength);
1125 linecount++;
1126 }
1127
1128 /* It is important to advance lastmatchrestart during this printing so
1129 that it interacts correctly with any "before" printing below. Print
1130 each line's data using fwrite() in case there are binary zeroes. */
1131
1132 while (lastmatchrestart < p)
1133 {
1134 char *pp = lastmatchrestart;
1135 if (printname != NULL) fprintf(stdout, "%s-", printname);
1136 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1137 pp = end_of_line(pp, endptr, &ellength);
1138 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1139 lastmatchrestart = pp;
1140 }
1141 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1142 }
1143
1144 /* If there were non-contiguous lines printed above, insert hyphens. */
1145
1146 if (hyphenpending)
1147 {
1148 fprintf(stdout, "--\n");
1149 hyphenpending = FALSE;
1150 hyphenprinted = TRUE;
1151 }
1152
1153 /* See if there is a requirement to print some "before" lines for this
1154 match. Again, don't print overlaps. */
1155
1156 if (before_context > 0)
1157 {
1158 int linecount = 0;
1159 char *p = ptr;
1160
1161 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1162 linecount < before_context)
1163 {
1164 linecount++;
1165 p = previous_line(p, buffer);
1166 }
1167
1168 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1169 fprintf(stdout, "--\n");
1170
1171 while (p < ptr)
1172 {
1173 int ellength;
1174 char *pp = p;
1175 if (printname != NULL) fprintf(stdout, "%s-", printname);
1176 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1177 pp = end_of_line(pp, endptr, &ellength);
1178 fwrite(p, 1, pp - p, stdout);
1179 p = pp;
1180 }
1181 }
1182
1183 /* Now print the matching line(s); ensure we set hyphenpending at the end
1184 of the file if any context lines are being output. */
1185
1186 if (after_context > 0 || before_context > 0)
1187 endhyphenpending = TRUE;
1188
1189 if (printname != NULL) fprintf(stdout, "%s:", printname);
1190 if (number) fprintf(stdout, "%d:", linenumber);
1191
1192 /* In multiline mode, we want to print to the end of the line in which
1193 the end of the matched string is found, so we adjust linelength and the
1194 line number appropriately, but only when there actually was a match
1195 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1196 the match will always be before the first newline sequence. */
1197
1198 if (multiline)
1199 {
1200 int ellength;
1201 char *endmatch = ptr;
1202 if (!invert)
1203 {
1204 endmatch += offsets[1];
1205 t = ptr;
1206 while (t < endmatch)
1207 {
1208 t = end_of_line(t, endptr, &ellength);
1209 if (t <= endmatch) linenumber++; else break;
1210 }
1211 }
1212 endmatch = end_of_line(endmatch, endptr, &ellength);
1213 linelength = endmatch - ptr - ellength;
1214 }
1215
1216 /*** NOTE: Use only fwrite() to output the data line, so that binary
1217 zeroes are treated as just another data character. */
1218
1219 /* This extra option, for Jeffrey Friedl's debugging requirements,
1220 replaces the matched string, or a specific captured string if it exists,
1221 with X. When this happens, colouring is ignored. */
1222
1223 #ifdef JFRIEDL_DEBUG
1224 if (S_arg >= 0 && S_arg < mrc)
1225 {
1226 int first = S_arg * 2;
1227 int last = first + 1;
1228 fwrite(ptr, 1, offsets[first], stdout);
1229 fprintf(stdout, "X");
1230 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1231 }
1232 else
1233 #endif
1234
1235 /* We have to split the line(s) up if colouring, and search for further
1236 matches. */
1237
1238 if (do_colour)
1239 {
1240 int last_offset = 0;
1241 fwrite(ptr, 1, offsets[0], stdout);
1242 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1243 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1244 fprintf(stdout, "%c[00m", 0x1b);
1245 for (;;)
1246 {
1247 last_offset += offsets[1];
1248 matchptr += offsets[1];
1249 length -= offsets[1];
1250 if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1251 fwrite(matchptr, 1, offsets[0], stdout);
1252 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1253 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1254 fprintf(stdout, "%c[00m", 0x1b);
1255 }
1256 fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
1257 stdout);
1258 }
1259
1260 /* Not colouring; no need to search for further matches */
1261
1262 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1263 }
1264
1265 /* End of doing what has to be done for a match */
1266
1267 rc = 0; /* Had some success */
1268
1269 /* Remember where the last match happened for after_context. We remember
1270 where we are about to restart, and that line's number. */
1271
1272 lastmatchrestart = ptr + linelength + endlinelength;
1273 lastmatchnumber = linenumber + 1;
1274 }
1275
1276 /* For a match in multiline inverted mode (which of course did not cause
1277 anything to be printed), we have to move on to the end of the match before
1278 proceeding. */
1279
1280 if (multiline && invert && match)
1281 {
1282 int ellength;
1283 char *endmatch = ptr + offsets[1];
1284 t = ptr;
1285 while (t < endmatch)
1286 {
1287 t = end_of_line(t, endptr, &ellength);
1288 if (t <= endmatch) linenumber++; else break;
1289 }
1290 endmatch = end_of_line(endmatch, endptr, &ellength);
1291 linelength = endmatch - ptr - ellength;
1292 }
1293
1294 /* Advance to after the newline and increment the line number. The file
1295 offset to the current line is maintained in filepos. */
1296
1297 ptr += linelength + endlinelength;
1298 filepos += linelength + endlinelength;
1299 linenumber++;
1300
1301 /* If we haven't yet reached the end of the file (the buffer is full), and
1302 the current point is in the top 1/3 of the buffer, slide the buffer down by
1303 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1304 about to be lost, print them. */
1305
1306 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1307 {
1308 if (after_context > 0 &&
1309 lastmatchnumber > 0 &&
1310 lastmatchrestart < buffer + MBUFTHIRD)
1311 {
1312 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1313 lastmatchnumber = 0;
1314 }
1315
1316 /* Now do the shuffle */
1317
1318 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1319 ptr -= MBUFTHIRD;
1320
1321 #ifdef SUPPORT_LIBZ
1322 if (frtype == FR_LIBZ)
1323 bufflength = 2*MBUFTHIRD +
1324 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1325 else
1326 #endif
1327
1328 #ifdef SUPPORT_LIBBZ2
1329 if (frtype == FR_LIBBZ2)
1330 bufflength = 2*MBUFTHIRD +
1331 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1332 else
1333 #endif
1334
1335 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1336
1337 endptr = buffer + bufflength;
1338
1339 /* Adjust any last match point */
1340
1341 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1342 }
1343 } /* Loop through the whole file */
1344
1345 /* End of file; print final "after" lines if wanted; do_after_lines sets
1346 hyphenpending if it prints something. */
1347
1348 if (!only_matching && !count_only)
1349 {
1350 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1351 hyphenpending |= endhyphenpending;
1352 }
1353
1354 /* Print the file name if we are looking for those without matches and there
1355 were none. If we found a match, we won't have got this far. */
1356
1357 if (filenames == FN_NOMATCH_ONLY)
1358 {
1359 fprintf(stdout, "%s\n", printname);
1360 return 0;
1361 }
1362
1363 /* Print the match count if wanted */
1364
1365 if (count_only)
1366 {
1367 if (printname != NULL) fprintf(stdout, "%s:", printname);
1368 fprintf(stdout, "%d\n", count);
1369 }
1370
1371 return rc;
1372 }
1373
1374
1375
1376 /*************************************************
1377 * Grep a file or recurse into a directory *
1378 *************************************************/
1379
1380 /* Given a path name, if it's a directory, scan all the files if we are
1381 recursing; if it's a file, grep it.
1382
1383 Arguments:
1384 pathname the path to investigate
1385 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1386 only_one_at_top TRUE if the path is the only one at toplevel
1387
1388 Returns: 0 if there was at least one match
1389 1 if there were no matches
1390 2 there was some kind of error
1391
1392 However, file opening failures are suppressed if "silent" is set.
1393 */
1394
1395 static int
1396 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1397 {
1398 int rc = 1;
1399 int sep;
1400 int frtype;
1401 int pathlen;
1402 void *handle;
1403 FILE *in = NULL; /* Ensure initialized */
1404
1405 #ifdef SUPPORT_LIBZ
1406 gzFile ingz = NULL;
1407 #endif
1408
1409 #ifdef SUPPORT_LIBBZ2
1410 BZFILE *inbz2 = NULL;
1411 #endif
1412
1413 /* If the file name is "-" we scan stdin */
1414
1415 if (strcmp(pathname, "-") == 0)
1416 {
1417 return pcregrep(stdin, FR_PLAIN,
1418 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1419 stdin_name : NULL);
1420 }
1421
1422 /* If the file is a directory, skip if skipping or if we are recursing, scan
1423 each file and directory within it, subject to any include or exclude patterns
1424 that were set. The scanning code is localized so it can be made
1425 system-specific. */
1426
1427 if ((sep = isdirectory(pathname)) != 0)
1428 {
1429 if (dee_action == dee_SKIP) return 1;
1430 if (dee_action == dee_RECURSE)
1431 {
1432 char buffer[1024];
1433 char *nextfile;
1434 directory_type *dir = opendirectory(pathname);
1435
1436 if (dir == NULL)
1437 {
1438 if (!silent)
1439 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1440 strerror(errno));
1441 return 2;
1442 }
1443
1444 while ((nextfile = readdirectory(dir)) != NULL)
1445 {
1446 int frc, nflen;
1447 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1448 nflen = strlen(nextfile);
1449
1450 if (isdirectory(buffer))
1451 {
1452 if (exclude_dir_compiled != NULL &&
1453 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1454 continue;
1455
1456 if (include_dir_compiled != NULL &&
1457 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1458 continue;
1459 }
1460 else
1461 {
1462 if (exclude_compiled != NULL &&
1463 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1464 continue;
1465
1466 if (include_compiled != NULL &&
1467 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1468 continue;
1469 }
1470
1471 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1472 if (frc > 1) rc = frc;
1473 else if (frc == 0 && rc == 1) rc = 0;
1474 }
1475
1476 closedirectory(dir);
1477 return rc;
1478 }
1479 }
1480
1481 /* If the file is not a directory and not a regular file, skip it if that's
1482 been requested. */
1483
1484 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1485
1486 /* Control reaches here if we have a regular file, or if we have a directory
1487 and recursion or skipping was not requested, or if we have anything else and
1488 skipping was not requested. The scan proceeds. If this is the first and only
1489 argument at top level, we don't show the file name, unless we are only showing
1490 the file name, or the filename was forced (-H). */
1491
1492 pathlen = strlen(pathname);
1493
1494 /* Open using zlib if it is supported and the file name ends with .gz. */
1495
1496 #ifdef SUPPORT_LIBZ
1497 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1498 {
1499 ingz = gzopen(pathname, "rb");
1500 if (ingz == NULL)
1501 {
1502 if (!silent)
1503 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1504 strerror(errno));
1505 return 2;
1506 }
1507 handle = (void *)ingz;
1508 frtype = FR_LIBZ;
1509 }
1510 else
1511 #endif
1512
1513 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1514
1515 #ifdef SUPPORT_LIBBZ2
1516 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1517 {
1518 inbz2 = BZ2_bzopen(pathname, "rb");
1519 handle = (void *)inbz2;
1520 frtype = FR_LIBBZ2;
1521 }
1522 else
1523 #endif
1524
1525 /* Otherwise use plain fopen(). The label is so that we can come back here if
1526 an attempt to read a .bz2 file indicates that it really is a plain file. */
1527
1528 #ifdef SUPPORT_LIBBZ2
1529 PLAIN_FILE:
1530 #endif
1531 {
1532 in = fopen(pathname, "r");
1533 handle = (void *)in;
1534 frtype = FR_PLAIN;
1535 }
1536
1537 /* All the opening methods return errno when they fail. */
1538
1539 if (handle == NULL)
1540 {
1541 if (!silent)
1542 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1543 strerror(errno));
1544 return 2;
1545 }
1546
1547 /* Now grep the file */
1548
1549 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1550 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1551
1552 /* Close in an appropriate manner. */
1553
1554 #ifdef SUPPORT_LIBZ
1555 if (frtype == FR_LIBZ)
1556 gzclose(ingz);
1557 else
1558 #endif
1559
1560 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1561 read failed. If the error indicates that the file isn't in fact bzipped, try
1562 again as a normal file. */
1563
1564 #ifdef SUPPORT_LIBBZ2
1565 if (frtype == FR_LIBBZ2)
1566 {
1567 if (rc == 2)
1568 {
1569 int errnum;
1570 const char *err = BZ2_bzerror(inbz2, &errnum);
1571 if (errnum == BZ_DATA_ERROR_MAGIC)
1572 {
1573 BZ2_bzclose(inbz2);
1574 goto PLAIN_FILE;
1575 }
1576 else if (!silent)
1577 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1578 pathname, err);
1579 }
1580 BZ2_bzclose(inbz2);
1581 }
1582 else
1583 #endif
1584
1585 /* Normal file close */
1586
1587 fclose(in);
1588
1589 /* Pass back the yield from pcregrep(). */
1590
1591 return rc;
1592 }
1593
1594
1595
1596
1597 /*************************************************
1598 * Usage function *
1599 *************************************************/
1600
1601 static int
1602 usage(int rc)
1603 {
1604 option_item *op;
1605 fprintf(stderr, "Usage: pcregrep [-");
1606 for (op = optionlist; op->one_char != 0; op++)
1607 {
1608 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1609 }
1610 fprintf(stderr, "] [long options] [pattern] [files]\n");
1611 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1612 "options.\n");
1613 return rc;
1614 }
1615
1616
1617
1618
1619 /*************************************************
1620 * Help function *
1621 *************************************************/
1622
1623 static void
1624 help(void)
1625 {
1626 option_item *op;
1627
1628 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1629 printf("Search for PATTERN in each FILE or standard input.\n");
1630 printf("PATTERN must be present if neither -e nor -f is used.\n");
1631 printf("\"-\" can be used as a file name to mean STDIN.\n");
1632
1633 #ifdef SUPPORT_LIBZ
1634 printf("Files whose names end in .gz are read using zlib.\n");
1635 #endif
1636
1637 #ifdef SUPPORT_LIBBZ2
1638 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1639 #endif
1640
1641 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1642 printf("Other files and the standard input are read as plain files.\n\n");
1643 #else
1644 printf("All files are read as plain files, without any interpretation.\n\n");
1645 #endif
1646
1647 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1648 printf("Options:\n");
1649
1650 for (op = optionlist; op->one_char != 0; op++)
1651 {
1652 int n;
1653 char s[4];
1654 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1655 n = 30 - printf(" %s --%s", s, op->long_name);
1656 if (n < 1) n = 1;
1657 printf("%.*s%s\n", n, " ", op->help_text);
1658 }
1659
1660 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1661 printf("trailing white space is removed and blank lines are ignored.\n");
1662 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1663
1664 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1665 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1666 }
1667
1668
1669
1670
1671 /*************************************************
1672 * Handle a single-letter, no data option *
1673 *************************************************/
1674
1675 static int
1676 handle_option(int letter, int options)
1677 {
1678 switch(letter)
1679 {
1680 case N_FOFFSETS: file_offsets = TRUE; break;
1681 case N_HELP: help(); exit(0);
1682 case N_LOFFSETS: line_offsets = number = TRUE; break;
1683 case 'c': count_only = TRUE; break;
1684 case 'F': process_options |= PO_FIXED_STRINGS; break;
1685 case 'H': filenames = FN_FORCE; break;
1686 case 'h': filenames = FN_NONE; break;
1687 case 'i': options |= PCRE_CASELESS; break;
1688 case 'l': filenames = FN_ONLY; break;
1689 case 'L': filenames = FN_NOMATCH_ONLY; break;
1690 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1691 case 'n': number = TRUE; break;
1692 case 'o': only_matching = TRUE; break;
1693 case 'q': quiet = TRUE; break;
1694 case 'r': dee_action = dee_RECURSE; break;
1695 case 's': silent = TRUE; break;
1696 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1697 case 'v': invert = TRUE; break;
1698 case 'w': process_options |= PO_WORD_MATCH; break;
1699 case 'x': process_options |= PO_LINE_MATCH; break;
1700
1701 case 'V':
1702 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1703 exit(0);
1704 break;
1705
1706 default:
1707 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1708 exit(usage(2));
1709 }
1710
1711 return options;
1712 }
1713
1714
1715
1716
1717 /*************************************************
1718 * Construct printed ordinal *
1719 *************************************************/
1720
/* This turns a number into "1st", "2nd", "3rd", "4th", etc. Numbers whose
last two digits are 11, 12, or 13 take "th" ("11th", "112th"), not the ending
that their final digit alone would suggest.

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; the contents are
            overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[24];        /* ample for any int plus a 2-letter ending */
const char *ending = "th";
int last_two = n % 100;

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1739
1740
1741
1742 /*************************************************
1743 * Compile a single pattern *
1744 *************************************************/
1745
1746 /* When the -F option has been used, this is called for each substring.
1747 Otherwise it's called for each supplied pattern.
1748
1749 Arguments:
1750 pattern the pattern string
1751 options the PCRE options
1752 filename the file name, or NULL for a command-line pattern
1753 count 0 if this is the only command line pattern, or
1754 number of the command line pattern, or
1755 linenumber for a pattern from a file
1756
1757 Returns: TRUE on success, FALSE after an error
1758 */
1759
1760 static BOOL
1761 compile_single_pattern(char *pattern, int options, char *filename, int count)
1762 {
1763 char buffer[MBUFTHIRD + 16];
1764 const char *error;
1765 int errptr;
1766
1767 if (pattern_count >= MAX_PATTERN_COUNT)
1768 {
1769 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1770 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1771 return FALSE;
1772 }
1773
1774 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1775 suffix[process_options]);
1776 pattern_list[pattern_count] =
1777 pcre_compile(buffer, options, &error, &errptr, pcretables);
1778 if (pattern_list[pattern_count] != NULL)
1779 {
1780 pattern_count++;
1781 return TRUE;
1782 }
1783
1784 /* Handle compile errors */
1785
1786 errptr -= (int)strlen(prefix[process_options]);
1787 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1788
1789 if (filename == NULL)
1790 {
1791 if (count == 0)
1792 fprintf(stderr, "pcregrep: Error in command-line regex "
1793 "at offset %d: %s\n", errptr, error);
1794 else
1795 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1796 "at offset %d: %s\n", ordin(count), errptr, error);
1797 }
1798 else
1799 {
1800 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1801 "at offset %d: %s\n", count, filename, errptr, error);
1802 }
1803
1804 return FALSE;
1805 }
1806
1807
1808
1809 /*************************************************
1810 * Compile one supplied pattern *
1811 *************************************************/
1812
1813 /* When the -F option has been used, each string may be a list of strings,
1814 separated by line breaks. They will be matched literally.
1815
1816 Arguments:
1817 pattern the pattern string
1818 options the PCRE options
1819 filename the file name, or NULL for a command-line pattern
1820 count 0 if this is the only command line pattern, or
1821 number of the command line pattern, or
1822 linenumber for a pattern from a file
1823
1824 Returns: TRUE on success, FALSE after an error
1825 */
1826
1827 static BOOL
1828 compile_pattern(char *pattern, int options, char *filename, int count)
1829 {
1830 if ((process_options & PO_FIXED_STRINGS) != 0)
1831 {
1832 char *eop = pattern + strlen(pattern);
1833 char buffer[MBUFTHIRD];
1834 for(;;)
1835 {
1836 int ellength;
1837 char *p = end_of_line(pattern, eop, &ellength);
1838 if (ellength == 0)
1839 return compile_single_pattern(pattern, options, filename, count);
1840 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1841 pattern = p;
1842 if (!compile_single_pattern(buffer, options, filename, count))
1843 return FALSE;
1844 }
1845 }
1846 else return compile_single_pattern(pattern, options, filename, count);
1847 }
1848
1849
1850
1851 /*************************************************
1852 * Main program *
1853 *************************************************/
1854
1855 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1856
1857 int
1858 main(int argc, char **argv)
1859 {
1860 int i, j;
1861 int rc = 1;
1862 int pcre_options = 0;
1863 int cmd_pattern_count = 0;
1864 int hint_count = 0;
1865 int errptr;
1866 BOOL only_one_at_top;
1867 char *patterns[MAX_PATTERN_COUNT];
1868 const char *locale_from = "--locale";
1869 const char *error;
1870
1871 /* Set the default line ending value from the default in the PCRE library;
1872 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1873 */
1874
1875 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1876 switch(i)
1877 {
1878 default: newline = (char *)"lf"; break;
1879 case '\r': newline = (char *)"cr"; break;
1880 case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1881 case -1: newline = (char *)"any"; break;
1882 case -2: newline = (char *)"anycrlf"; break;
1883 }
1884
1885 /* Process the options */
1886
1887 for (i = 1; i < argc; i++)
1888 {
1889 option_item *op = NULL;
1890 char *option_data = (char *)""; /* default to keep compiler happy */
1891 BOOL longop;
1892 BOOL longopwasequals = FALSE;
1893
1894 if (argv[i][0] != '-') break;
1895
1896 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1897 but only if we have previously had -e or -f to define the patterns. */
1898
1899 if (argv[i][1] == 0)
1900 {
1901 if (pattern_filename != NULL || pattern_count > 0) break;
1902 else exit(usage(2));
1903 }
1904
1905 /* Handle a long name option, or -- to terminate the options */
1906
1907 if (argv[i][1] == '-')
1908 {
1909 char *arg = argv[i] + 2;
1910 char *argequals = strchr(arg, '=');
1911
1912 if (*arg == 0) /* -- terminates options */
1913 {
1914 i++;
1915 break; /* out of the options-handling loop */
1916 }
1917
1918 longop = TRUE;
1919
1920 /* Some long options have data that follows after =, for example file=name.
1921 Some options have variations in the long name spelling: specifically, we
1922 allow "regexp" because GNU grep allows it, though I personally go along
1923 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1924 These options are entered in the table as "regex(p)". No option is in both
1925 these categories, fortunately. */
1926
1927 for (op = optionlist; op->one_char != 0; op++)
1928 {
1929 char *opbra = strchr(op->long_name, '(');
1930 char *equals = strchr(op->long_name, '=');
1931 if (opbra == NULL) /* Not a (p) case */
1932 {
1933 if (equals == NULL) /* Not thing=data case */
1934 {
1935 if (strcmp(arg, op->long_name) == 0) break;
1936 }
1937 else /* Special case xxx=data */
1938 {
1939 int oplen = equals - op->long_name;
1940 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1941 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1942 {
1943 option_data = arg + arglen;
1944 if (*option_data == '=')
1945 {
1946 option_data++;
1947 longopwasequals = TRUE;
1948 }
1949 break;
1950 }
1951 }
1952 }
1953 else /* Special case xxxx(p) */
1954 {
1955 char buff1[24];
1956 char buff2[24];
1957 int baselen = opbra - op->long_name;
1958 sprintf(buff1, "%.*s", baselen, op->long_name);
1959 sprintf(buff2, "%s%.*s", buff1,
1960 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1961 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1962 break;
1963 }
1964 }
1965
1966 if (op->one_char == 0)
1967 {
1968 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1969 exit(usage(2));
1970 }
1971 }
1972
1973
1974 /* Jeffrey Friedl's debugging harness uses these additional options which
1975 are not in the right form for putting in the option table because they use
1976 only one hyphen, yet are more than one character long. By putting them
1977 separately here, they will not get displayed as part of the help() output,
1978 but I don't think Jeffrey will care about that. */
1979
1980 #ifdef JFRIEDL_DEBUG
1981 else if (strcmp(argv[i], "-pre") == 0) {
1982 jfriedl_prefix = argv[++i];
1983 continue;
1984 } else if (strcmp(argv[i], "-post") == 0) {
1985 jfriedl_postfix = argv[++i];
1986 continue;
1987 } else if (strcmp(argv[i], "-XT") == 0) {
1988 sscanf(argv[++i], "%d", &jfriedl_XT);
1989 continue;
1990 } else if (strcmp(argv[i], "-XR") == 0) {
1991 sscanf(argv[++i], "%d", &jfriedl_XR);
1992 continue;
1993 }
1994 #endif
1995
1996
1997 /* One-char options; many that have no data may be in a single argument; we
1998 continue till we hit the last one or one that needs data. */
1999
2000 else
2001 {
2002 char *s = argv[i] + 1;
2003 longop = FALSE;
2004 while (*s != 0)
2005 {
2006 for (op = optionlist; op->one_char != 0; op++)
2007 { if (*s == op->one_char) break; }
2008 if (op->one_char == 0)
2009 {
2010 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2011 *s, argv[i]);
2012 exit(usage(2));
2013 }
2014 if (op->type != OP_NODATA || s[1] == 0)
2015 {
2016 option_data = s+1;
2017 break;
2018 }
2019 pcre_options = handle_option(*s++, pcre_options);
2020 }
2021 }
2022
2023 /* At this point we should have op pointing to a matched option. If the type
2024 is OP_NODATA, it means that there is no data, and the option might set
2025 something in the PCRE options. */
2026
2027 if (op->type == OP_NODATA)
2028 {
2029 pcre_options = handle_option(op->one_char, pcre_options);
2030 continue;
2031 }
2032
2033 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2034 either has a value or defaults to something. It cannot have data in a
2035 separate item. At the moment, the only such options are "colo(u)r" and
2036 Jeffrey Friedl's special -S debugging option. */
2037
2038 if (*option_data == 0 &&
2039 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2040 {
2041 switch (op->one_char)
2042 {
2043 case N_COLOUR:
2044 colour_option = (char *)"auto";
2045 break;
2046 #ifdef JFRIEDL_DEBUG
2047 case 'S':
2048 S_arg = 0;
2049 break;
2050 #endif
2051 }
2052 continue;
2053 }
2054
2055 /* Otherwise, find the data string for the option. */
2056
2057 if (*option_data == 0)
2058 {
2059 if (i >= argc - 1 || longopwasequals)
2060 {
2061 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2062 exit(usage(2));
2063 }
2064 option_data = argv[++i];
2065 }
2066
2067 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2068 multiple times to create a list of patterns. */
2069
2070 if (op->type == OP_PATLIST)
2071 {
2072 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2073 {
2074 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2075 MAX_PATTERN_COUNT);
2076 return 2;
2077 }
2078 patterns[cmd_pattern_count++] = option_data;
2079 }
2080
2081 /* Otherwise, deal with single string or numeric data values. */
2082
2083 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2084 {
2085 *((char **)op->dataptr) = option_data;
2086 }
2087 else
2088 {
2089 char *endptr;
2090 int n = strtoul(option_data, &endptr, 10);
2091 if (*endptr != 0)
2092 {
2093 if (longop)
2094 {
2095 char *equals = strchr(op->long_name, '=');
2096 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2097 equals - op->long_name;
2098 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2099 option_data, nlen, op->long_name);
2100 }
2101 else
2102 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2103 option_data, op->one_char);
2104 exit(usage(2));
2105 }
2106 *((int *)op->dataptr) = n;
2107 }
2108 }
2109
2110 /* Options have been decoded. If -C was used, its value is used as a default
2111 for -A and -B. */
2112
2113 if (both_context > 0)
2114 {
2115 if (after_context == 0) after_context = both_context;
2116 if (before_context == 0) before_context = both_context;
2117 }
2118
2119 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2120 However, the latter two set the only_matching flag. */
2121
2122 if ((only_matching && (file_offsets || line_offsets)) ||
2123 (file_offsets && line_offsets))
2124 {
2125 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2126 "and/or --line-offsets\n");
2127 exit(usage(2));
2128 }
2129
2130 if (file_offsets || line_offsets) only_matching = TRUE;
2131
2132 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2133 LC_ALL environment variable is set, and if so, use it. */
2134
2135 if (locale == NULL)
2136 {
2137 locale = getenv("LC_ALL");
2138 locale_from = "LCC_ALL";
2139 }
2140
2141 if (locale == NULL)
2142 {
2143 locale = getenv("LC_CTYPE");
2144 locale_from = "LC_CTYPE";
2145 }
2146
2147 /* If a locale has been provided, set it, and generate the tables the PCRE
2148 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2149
2150 if (locale != NULL)
2151 {
2152 if (setlocale(LC_CTYPE, locale) == NULL)
2153 {
2154 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2155 locale, locale_from);
2156 return 2;
2157 }
2158 pcretables = pcre_maketables();
2159 }
2160
2161 /* Sort out colouring */
2162
2163 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2164 {
2165 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2166 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2167 else
2168 {
2169 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2170 colour_option);
2171 return 2;
2172 }
2173 if (do_colour)
2174 {
2175 char *cs = getenv("PCREGREP_COLOUR");
2176 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2177 if (cs != NULL) colour_string = cs;
2178 }
2179 }
2180
2181 /* Interpret the newline type; the default settings are Unix-like. */
2182
2183 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2184 {
2185 pcre_options |= PCRE_NEWLINE_CR;
2186 endlinetype = EL_CR;
2187 }
2188 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2189 {
2190 pcre_options |= PCRE_NEWLINE_LF;
2191 endlinetype = EL_LF;
2192 }
2193 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2194 {
2195 pcre_options |= PCRE_NEWLINE_CRLF;
2196 endlinetype = EL_CRLF;
2197 }
2198 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2199 {
2200 pcre_options |= PCRE_NEWLINE_ANY;
2201 endlinetype = EL_ANY;
2202 }
2203 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2204 {
2205 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2206 endlinetype = EL_ANYCRLF;
2207 }
2208 else
2209 {
2210 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2211 return 2;
2212 }
2213
2214 /* Interpret the text values for -d and -D */
2215
2216 if (dee_option != NULL)
2217 {
2218 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2219 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2220 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2221 else
2222 {
2223 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2224 return 2;
2225 }
2226 }
2227
2228 if (DEE_option != NULL)
2229 {
2230 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2231 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2232 else
2233 {
2234 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2235 return 2;
2236 }
2237 }
2238
2239 /* Check the values for Jeffrey Friedl's debugging options. */
2240
2241 #ifdef JFRIEDL_DEBUG
2242 if (S_arg > 9)
2243 {
2244 fprintf(stderr, "pcregrep: bad value for -S option\n");
2245 return 2;
2246 }
2247 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2248 {
2249 if (jfriedl_XT == 0) jfriedl_XT = 1;
2250 if (jfriedl_XR == 0) jfriedl_XR = 1;
2251 }
2252 #endif
2253
2254 /* Get memory to store the pattern and hints lists. */
2255
2256 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2257 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2258
2259 if (pattern_list == NULL || hints_list == NULL)
2260 {
2261 fprintf(stderr, "pcregrep: malloc failed\n");
2262 goto EXIT2;
2263 }
2264
2265 /* If no patterns were provided by -e, and there is no file provided by -f,
2266 the first argument is the one and only pattern, and it must exist. */
2267
2268 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2269 {
2270 if (i >= argc) return usage(2);
2271 patterns[cmd_pattern_count++] = argv[i++];
2272 }
2273
2274 /* Compile the patterns that were provided on the command line, either by
2275 multiple uses of -e or as a single unkeyed pattern. */
2276
2277 for (j = 0; j < cmd_pattern_count; j++)
2278 {
2279 if (!compile_pattern(patterns[j], pcre_options, NULL,
2280 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2281 goto EXIT2;
2282 }
2283
2284 /* Compile the regular expressions that are provided in a file. */
2285
2286 if (pattern_filename != NULL)
2287 {
2288 int linenumber = 0;
2289 FILE *f;
2290 char *filename;
2291 char buffer[MBUFTHIRD];
2292
2293 if (strcmp(pattern_filename, "-") == 0)
2294 {
2295 f = stdin;
2296 filename = stdin_name;
2297 }
2298 else
2299 {
2300 f = fopen(pattern_filename, "r");
2301 if (f == NULL)
2302 {
2303 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2304 strerror(errno));
2305 goto EXIT2;
2306 }
2307 filename = pattern_filename;
2308 }
2309
2310 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2311 {
2312 char *s = buffer + (int)strlen(buffer);
2313 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2314 *s = 0;
2315 linenumber++;
2316 if (buffer[0] == 0) continue; /* Skip blank lines */
2317 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2318 goto EXIT2;
2319 }
2320
2321 if (f != stdin) fclose(f);
2322 }
2323
2324 /* Study the regular expressions, as we will be running them many times */
2325
2326 for (j = 0; j < pattern_count; j++)
2327 {
2328 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2329 if (error != NULL)
2330 {
2331 char s[16];
2332 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2333 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2334 goto EXIT2;
2335 }
2336 hint_count++;
2337 }
2338
2339 /* If there are include or exclude patterns, compile them. */
2340
2341 if (exclude_pattern != NULL)
2342 {
2343 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2344 pcretables);
2345 if (exclude_compiled == NULL)
2346 {
2347 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2348 errptr, error);
2349 goto EXIT2;
2350 }
2351 }
2352
2353 if (include_pattern != NULL)
2354 {
2355 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2356 pcretables);
2357 if (include_compiled == NULL)
2358 {
2359 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2360 errptr, error);
2361 goto EXIT2;
2362 }
2363 }
2364
2365 if (exclude_dir_pattern != NULL)
2366 {
2367 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2368 pcretables);
2369 if (exclude_dir_compiled == NULL)
2370 {
2371 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2372 errptr, error);
2373 goto EXIT2;
2374 }
2375 }
2376
2377 if (include_dir_pattern != NULL)
2378 {
2379 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2380 pcretables);
2381 if (include_dir_compiled == NULL)
2382 {
2383 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2384 errptr, error);
2385 goto EXIT2;
2386 }
2387 }
2388
2389 /* If there are no further arguments, do the business on stdin and exit. */
2390
2391 if (i >= argc)
2392 {
2393 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2394 goto EXIT;
2395 }
2396
2397 /* Otherwise, work through the remaining arguments as files or directories.
2398 Pass in the fact that there is only one argument at top level - this suppresses
2399 the file name if the argument is not a directory and filenames are not
2400 otherwise forced. */
2401
2402 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2403
2404 for (; i < argc; i++)
2405 {
2406 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2407 only_one_at_top);
2408 if (frc > 1) rc = frc;
2409 else if (frc == 0 && rc == 1) rc = 0;
2410 }
2411
2412 EXIT:
2413 if (pattern_list != NULL)
2414 {
2415 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2416 free(pattern_list);
2417 }
2418 if (hints_list != NULL)
2419 {
2420 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2421 free(hints_list);
2422 }
2423 return rc;
2424
2425 EXIT2:
2426 rc = 2;
2427 goto EXIT;
2428 }
2429
2430 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5