/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 283 - (show annotations)
Fri Dec 7 19:59:19 2007 UTC (7 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 61706 byte(s)
Error occurred while calculating annotation data.
David Byron's patch for typos and one re-arrangement in Windows code in 
pcregrep.
1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2007 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #include "pcre.h"
59
/* Boolean support for pre-C99 compilers. */

typedef int BOOL;

#define FALSE 0
#define TRUE 1

/* Upper limit on the number of patterns. */

#define MAX_PATTERN_COUNT 100

/* One third of the main data buffer: BUFSIZ, but never less than 8192. */

#if BUFSIZ > 8192
#define MBUFTHIRD BUFSIZ
#else
#define MBUFTHIRD 8192
#endif

/* Settings for the "filenames" variable, which controls file name output.
The ordering matters: a file name is wanted for every value that is greater
than FN_DEFAULT. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* What to do with directories (-d) ... */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

/* ... and with devices (-D). */

enum {
  DEE_READ,
  DEE_SKIP
};

/* Flag bits for special pattern processing; they are combined in
process_options. */

#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004

/* The recognized kinds of line ending. */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};
93
94
95
96 /*************************************************
97 * Global variables *
98 *************************************************/
99
100 /* Jeffrey Friedl has some debugging requirements that are not part of the
101 regular code. */
102
103 #ifdef JFRIEDL_DEBUG
104 static int S_arg = -1;
105 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
106 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
107 static const char *jfriedl_prefix = "";
108 static const char *jfriedl_postfix = "";
109 #endif
110
111 static int endlinetype;
112
113 static char *colour_string = (char *)"1;31";
114 static char *colour_option = NULL;
115 static char *dee_option = NULL;
116 static char *DEE_option = NULL;
117 static char *newline = NULL;
118 static char *pattern_filename = NULL;
119 static char *stdin_name = (char *)"(standard input)";
120 static char *locale = NULL;
121
122 static const unsigned char *pcretables = NULL;
123
124 static int pattern_count = 0;
125 static pcre **pattern_list = NULL;
126 static pcre_extra **hints_list = NULL;
127
128 static char *include_pattern = NULL;
129 static char *exclude_pattern = NULL;
130
131 static pcre *include_compiled = NULL;
132 static pcre *exclude_compiled = NULL;
133
134 static int after_context = 0;
135 static int before_context = 0;
136 static int both_context = 0;
137 static int dee_action = dee_READ;
138 static int DEE_action = DEE_READ;
139 static int error_count = 0;
140 static int filenames = FN_DEFAULT;
141 static int process_options = 0;
142
143 static BOOL count_only = FALSE;
144 static BOOL do_colour = FALSE;
145 static BOOL file_offsets = FALSE;
146 static BOOL hyphenpending = FALSE;
147 static BOOL invert = FALSE;
148 static BOOL line_offsets = FALSE;
149 static BOOL multiline = FALSE;
150 static BOOL number = FALSE;
151 static BOOL only_matching = FALSE;
152 static BOOL quiet = FALSE;
153 static BOOL silent = FALSE;
154 static BOOL utf8 = FALSE;
155
/* The kinds of data an option can take. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

/* One entry in the table of options. */

typedef struct option_item {
  int type;                /* one of the OP_xxx values */
  int one_char;            /* single-letter form, or a negative N_xxx id */
  void *dataptr;           /* where the option's data goes, or NULL */
  const char *long_name;   /* long form of the option */
  const char *help_text;   /* one-line description */
} option_item;

/* Options that have no single-letter form are identified by a negative value
in one_char, which is how they can be recognized. */

#define N_COLOUR (-1)
#define N_EXCLUDE (-2)
#define N_HELP (-3)
#define N_INCLUDE (-4)
#define N_LABEL (-5)
#define N_LOCALE (-6)
#define N_NULL (-7)
#define N_LOFFSETS (-8)
#define N_FOFFSETS (-9)
181
182 static option_item optionlist[] = {
183 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
184 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
185 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
186 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
187 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
188 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
189 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
190 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
191 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
192 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
193 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
194 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
195 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
196 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
197 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
198 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
199 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
200 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
201 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
202 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
203 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
204 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
205 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
206 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
207 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
208 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
209 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
210 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
211 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
212 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
213 #ifdef JFRIEDL_DEBUG
214 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
215 #endif
216 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
217 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
218 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
219 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
220 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
221 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
222 { OP_NODATA, 0, NULL, NULL, NULL }
223 };
224
/* Text to put before and after each pattern, selected by the -w, -x, and -F
options, which set the 1, 2, and 4 bits in process_options respectively; the
tables are indexed by that value. Giving -w together with -x is no different
from giving -x alone, so entries 2 and 3 (and likewise 6 and 7) are the same. */

static const char *prefix[] = {
  "",          /* 0: no special processing */
  "\\b",       /* 1: -w */
  "^(?:",      /* 2: -x */
  "^(?:",      /* 3: -w -x */
  "\\Q",       /* 4: -F */
  "\\b\\Q",    /* 5: -F -w */
  "^(?:\\Q",   /* 6: -F -x */
  "^(?:\\Q"    /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",          /* 0: no special processing */
  "\\b",       /* 1: -w */
  ")$",        /* 2: -x */
  ")$",        /* 3: -w -x */
  "\\E",       /* 4: -F */
  "\\E\\b",    /* 5: -F -w */
  "\\E)$",     /* 6: -F -x */
  "\\E)$"      /* 7: -F -w -x */
};
235
/* UTF-8 tables, needed by the line-end scanning code when the newline setting
is "any" or "anycrlf" and UTF-8 mode is on. */

/* Indexed by the number of additional bytes in a character: the mask that
extracts the data bits from the first byte. */

const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};

/* Indexed by the bottom six bits of a first byte that is 0xc0 or greater:
the number of additional bytes in the character. */

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3, 4,4,4,4,5,5,5,5
};
245
246
247
248 /*************************************************
249 * OS-specific functions *
250 *************************************************/
251
252 /* These functions are defined so that they can be made system specific,
253 although at present the only ones are for Unix, Win32, and for "no support". */
254
255
256 /************* Directory scanning in Unix ***********/
257
258 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
259 #include <sys/types.h>
260 #include <sys/stat.h>
261 #include <dirent.h>
262
/* On Unix, a directory being scanned is simply a DIR stream. */

typedef DIR directory_type;

/* Test whether a path names a directory. The result is '/' if it does, and
zero if it does not or if stat() fails; in the failure case the expectation is
that a later attempt to open the path as a file will fail too. */

static int
isdirectory(char *filename)
{
  struct stat info;

  if (stat(filename, &info) < 0)
    return 0;
  if (S_ISDIR(info.st_mode))
    return '/';
  return 0;
}
273
274 static directory_type *
275 opendirectory(char *filename)
276 {
277 return opendir(filename);
278 }
279
280 static char *
281 readdirectory(directory_type *dir)
282 {
283 for (;;)
284 {
285 struct dirent *dent = readdir(dir);
286 if (dent == NULL) return NULL;
287 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
288 return dent->d_name;
289 }
290 /* Control never reaches here */
291 }
292
293 static void
294 closedirectory(directory_type *dir)
295 {
296 closedir(dir);
297 }
298
299
300 /************* Test for regular file in Unix **********/
301
/* Test whether a path names a regular file. If stat() fails the answer is
"yes" (1), in the expectation that the subsequent attempt to open the path as
a file will fail. */

static int
isregfile(char *filename)
{
  struct stat info;
  int stat_failed = stat(filename, &info) < 0;

  if (stat_failed)
    return 1;
  return S_ISREG(info.st_mode) ? 1 : 0;
}
310
311
312 /************* Test stdout for being a terminal in Unix **********/
313
314 static BOOL
315 is_stdout_tty(void)
316 {
317 return isatty(fileno(stdout));
318 }
319
320
321 /************* Directory scanning in Win32 ***********/
322
323 /* I (Philip Hazel) have no means of testing this code. It was contributed by
324 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
325 when it did not exist. David Byron added a patch that moved the #include of
326 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
327 */
328
329 #elif HAVE_WINDOWS_H
330
331 #ifndef STRICT
332 # define STRICT
333 #endif
334 #ifndef WIN32_LEAN_AND_MEAN
335 # define WIN32_LEAN_AND_MEAN
336 #endif
337
338 #include <windows.h>
339
340 #ifndef INVALID_FILE_ATTRIBUTES
341 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
342 #endif
343
344 typedef struct directory_type
345 {
346 HANDLE handle;
347 BOOL first;
348 WIN32_FIND_DATA data;
349 } directory_type;
350
351 int
352 isdirectory(char *filename)
353 {
354 DWORD attr = GetFileAttributes(filename);
355 if (attr == INVALID_FILE_ATTRIBUTES)
356 return 0;
357 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
358 }
359
360 directory_type *
361 opendirectory(char *filename)
362 {
363 size_t len;
364 char *pattern;
365 directory_type *dir;
366 DWORD err;
367 len = strlen(filename);
368 pattern = (char *) malloc(len + 3);
369 dir = (directory_type *) malloc(sizeof(*dir));
370 if ((pattern == NULL) || (dir == NULL))
371 {
372 fprintf(stderr, "pcregrep: malloc failed\n");
373 exit(2);
374 }
375 memcpy(pattern, filename, len);
376 memcpy(&(pattern[len]), "\\*", 3);
377 dir->handle = FindFirstFile(pattern, &(dir->data));
378 if (dir->handle != INVALID_HANDLE_VALUE)
379 {
380 free(pattern);
381 dir->first = TRUE;
382 return dir;
383 }
384 err = GetLastError();
385 free(pattern);
386 free(dir);
387 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
388 return NULL;
389 }
390
391 char *
392 readdirectory(directory_type *dir)
393 {
394 for (;;)
395 {
396 if (!dir->first)
397 {
398 if (!FindNextFile(dir->handle, &(dir->data)))
399 return NULL;
400 }
401 else
402 {
403 dir->first = FALSE;
404 }
405 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
406 return dir->data.cFileName;
407 }
408 #ifndef _MSC_VER
409 return NULL; /* Keep compiler happy; never executed */
410 #endif
411 }
412
413 void
414 closedirectory(directory_type *dir)
415 {
416 FindClose(dir->handle);
417 free(dir);
418 }
419
420
421 /************* Test for regular file in Win32 **********/
422
423 /* I don't know how to do this, or if it can be done; assume all paths are
424 regular if they are not directories. */
425
/* Anything that isdirectory() does not report as a directory is taken to be
a regular file. */

int isregfile(char *filename)
{
  if (isdirectory(filename))
    return 0;
  return 1;
}
430
431
432 /************* Test stdout for being a terminal in Win32 **********/
433
434 /* I don't know how to do this; assume never */
435
436 static BOOL
437 is_stdout_tty(void)
438 {
439 return FALSE;
440 }
441
442
443 /************* Directory scanning when we can't do it ***********/
444
445 /* The type is void, and apart from isdirectory(), the functions do nothing. */
446
447 #else
448
/* Stand-ins for when directory scanning is not available. Nothing is ever
reported as a directory, no directory can be opened, and there are never any
entries to read. */

typedef void directory_type;

int isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

directory_type * opendirectory(char *filename)
{
  (void)filename;
  return (directory_type*)0;
}

char *readdirectory(directory_type *dir)
{
  (void)dir;
  return (char*)0;
}

void closedirectory(directory_type *dir)
{
  (void)dir;
}
455
456
457 /************* Test for regular when we can't do it **********/
458
459 /* Assume all files are regular. */
460
int isregfile(char *filename)
{
  (void)filename;   /* no test is possible, so every file counts as regular */
  return 1;
}
462
463
464 /************* Test stdout for being a terminal when we can't do it **********/
465
466 static BOOL
467 is_stdout_tty(void)
468 {
469 return FALSE;
470 }
471
472
473 #endif
474
475
476
477 #ifndef HAVE_STRERROR
478 /*************************************************
479 * Provide strerror() for non-ANSI libraries *
480 *************************************************/
481
482 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
483 in their libraries, but can provide the same facility by this simple
484 alternative function. */
485
/* The system's table of error messages and the number of entries in it. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Map an error number onto its message, with a fixed text for any number
that lies outside the table. */

char *
strerror(int n)
{
  int in_table = (n >= 0 && n < sys_nerr);
  return in_table ? sys_errlist[n] : "unknown error number";
}
495 #endif /* HAVE_STRERROR */
496
497
498
499 /*************************************************
500 * Find end of line *
501 *************************************************/
502
503 /* The length of the endline sequence that is found is set via lenptr. This may
504 be zero at the very end of the file if there is no line-ending sequence there.
505
506 Arguments:
507 p current position in line
508 endptr end of available data
509 lenptr where to put the length of the eol sequence
510
511 Returns: pointer to the last byte of the line
512 */
513
514 static char *
515 end_of_line(char *p, char *endptr, int *lenptr)
516 {
517 switch(endlinetype)
518 {
519 default: /* Just in case */
520 case EL_LF:
521 while (p < endptr && *p != '\n') p++;
522 if (p < endptr)
523 {
524 *lenptr = 1;
525 return p + 1;
526 }
527 *lenptr = 0;
528 return endptr;
529
530 case EL_CR:
531 while (p < endptr && *p != '\r') p++;
532 if (p < endptr)
533 {
534 *lenptr = 1;
535 return p + 1;
536 }
537 *lenptr = 0;
538 return endptr;
539
540 case EL_CRLF:
541 for (;;)
542 {
543 while (p < endptr && *p != '\r') p++;
544 if (++p >= endptr)
545 {
546 *lenptr = 0;
547 return endptr;
548 }
549 if (*p == '\n')
550 {
551 *lenptr = 2;
552 return p + 1;
553 }
554 }
555 break;
556
557 case EL_ANYCRLF:
558 while (p < endptr)
559 {
560 int extra = 0;
561 register int c = *((unsigned char *)p);
562
563 if (utf8 && c >= 0xc0)
564 {
565 int gcii, gcss;
566 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
567 gcss = 6*extra;
568 c = (c & utf8_table3[extra]) << gcss;
569 for (gcii = 1; gcii <= extra; gcii++)
570 {
571 gcss -= 6;
572 c |= (p[gcii] & 0x3f) << gcss;
573 }
574 }
575
576 p += 1 + extra;
577
578 switch (c)
579 {
580 case 0x0a: /* LF */
581 *lenptr = 1;
582 return p;
583
584 case 0x0d: /* CR */
585 if (p < endptr && *p == 0x0a)
586 {
587 *lenptr = 2;
588 p++;
589 }
590 else *lenptr = 1;
591 return p;
592
593 default:
594 break;
595 }
596 } /* End of loop for ANYCRLF case */
597
598 *lenptr = 0; /* Must have hit the end */
599 return endptr;
600
601 case EL_ANY:
602 while (p < endptr)
603 {
604 int extra = 0;
605 register int c = *((unsigned char *)p);
606
607 if (utf8 && c >= 0xc0)
608 {
609 int gcii, gcss;
610 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
611 gcss = 6*extra;
612 c = (c & utf8_table3[extra]) << gcss;
613 for (gcii = 1; gcii <= extra; gcii++)
614 {
615 gcss -= 6;
616 c |= (p[gcii] & 0x3f) << gcss;
617 }
618 }
619
620 p += 1 + extra;
621
622 switch (c)
623 {
624 case 0x0a: /* LF */
625 case 0x0b: /* VT */
626 case 0x0c: /* FF */
627 *lenptr = 1;
628 return p;
629
630 case 0x0d: /* CR */
631 if (p < endptr && *p == 0x0a)
632 {
633 *lenptr = 2;
634 p++;
635 }
636 else *lenptr = 1;
637 return p;
638
639 case 0x85: /* NEL */
640 *lenptr = utf8? 2 : 1;
641 return p;
642
643 case 0x2028: /* LS */
644 case 0x2029: /* PS */
645 *lenptr = 3;
646 return p;
647
648 default:
649 break;
650 }
651 } /* End of loop for ANY case */
652
653 *lenptr = 0; /* Must have hit the end */
654 return endptr;
655 } /* End of overall switch */
656 }
657
658
659
660 /*************************************************
661 * Find start of previous line *
662 *************************************************/
663
664 /* This is called when looking back for before lines to print.
665
666 Arguments:
667 p start of the subsequent line
668 startptr start of available data
669
670 Returns: pointer to the start of the previous line
671 */
672
673 static char *
674 previous_line(char *p, char *startptr)
675 {
676 switch(endlinetype)
677 {
678 default: /* Just in case */
679 case EL_LF:
680 p--;
681 while (p > startptr && p[-1] != '\n') p--;
682 return p;
683
684 case EL_CR:
685 p--;
686 while (p > startptr && p[-1] != '\n') p--;
687 return p;
688
689 case EL_CRLF:
690 for (;;)
691 {
692 p -= 2;
693 while (p > startptr && p[-1] != '\n') p--;
694 if (p <= startptr + 1 || p[-2] == '\r') return p;
695 }
696 return p; /* But control should never get here */
697
698 case EL_ANY:
699 case EL_ANYCRLF:
700 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
701 if (utf8) while ((*p & 0xc0) == 0x80) p--;
702
703 while (p > startptr)
704 {
705 register int c;
706 char *pp = p - 1;
707
708 if (utf8)
709 {
710 int extra = 0;
711 while ((*pp & 0xc0) == 0x80) pp--;
712 c = *((unsigned char *)pp);
713 if (c >= 0xc0)
714 {
715 int gcii, gcss;
716 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
717 gcss = 6*extra;
718 c = (c & utf8_table3[extra]) << gcss;
719 for (gcii = 1; gcii <= extra; gcii++)
720 {
721 gcss -= 6;
722 c |= (pp[gcii] & 0x3f) << gcss;
723 }
724 }
725 }
726 else c = *((unsigned char *)pp);
727
728 if (endlinetype == EL_ANYCRLF) switch (c)
729 {
730 case 0x0a: /* LF */
731 case 0x0d: /* CR */
732 return p;
733
734 default:
735 break;
736 }
737
738 else switch (c)
739 {
740 case 0x0a: /* LF */
741 case 0x0b: /* VT */
742 case 0x0c: /* FF */
743 case 0x0d: /* CR */
744 case 0x85: /* NEL */
745 case 0x2028: /* LS */
746 case 0x2029: /* PS */
747 return p;
748
749 default:
750 break;
751 }
752
753 p = pp; /* Back one character */
754 } /* End of loop for ANY case */
755
756 return startptr; /* Hit start of data */
757 } /* End of overall switch */
758 }
759
760
761
762
763
764 /*************************************************
765 * Print the previous "after" lines *
766 *************************************************/
767
768 /* This is called if we are about to lose said lines because of buffer filling,
769 and at the end of the file. The data in the line is written using fwrite() so
770 that a binary zero does not terminate it.
771
772 Arguments:
773 lastmatchnumber the number of the last matching line, plus one
774 lastmatchrestart where we restarted after the last match
775 endptr end of available data
776 printname filename for printing
777
778 Returns: nothing
779 */
780
781 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
782 char *endptr, char *printname)
783 {
784 if (after_context > 0 && lastmatchnumber > 0)
785 {
786 int count = 0;
787 while (lastmatchrestart < endptr && count++ < after_context)
788 {
789 int ellength;
790 char *pp = lastmatchrestart;
791 if (printname != NULL) fprintf(stdout, "%s-", printname);
792 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
793 pp = end_of_line(pp, endptr, &ellength);
794 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
795 lastmatchrestart = pp;
796 }
797 hyphenpending = TRUE;
798 }
799 }
800
801
802
803 /*************************************************
804 * Grep an individual file *
805 *************************************************/
806
807 /* This is called from grep_or_recurse() below. It uses a buffer that is three
808 times the value of MBUFTHIRD. The matching point is never allowed to stray into
809 the top third of the buffer, thus keeping more of the file available for
810 context printing or for multiline scanning. For large files, the pointer will
811 be in the middle third most of the time, so the bottom third is available for
812 "before" context printing.
813
814 Arguments:
815 in the fopened FILE stream
816 printname the file name if it is to be printed for each match
817 or NULL if the file name is not to be printed
818 it cannot be NULL if filenames[_nomatch]_only is set
819
820 Returns: 0 if there was at least one match
821 1 otherwise (no matches)
822 */
823
824 static int
825 pcregrep(FILE *in, char *printname)
826 {
827 int rc = 1;
828 int linenumber = 1;
829 int lastmatchnumber = 0;
830 int count = 0;
831 int filepos = 0;
832 int offsets[99];
833 char *lastmatchrestart = NULL;
834 char buffer[3*MBUFTHIRD];
835 char *ptr = buffer;
836 char *endptr;
837 size_t bufflength;
838 BOOL endhyphenpending = FALSE;
839
840 /* Do the first read into the start of the buffer and set up the pointer to
841 end of what we have. */
842
843 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
844 endptr = buffer + bufflength;
845
846 /* Loop while the current pointer is not at the end of the file. For large
847 files, endptr will be at the end of the buffer when we are in the middle of the
848 file, but ptr will never get there, because as soon as it gets over 2/3 of the
849 way, the buffer is shifted left and re-filled. */
850
851 while (ptr < endptr)
852 {
853 int i, endlinelength;
854 int mrc = 0;
855 BOOL match = FALSE;
856 char *matchptr = ptr;
857 char *t = ptr;
858 size_t length, linelength;
859
860 /* At this point, ptr is at the start of a line. We need to find the length
861 of the subject string to pass to pcre_exec(). In multiline mode, it is the
862 length remainder of the data in the buffer. Otherwise, it is the length of
863 the next line. After matching, we always advance by the length of the next
864 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
865 that any match is constrained to be in the first line. */
866
867 t = end_of_line(t, endptr, &endlinelength);
868 linelength = t - ptr - endlinelength;
869 length = multiline? (size_t)(endptr - ptr) : linelength;
870
871 /* Extra processing for Jeffrey Friedl's debugging. */
872
873 #ifdef JFRIEDL_DEBUG
874 if (jfriedl_XT || jfriedl_XR)
875 {
876 #include <sys/time.h>
877 #include <time.h>
878 struct timeval start_time, end_time;
879 struct timezone dummy;
880
881 if (jfriedl_XT)
882 {
883 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
884 const char *orig = ptr;
885 ptr = malloc(newlen + 1);
886 if (!ptr) {
887 printf("out of memory");
888 exit(2);
889 }
890 endptr = ptr;
891 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
892 for (i = 0; i < jfriedl_XT; i++) {
893 strncpy(endptr, orig, length);
894 endptr += length;
895 }
896 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
897 length = newlen;
898 }
899
900 if (gettimeofday(&start_time, &dummy) != 0)
901 perror("bad gettimeofday");
902
903
904 for (i = 0; i < jfriedl_XR; i++)
905 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
906
907 if (gettimeofday(&end_time, &dummy) != 0)
908 perror("bad gettimeofday");
909
910 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
911 -
912 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
913
914 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
915 return 0;
916 }
917 #endif
918
919 /* We come back here after a match when the -o option (only_matching) is set,
920 in order to find any further matches in the same line. */
921
922 ONLY_MATCHING_RESTART:
923
924 /* Run through all the patterns until one matches. Note that we don't include
925 the final newline in the subject string. */
926
927 for (i = 0; i < pattern_count; i++)
928 {
929 mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
930 offsets, 99);
931 if (mrc >= 0) { match = TRUE; break; }
932 if (mrc != PCRE_ERROR_NOMATCH)
933 {
934 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
935 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
936 fprintf(stderr, "this line:\n");
937 fwrite(matchptr, 1, linelength, stderr); /* In case binary zero included */
938 fprintf(stderr, "\n");
939 if (error_count == 0 &&
940 (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
941 {
942 fprintf(stderr, "pcregrep: error %d means that a resource limit "
943 "was exceeded\n", mrc);
944 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
945 }
946 if (error_count++ > 20)
947 {
948 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
949 exit(2);
950 }
951 match = invert; /* No more matching; don't show the line again */
952 break;
953 }
954 }
955
956 /* If it's a match or a not-match (as required), do what's wanted. */
957
958 if (match != invert)
959 {
960 BOOL hyphenprinted = FALSE;
961
962 /* We've failed if we want a file that doesn't have any matches. */
963
964 if (filenames == FN_NOMATCH_ONLY) return 1;
965
966 /* Just count if just counting is wanted. */
967
968 if (count_only) count++;
969
970 /* If all we want is a file name, there is no need to scan any more lines
971 in the file. */
972
973 else if (filenames == FN_ONLY)
974 {
975 fprintf(stdout, "%s\n", printname);
976 return 0;
977 }
978
979 /* Likewise, if all we want is a yes/no answer. */
980
981 else if (quiet) return 0;
982
983 /* The --only-matching option prints just the substring that matched, and
984 the --file-offsets and --line-offsets options output offsets for the
985 matching substring (they both force --only-matching). None of these options
986 prints any context. Afterwards, adjust the start and length, and then jump
987 back to look for further matches in the same line. If we are in invert
988 mode, however, nothing is printed - this could be still useful because the
989 return code is set. */
990
991 else if (only_matching)
992 {
993 if (!invert)
994 {
995 if (printname != NULL) fprintf(stdout, "%s:", printname);
996 if (number) fprintf(stdout, "%d:", linenumber);
997 if (line_offsets)
998 fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,
999 offsets[1] - offsets[0]);
1000 else if (file_offsets)
1001 fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,
1002 offsets[1] - offsets[0]);
1003 else
1004 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1005 fprintf(stdout, "\n");
1006 matchptr += offsets[1];
1007 length -= offsets[1];
1008 match = FALSE;
1009 goto ONLY_MATCHING_RESTART;
1010 }
1011 }
1012
1013 /* This is the default case when none of the above options is set. We print
1014 the matching lines(s), possibly preceded and/or followed by other lines of
1015 context. */
1016
1017 else
1018 {
1019 /* See if there is a requirement to print some "after" lines from a
1020 previous match. We never print any overlaps. */
1021
1022 if (after_context > 0 && lastmatchnumber > 0)
1023 {
1024 int ellength;
1025 int linecount = 0;
1026 char *p = lastmatchrestart;
1027
1028 while (p < ptr && linecount < after_context)
1029 {
1030 p = end_of_line(p, ptr, &ellength);
1031 linecount++;
1032 }
1033
1034 /* It is important to advance lastmatchrestart during this printing so
1035 that it interacts correctly with any "before" printing below. Print
1036 each line's data using fwrite() in case there are binary zeroes. */
1037
1038 while (lastmatchrestart < p)
1039 {
1040 char *pp = lastmatchrestart;
1041 if (printname != NULL) fprintf(stdout, "%s-", printname);
1042 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1043 pp = end_of_line(pp, endptr, &ellength);
1044 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1045 lastmatchrestart = pp;
1046 }
1047 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1048 }
1049
1050 /* If there were non-contiguous lines printed above, insert hyphens. */
1051
1052 if (hyphenpending)
1053 {
1054 fprintf(stdout, "--\n");
1055 hyphenpending = FALSE;
1056 hyphenprinted = TRUE;
1057 }
1058
1059 /* See if there is a requirement to print some "before" lines for this
1060 match. Again, don't print overlaps. */
1061
1062 if (before_context > 0)
1063 {
1064 int linecount = 0;
1065 char *p = ptr;
1066
1067 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1068 linecount < before_context)
1069 {
1070 linecount++;
1071 p = previous_line(p, buffer);
1072 }
1073
1074 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1075 fprintf(stdout, "--\n");
1076
1077 while (p < ptr)
1078 {
1079 int ellength;
1080 char *pp = p;
1081 if (printname != NULL) fprintf(stdout, "%s-", printname);
1082 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1083 pp = end_of_line(pp, endptr, &ellength);
1084 fwrite(p, 1, pp - p, stdout);
1085 p = pp;
1086 }
1087 }
1088
1089 /* Now print the matching line(s); ensure we set hyphenpending at the end
1090 of the file if any context lines are being output. */
1091
1092 if (after_context > 0 || before_context > 0)
1093 endhyphenpending = TRUE;
1094
1095 if (printname != NULL) fprintf(stdout, "%s:", printname);
1096 if (number) fprintf(stdout, "%d:", linenumber);
1097
1098 /* In multiline mode, we want to print to the end of the line in which
1099 the end of the matched string is found, so we adjust linelength and the
1100 line number appropriately, but only when there actually was a match
1101 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1102 the match will always be before the first newline sequence. */
1103
1104 if (multiline)
1105 {
1106 int ellength;
1107 char *endmatch = ptr;
1108 if (!invert)
1109 {
1110 endmatch += offsets[1];
1111 t = ptr;
1112 while (t < endmatch)
1113 {
1114 t = end_of_line(t, endptr, &ellength);
1115 if (t <= endmatch) linenumber++; else break;
1116 }
1117 }
1118 endmatch = end_of_line(endmatch, endptr, &ellength);
1119 linelength = endmatch - ptr - ellength;
1120 }
1121
1122 /*** NOTE: Use only fwrite() to output the data line, so that binary
1123 zeroes are treated as just another data character. */
1124
1125 /* This extra option, for Jeffrey Friedl's debugging requirements,
1126 replaces the matched string, or a specific captured string if it exists,
1127 with X. When this happens, colouring is ignored. */
1128
1129 #ifdef JFRIEDL_DEBUG
1130 if (S_arg >= 0 && S_arg < mrc)
1131 {
1132 int first = S_arg * 2;
1133 int last = first + 1;
1134 fwrite(ptr, 1, offsets[first], stdout);
1135 fprintf(stdout, "X");
1136 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1137 }
1138 else
1139 #endif
1140
1141 /* We have to split the line(s) up if colouring. */
1142
1143 if (do_colour)
1144 {
1145 fwrite(ptr, 1, offsets[0], stdout);
1146 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1147 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1148 fprintf(stdout, "%c[00m", 0x1b);
1149 fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1150 stdout);
1151 }
1152 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1153 }
1154
1155 /* End of doing what has to be done for a match */
1156
1157 rc = 0; /* Had some success */
1158
1159 /* Remember where the last match happened for after_context. We remember
1160 where we are about to restart, and that line's number. */
1161
1162 lastmatchrestart = ptr + linelength + endlinelength;
1163 lastmatchnumber = linenumber + 1;
1164 }
1165
1166 /* For a match in multiline inverted mode (which of course did not cause
1167 anything to be printed), we have to move on to the end of the match before
1168 proceeding. */
1169
1170 if (multiline && invert && match)
1171 {
1172 int ellength;
1173 char *endmatch = ptr + offsets[1];
1174 t = ptr;
1175 while (t < endmatch)
1176 {
1177 t = end_of_line(t, endptr, &ellength);
1178 if (t <= endmatch) linenumber++; else break;
1179 }
1180 endmatch = end_of_line(endmatch, endptr, &ellength);
1181 linelength = endmatch - ptr - ellength;
1182 }
1183
1184 /* Advance to after the newline and increment the line number. The file
1185 offset to the current line is maintained in filepos. */
1186
1187 ptr += linelength + endlinelength;
1188 filepos += linelength + endlinelength;
1189 linenumber++;
1190
1191 /* If we haven't yet reached the end of the file (the buffer is full), and
1192 the current point is in the top 1/3 of the buffer, slide the buffer down by
1193 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1194 about to be lost, print them. */
1195
1196 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1197 {
1198 if (after_context > 0 &&
1199 lastmatchnumber > 0 &&
1200 lastmatchrestart < buffer + MBUFTHIRD)
1201 {
1202 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1203 lastmatchnumber = 0;
1204 }
1205
1206 /* Now do the shuffle */
1207
1208 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1209 ptr -= MBUFTHIRD;
1210 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1211 endptr = buffer + bufflength;
1212
1213 /* Adjust any last match point */
1214
1215 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1216 }
1217 } /* Loop through the whole file */
1218
1219 /* End of file; print final "after" lines if wanted; do_after_lines sets
1220 hyphenpending if it prints something. */
1221
1222 if (!only_matching && !count_only)
1223 {
1224 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1225 hyphenpending |= endhyphenpending;
1226 }
1227
1228 /* Print the file name if we are looking for those without matches and there
1229 were none. If we found a match, we won't have got this far. */
1230
1231 if (filenames == FN_NOMATCH_ONLY)
1232 {
1233 fprintf(stdout, "%s\n", printname);
1234 return 0;
1235 }
1236
1237 /* Print the match count if wanted */
1238
1239 if (count_only)
1240 {
1241 if (printname != NULL) fprintf(stdout, "%s:", printname);
1242 fprintf(stdout, "%d\n", count);
1243 }
1244
1245 return rc;
1246 }
1247
1248
1249
1250 /*************************************************
1251 * Grep a file or recurse into a directory *
1252 *************************************************/
1253
1254 /* Given a path name, if it's a directory, scan all the files if we are
1255 recursing; if it's a file, grep it.
1256
1257 Arguments:
1258 pathname the path to investigate
1259 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1260 only_one_at_top TRUE if the path is the only one at toplevel
1261
1262 Returns: 0 if there was at least one match
1263 1 if there were no matches
1264 2 there was some kind of error
1265
1266 However, file opening failures are suppressed if "silent" is set.
1267 */
1268
1269 static int
1270 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1271 {
1272 int rc = 1;
1273 int sep;
1274 FILE *in;
1275
1276 /* If the file name is "-" we scan stdin */
1277
1278 if (strcmp(pathname, "-") == 0)
1279 {
1280 return pcregrep(stdin,
1281 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1282 stdin_name : NULL);
1283 }
1284
1285
1286 /* If the file is a directory, skip if skipping or if we are recursing, scan
1287 each file within it, subject to any include or exclude patterns that were set.
1288 The scanning code is localized so it can be made system-specific. */
1289
1290 if ((sep = isdirectory(pathname)) != 0)
1291 {
1292 if (dee_action == dee_SKIP) return 1;
1293 if (dee_action == dee_RECURSE)
1294 {
1295 char buffer[1024];
1296 char *nextfile;
1297 directory_type *dir = opendirectory(pathname);
1298
1299 if (dir == NULL)
1300 {
1301 if (!silent)
1302 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1303 strerror(errno));
1304 return 2;
1305 }
1306
1307 while ((nextfile = readdirectory(dir)) != NULL)
1308 {
1309 int frc, blen;
1310 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1311 blen = strlen(buffer);
1312
1313 if (exclude_compiled != NULL &&
1314 pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1315 continue;
1316
1317 if (include_compiled != NULL &&
1318 pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1319 continue;
1320
1321 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1322 if (frc > 1) rc = frc;
1323 else if (frc == 0 && rc == 1) rc = 0;
1324 }
1325
1326 closedirectory(dir);
1327 return rc;
1328 }
1329 }
1330
1331 /* If the file is not a directory and not a regular file, skip it if that's
1332 been requested. */
1333
1334 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1335
1336 /* Control reaches here if we have a regular file, or if we have a directory
1337 and recursion or skipping was not requested, or if we have anything else and
1338 skipping was not requested. The scan proceeds. If this is the first and only
1339 argument at top level, we don't show the file name, unless we are only showing
1340 the file name, or the filename was forced (-H). */
1341
1342 in = fopen(pathname, "r");
1343 if (in == NULL)
1344 {
1345 if (!silent)
1346 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1347 strerror(errno));
1348 return 2;
1349 }
1350
1351 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1352 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1353
1354 fclose(in);
1355 return rc;
1356 }
1357
1358
1359
1360
1361 /*************************************************
1362 * Usage function *
1363 *************************************************/
1364
1365 static int
1366 usage(int rc)
1367 {
1368 option_item *op;
1369 fprintf(stderr, "Usage: pcregrep [-");
1370 for (op = optionlist; op->one_char != 0; op++)
1371 {
1372 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1373 }
1374 fprintf(stderr, "] [long options] [pattern] [files]\n");
1375 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1376 "options.\n");
1377 return rc;
1378 }
1379
1380
1381
1382
1383 /*************************************************
1384 * Help function *
1385 *************************************************/
1386
1387 static void
1388 help(void)
1389 {
1390 option_item *op;
1391
1392 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1393 printf("Search for PATTERN in each FILE or standard input.\n");
1394 printf("PATTERN must be present if neither -e nor -f is used.\n");
1395 printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1396 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1397
1398 printf("Options:\n");
1399
1400 for (op = optionlist; op->one_char != 0; op++)
1401 {
1402 int n;
1403 char s[4];
1404 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1405 printf(" %s --%s%n", s, op->long_name, &n);
1406 n = 30 - n;
1407 if (n < 1) n = 1;
1408 printf("%.*s%s\n", n, " ", op->help_text);
1409 }
1410
1411 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1412 printf("trailing white space is removed and blank lines are ignored.\n");
1413 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1414
1415 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1416 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1417 }
1418
1419
1420
1421
1422 /*************************************************
1423 * Handle a single-letter, no data option *
1424 *************************************************/
1425
1426 static int
1427 handle_option(int letter, int options)
1428 {
1429 switch(letter)
1430 {
1431 case N_FOFFSETS: file_offsets = TRUE; break;
1432 case N_HELP: help(); exit(0);
1433 case N_LOFFSETS: line_offsets = number = TRUE; break;
1434 case 'c': count_only = TRUE; break;
1435 case 'F': process_options |= PO_FIXED_STRINGS; break;
1436 case 'H': filenames = FN_FORCE; break;
1437 case 'h': filenames = FN_NONE; break;
1438 case 'i': options |= PCRE_CASELESS; break;
1439 case 'l': filenames = FN_ONLY; break;
1440 case 'L': filenames = FN_NOMATCH_ONLY; break;
1441 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1442 case 'n': number = TRUE; break;
1443 case 'o': only_matching = TRUE; break;
1444 case 'q': quiet = TRUE; break;
1445 case 'r': dee_action = dee_RECURSE; break;
1446 case 's': silent = TRUE; break;
1447 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1448 case 'v': invert = TRUE; break;
1449 case 'w': process_options |= PO_WORD_MATCH; break;
1450 case 'x': process_options |= PO_LINE_MATCH; break;
1451
1452 case 'V':
1453 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1454 exit(0);
1455 break;
1456
1457 default:
1458 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1459 exit(usage(2));
1460 }
1461
1462 return options;
1463 }
1464
1465
1466
1467
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", "11th", etc.

Argument:   n   the number
Returns:    pointer to a static buffer holding the text; the contents are
            overwritten by the next call
*/

static char *
ordin(int n)
{
  /* Room for the longest int ("-2147483648"), two letters, and the zero. */
  static char buffer[16];
  const char *ending = "th";
  int last_two = n % 100;

  /* Numbers ending in 11, 12 and 13 take "th", unlike other numbers ending in
  1, 2 and 3. For negative n both remainders are non-positive, so "th" is
  used. */

  if (last_two < 11 || last_two > 13)
  {
    switch (n % 10)
    {
      case 1: ending = "st"; break;
      case 2: ending = "nd"; break;
      case 3: ending = "rd"; break;
      default: break;
    }
  }

  sprintf(buffer, "%d%s", n, ending);
  return buffer;
}
1490
1491
1492
1493 /*************************************************
1494 * Compile a single pattern *
1495 *************************************************/
1496
1497 /* When the -F option has been used, this is called for each substring.
1498 Otherwise it's called for each supplied pattern.
1499
1500 Arguments:
1501 pattern the pattern string
1502 options the PCRE options
1503 filename the file name, or NULL for a command-line pattern
1504 count 0 if this is the only command line pattern, or
1505 number of the command line pattern, or
1506 linenumber for a pattern from a file
1507
1508 Returns: TRUE on success, FALSE after an error
1509 */
1510
1511 static BOOL
1512 compile_single_pattern(char *pattern, int options, char *filename, int count)
1513 {
1514 char buffer[MBUFTHIRD + 16];
1515 const char *error;
1516 int errptr;
1517
1518 if (pattern_count >= MAX_PATTERN_COUNT)
1519 {
1520 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1521 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1522 return FALSE;
1523 }
1524
1525 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1526 suffix[process_options]);
1527 pattern_list[pattern_count] =
1528 pcre_compile(buffer, options, &error, &errptr, pcretables);
1529 if (pattern_list[pattern_count] != NULL)
1530 {
1531 pattern_count++;
1532 return TRUE;
1533 }
1534
1535 /* Handle compile errors */
1536
1537 errptr -= (int)strlen(prefix[process_options]);
1538 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1539
1540 if (filename == NULL)
1541 {
1542 if (count == 0)
1543 fprintf(stderr, "pcregrep: Error in command-line regex "
1544 "at offset %d: %s\n", errptr, error);
1545 else
1546 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1547 "at offset %d: %s\n", ordin(count), errptr, error);
1548 }
1549 else
1550 {
1551 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1552 "at offset %d: %s\n", count, filename, errptr, error);
1553 }
1554
1555 return FALSE;
1556 }
1557
1558
1559
1560 /*************************************************
1561 * Compile one supplied pattern *
1562 *************************************************/
1563
1564 /* When the -F option has been used, each string may be a list of strings,
1565 separated by line breaks. They will be matched literally.
1566
1567 Arguments:
1568 pattern the pattern string
1569 options the PCRE options
1570 filename the file name, or NULL for a command-line pattern
1571 count 0 if this is the only command line pattern, or
1572 number of the command line pattern, or
1573 linenumber for a pattern from a file
1574
1575 Returns: TRUE on success, FALSE after an error
1576 */
1577
1578 static BOOL
1579 compile_pattern(char *pattern, int options, char *filename, int count)
1580 {
1581 if ((process_options & PO_FIXED_STRINGS) != 0)
1582 {
1583 char *eop = pattern + strlen(pattern);
1584 char buffer[MBUFTHIRD];
1585 for(;;)
1586 {
1587 int ellength;
1588 char *p = end_of_line(pattern, eop, &ellength);
1589 if (ellength == 0)
1590 return compile_single_pattern(pattern, options, filename, count);
1591 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1592 pattern = p;
1593 if (!compile_single_pattern(buffer, options, filename, count))
1594 return FALSE;
1595 }
1596 }
1597 else return compile_single_pattern(pattern, options, filename, count);
1598 }
1599
1600
1601
1602 /*************************************************
1603 * Main program *
1604 *************************************************/
1605
1606 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1607
1608 int
1609 main(int argc, char **argv)
1610 {
1611 int i, j;
1612 int rc = 1;
1613 int pcre_options = 0;
1614 int cmd_pattern_count = 0;
1615 int hint_count = 0;
1616 int errptr;
1617 BOOL only_one_at_top;
1618 char *patterns[MAX_PATTERN_COUNT];
1619 const char *locale_from = "--locale";
1620 const char *error;
1621
1622 /* Set the default line ending value from the default in the PCRE library;
1623 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1624 */
1625
1626 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1627 switch(i)
1628 {
1629 default: newline = (char *)"lf"; break;
1630 case '\r': newline = (char *)"cr"; break;
1631 case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1632 case -1: newline = (char *)"any"; break;
1633 case -2: newline = (char *)"anycrlf"; break;
1634 }
1635
1636 /* Process the options */
1637
1638 for (i = 1; i < argc; i++)
1639 {
1640 option_item *op = NULL;
1641 char *option_data = (char *)""; /* default to keep compiler happy */
1642 BOOL longop;
1643 BOOL longopwasequals = FALSE;
1644
1645 if (argv[i][0] != '-') break;
1646
1647 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1648 but only if we have previously had -e or -f to define the patterns. */
1649
1650 if (argv[i][1] == 0)
1651 {
1652 if (pattern_filename != NULL || pattern_count > 0) break;
1653 else exit(usage(2));
1654 }
1655
1656 /* Handle a long name option, or -- to terminate the options */
1657
1658 if (argv[i][1] == '-')
1659 {
1660 char *arg = argv[i] + 2;
1661 char *argequals = strchr(arg, '=');
1662
1663 if (*arg == 0) /* -- terminates options */
1664 {
1665 i++;
1666 break; /* out of the options-handling loop */
1667 }
1668
1669 longop = TRUE;
1670
1671 /* Some long options have data that follows after =, for example file=name.
1672 Some options have variations in the long name spelling: specifically, we
1673 allow "regexp" because GNU grep allows it, though I personally go along
1674 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1675 These options are entered in the table as "regex(p)". No option is in both
1676 these categories, fortunately. */
1677
1678 for (op = optionlist; op->one_char != 0; op++)
1679 {
1680 char *opbra = strchr(op->long_name, '(');
1681 char *equals = strchr(op->long_name, '=');
1682 if (opbra == NULL) /* Not a (p) case */
1683 {
1684 if (equals == NULL) /* Not thing=data case */
1685 {
1686 if (strcmp(arg, op->long_name) == 0) break;
1687 }
1688 else /* Special case xxx=data */
1689 {
1690 int oplen = equals - op->long_name;
1691 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1692 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1693 {
1694 option_data = arg + arglen;
1695 if (*option_data == '=')
1696 {
1697 option_data++;
1698 longopwasequals = TRUE;
1699 }
1700 break;
1701 }
1702 }
1703 }
1704 else /* Special case xxxx(p) */
1705 {
1706 char buff1[24];
1707 char buff2[24];
1708 int baselen = opbra - op->long_name;
1709 sprintf(buff1, "%.*s", baselen, op->long_name);
1710 sprintf(buff2, "%s%.*s", buff1,
1711 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1712 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1713 break;
1714 }
1715 }
1716
1717 if (op->one_char == 0)
1718 {
1719 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1720 exit(usage(2));
1721 }
1722 }
1723
1724
1725 /* Jeffrey Friedl's debugging harness uses these additional options which
1726 are not in the right form for putting in the option table because they use
1727 only one hyphen, yet are more than one character long. By putting them
1728 separately here, they will not get displayed as part of the help() output,
1729 but I don't think Jeffrey will care about that. */
1730
1731 #ifdef JFRIEDL_DEBUG
1732 else if (strcmp(argv[i], "-pre") == 0) {
1733 jfriedl_prefix = argv[++i];
1734 continue;
1735 } else if (strcmp(argv[i], "-post") == 0) {
1736 jfriedl_postfix = argv[++i];
1737 continue;
1738 } else if (strcmp(argv[i], "-XT") == 0) {
1739 sscanf(argv[++i], "%d", &jfriedl_XT);
1740 continue;
1741 } else if (strcmp(argv[i], "-XR") == 0) {
1742 sscanf(argv[++i], "%d", &jfriedl_XR);
1743 continue;
1744 }
1745 #endif
1746
1747
1748 /* One-char options; many that have no data may be in a single argument; we
1749 continue till we hit the last one or one that needs data. */
1750
1751 else
1752 {
1753 char *s = argv[i] + 1;
1754 longop = FALSE;
1755 while (*s != 0)
1756 {
1757 for (op = optionlist; op->one_char != 0; op++)
1758 { if (*s == op->one_char) break; }
1759 if (op->one_char == 0)
1760 {
1761 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1762 *s, argv[i]);
1763 exit(usage(2));
1764 }
1765 if (op->type != OP_NODATA || s[1] == 0)
1766 {
1767 option_data = s+1;
1768 break;
1769 }
1770 pcre_options = handle_option(*s++, pcre_options);
1771 }
1772 }
1773
1774 /* At this point we should have op pointing to a matched option. If the type
1775 is NO_DATA, it means that there is no data, and the option might set
1776 something in the PCRE options. */
1777
1778 if (op->type == OP_NODATA)
1779 {
1780 pcre_options = handle_option(op->one_char, pcre_options);
1781 continue;
1782 }
1783
1784 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1785 either has a value or defaults to something. It cannot have data in a
1786 separate item. At the moment, the only such options are "colo(u)r" and
1787 Jeffrey Friedl's special -S debugging option. */
1788
1789 if (*option_data == 0 &&
1790 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1791 {
1792 switch (op->one_char)
1793 {
1794 case N_COLOUR:
1795 colour_option = (char *)"auto";
1796 break;
1797 #ifdef JFRIEDL_DEBUG
1798 case 'S':
1799 S_arg = 0;
1800 break;
1801 #endif
1802 }
1803 continue;
1804 }
1805
1806 /* Otherwise, find the data string for the option. */
1807
1808 if (*option_data == 0)
1809 {
1810 if (i >= argc - 1 || longopwasequals)
1811 {
1812 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1813 exit(usage(2));
1814 }
1815 option_data = argv[++i];
1816 }
1817
1818 /* If the option type is OP_PATLIST, it's the -e option, which can be called
1819 multiple times to create a list of patterns. */
1820
1821 if (op->type == OP_PATLIST)
1822 {
1823 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1824 {
1825 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1826 MAX_PATTERN_COUNT);
1827 return 2;
1828 }
1829 patterns[cmd_pattern_count++] = option_data;
1830 }
1831
1832 /* Otherwise, deal with single string or numeric data values. */
1833
1834 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1835 {
1836 *((char **)op->dataptr) = option_data;
1837 }
1838 else
1839 {
1840 char *endptr;
1841 int n = strtoul(option_data, &endptr, 10);
1842 if (*endptr != 0)
1843 {
1844 if (longop)
1845 {
1846 char *equals = strchr(op->long_name, '=');
1847 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1848 equals - op->long_name;
1849 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1850 option_data, nlen, op->long_name);
1851 }
1852 else
1853 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1854 option_data, op->one_char);
1855 exit(usage(2));
1856 }
1857 *((int *)op->dataptr) = n;
1858 }
1859 }
1860
1861 /* Options have been decoded. If -C was used, its value is used as a default
1862 for -A and -B. */
1863
1864 if (both_context > 0)
1865 {
1866 if (after_context == 0) after_context = both_context;
1867 if (before_context == 0) before_context = both_context;
1868 }
1869
1870 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
1871 However, the latter two set the only_matching flag. */
1872
1873 if ((only_matching && (file_offsets || line_offsets)) ||
1874 (file_offsets && line_offsets))
1875 {
1876 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
1877 "and/or --line-offsets\n");
1878 exit(usage(2));
1879 }
1880
1881 if (file_offsets || line_offsets) only_matching = TRUE;
1882
1883 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1884 LC_ALL environment variable is set, and if so, use it. */
1885
1886 if (locale == NULL)
1887 {
1888 locale = getenv("LC_ALL");
1889 locale_from = "LCC_ALL";
1890 }
1891
1892 if (locale == NULL)
1893 {
1894 locale = getenv("LC_CTYPE");
1895 locale_from = "LC_CTYPE";
1896 }
1897
1898 /* If a locale has been provided, set it, and generate the tables the PCRE
1899 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1900
1901 if (locale != NULL)
1902 {
1903 if (setlocale(LC_CTYPE, locale) == NULL)
1904 {
1905 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1906 locale, locale_from);
1907 return 2;
1908 }
1909 pcretables = pcre_maketables();
1910 }
1911
1912 /* Sort out colouring */
1913
1914 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1915 {
1916 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1917 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1918 else
1919 {
1920 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1921 colour_option);
1922 return 2;
1923 }
1924 if (do_colour)
1925 {
1926 char *cs = getenv("PCREGREP_COLOUR");
1927 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1928 if (cs != NULL) colour_string = cs;
1929 }
1930 }
1931
1932 /* Interpret the newline type; the default settings are Unix-like. */
1933
1934 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1935 {
1936 pcre_options |= PCRE_NEWLINE_CR;
1937 endlinetype = EL_CR;
1938 }
1939 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1940 {
1941 pcre_options |= PCRE_NEWLINE_LF;
1942 endlinetype = EL_LF;
1943 }
1944 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1945 {
1946 pcre_options |= PCRE_NEWLINE_CRLF;
1947 endlinetype = EL_CRLF;
1948 }
1949 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1950 {
1951 pcre_options |= PCRE_NEWLINE_ANY;
1952 endlinetype = EL_ANY;
1953 }
1954 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1955 {
1956 pcre_options |= PCRE_NEWLINE_ANYCRLF;
1957 endlinetype = EL_ANYCRLF;
1958 }
1959 else
1960 {
1961 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1962 return 2;
1963 }
1964
1965 /* Interpret the text values for -d and -D */
1966
1967 if (dee_option != NULL)
1968 {
1969 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1970 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1971 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1972 else
1973 {
1974 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1975 return 2;
1976 }
1977 }
1978
1979 if (DEE_option != NULL)
1980 {
1981 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1982 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1983 else
1984 {
1985 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1986 return 2;
1987 }
1988 }
1989
1990 /* Check the values for Jeffrey Friedl's debugging options. */
1991
1992 #ifdef JFRIEDL_DEBUG
1993 if (S_arg > 9)
1994 {
1995 fprintf(stderr, "pcregrep: bad value for -S option\n");
1996 return 2;
1997 }
1998 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1999 {
2000 if (jfriedl_XT == 0) jfriedl_XT = 1;
2001 if (jfriedl_XR == 0) jfriedl_XR = 1;
2002 }
2003 #endif
2004
2005 /* Get memory to store the pattern and hints lists. */
2006
2007 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2008 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2009
2010 if (pattern_list == NULL || hints_list == NULL)
2011 {
2012 fprintf(stderr, "pcregrep: malloc failed\n");
2013 goto EXIT2;
2014 }
2015
2016 /* If no patterns were provided by -e, and there is no file provided by -f,
2017 the first argument is the one and only pattern, and it must exist. */
2018
2019 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2020 {
2021 if (i >= argc) return usage(2);
2022 patterns[cmd_pattern_count++] = argv[i++];
2023 }
2024
2025 /* Compile the patterns that were provided on the command line, either by
2026 multiple uses of -e or as a single unkeyed pattern. */
2027
2028 for (j = 0; j < cmd_pattern_count; j++)
2029 {
2030 if (!compile_pattern(patterns[j], pcre_options, NULL,
2031 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2032 goto EXIT2;
2033 }
2034
2035 /* Compile the regular expressions that are provided in a file. */
2036
2037 if (pattern_filename != NULL)
2038 {
2039 int linenumber = 0;
2040 FILE *f;
2041 char *filename;
2042 char buffer[MBUFTHIRD];
2043
2044 if (strcmp(pattern_filename, "-") == 0)
2045 {
2046 f = stdin;
2047 filename = stdin_name;
2048 }
2049 else
2050 {
2051 f = fopen(pattern_filename, "r");
2052 if (f == NULL)
2053 {
2054 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2055 strerror(errno));
2056 goto EXIT2;
2057 }
2058 filename = pattern_filename;
2059 }
2060
2061 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2062 {
2063 char *s = buffer + (int)strlen(buffer);
2064 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2065 *s = 0;
2066 linenumber++;
2067 if (buffer[0] == 0) continue; /* Skip blank lines */
2068 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2069 goto EXIT2;
2070 }
2071
2072 if (f != stdin) fclose(f);
2073 }
2074
2075 /* Study the regular expressions, as we will be running them many times */
2076
2077 for (j = 0; j < pattern_count; j++)
2078 {
2079 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2080 if (error != NULL)
2081 {
2082 char s[16];
2083 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2084 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2085 goto EXIT2;
2086 }
2087 hint_count++;
2088 }
2089
2090 /* If there are include or exclude patterns, compile them. */
2091
2092 if (exclude_pattern != NULL)
2093 {
2094 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2095 pcretables);
2096 if (exclude_compiled == NULL)
2097 {
2098 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2099 errptr, error);
2100 goto EXIT2;
2101 }
2102 }
2103
2104 if (include_pattern != NULL)
2105 {
2106 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2107 pcretables);
2108 if (include_compiled == NULL)
2109 {
2110 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2111 errptr, error);
2112 goto EXIT2;
2113 }
2114 }
2115
2116 /* If there are no further arguments, do the business on stdin and exit. */
2117
2118 if (i >= argc)
2119 {
2120 rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2121 goto EXIT;
2122 }
2123
2124 /* Otherwise, work through the remaining arguments as files or directories.
2125 Pass in the fact that there is only one argument at top level - this suppresses
2126 the file name if the argument is not a directory and filenames are not
2127 otherwise forced. */
2128
2129 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2130
2131 for (; i < argc; i++)
2132 {
2133 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2134 only_one_at_top);
2135 if (frc > 1) rc = frc;
2136 else if (frc == 0 && rc == 1) rc = 0;
2137 }
2138
2139 EXIT:
2140 if (pattern_list != NULL)
2141 {
2142 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2143 free(pattern_list);
2144 }
2145 if (hints_list != NULL)
2146 {
2147 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2148 free(hints_list);
2149 }
2150 return rc;
2151
2152 EXIT2:
2153 rc = 2;
2154 goto EXIT;
2155 }
2156
2157 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5