/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 944 - (show annotations)
Tue Feb 28 16:49:21 2012 UTC (3 years, 5 months ago) by ph10
File MIME type: text/plain
File size: 81152 byte(s)
Error occurred while calculating annotation data.
Added --file-list to pcregrep.
1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2012 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
68 #define FALSE 0
69 #define TRUE 1
70
71 typedef int BOOL;
72
73 #define MAX_PATTERN_COUNT 100
74 #define OFFSET_SIZE 99
75
76 #if BUFSIZ > 8192
77 #define PATBUFSIZE BUFSIZ
78 #else
79 #define PATBUFSIZE 8192
80 #endif
81
82 /* Values for the "filenames" variable, which specifies options for file name
83 output. The order is important; it is assumed that a file name is wanted for
84 all values greater than FN_DEFAULT. */
85
86 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87
88 /* File reading styles */
89
90 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91
92 /* Actions for the -d and -D options */
93
94 enum { dee_READ, dee_SKIP, dee_RECURSE };
95 enum { DEE_READ, DEE_SKIP };
96
97 /* Actions for special processing options (flag bits) */
98
99 #define PO_WORD_MATCH 0x0001
100 #define PO_LINE_MATCH 0x0002
101 #define PO_FIXED_STRINGS 0x0004
102
103 /* Line ending types */
104
105 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf(). The fudge is wrapped in do { } while (0) so that the macro
acts as one statement; a bare "if" would capture an "else" that follows a use
of the macro. Every use must be followed by a semicolon. */

#define FWRITE(a,b,c,d) do { if (fwrite((a),(b),(c),(d))) {} } while (0)
114
115
116
117 /*************************************************
118 * Global variables *
119 *************************************************/
120
121 /* Jeffrey Friedl has some debugging requirements that are not part of the
122 regular code. */
123
124 #ifdef JFRIEDL_DEBUG
125 static int S_arg = -1;
126 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128 static const char *jfriedl_prefix = "";
129 static const char *jfriedl_postfix = "";
130 #endif
131
132 static int endlinetype;
133
134 static char *colour_string = (char *)"1;31";
135 static char *colour_option = NULL;
136 static char *dee_option = NULL;
137 static char *DEE_option = NULL;
138 static char *main_buffer = NULL;
139 static char *newline = NULL;
140 static char *pattern_filename = NULL;
141 static char *stdin_name = (char *)"(standard input)";
142 static char *locale = NULL;
143
144 static const unsigned char *pcretables = NULL;
145
146 static int pattern_count = 0;
147 static pcre **pattern_list = NULL;
148 static pcre_extra **hints_list = NULL;
149
150 static char *file_list = NULL;
151 static char *include_pattern = NULL;
152 static char *exclude_pattern = NULL;
153 static char *include_dir_pattern = NULL;
154 static char *exclude_dir_pattern = NULL;
155
156 static pcre *include_compiled = NULL;
157 static pcre *exclude_compiled = NULL;
158 static pcre *include_dir_compiled = NULL;
159 static pcre *exclude_dir_compiled = NULL;
160
161 static int after_context = 0;
162 static int before_context = 0;
163 static int both_context = 0;
164 static int bufthird = PCREGREP_BUFSIZE;
165 static int bufsize = 3*PCREGREP_BUFSIZE;
166 static int dee_action = dee_READ;
167 static int DEE_action = DEE_READ;
168 static int error_count = 0;
169 static int filenames = FN_DEFAULT;
170 static int only_matching = -1;
171 static int process_options = 0;
172
173 #ifdef SUPPORT_PCREGREP_JIT
174 static int study_options = PCRE_STUDY_JIT_COMPILE;
175 #else
176 static int study_options = 0;
177 #endif
178
179 static unsigned long int match_limit = 0;
180 static unsigned long int match_limit_recursion = 0;
181
182 static BOOL count_only = FALSE;
183 static BOOL do_colour = FALSE;
184 static BOOL file_offsets = FALSE;
185 static BOOL hyphenpending = FALSE;
186 static BOOL invert = FALSE;
187 static BOOL line_buffered = FALSE;
188 static BOOL line_offsets = FALSE;
189 static BOOL multiline = FALSE;
190 static BOOL number = FALSE;
191 static BOOL omit_zero_count = FALSE;
192 static BOOL resource_error = FALSE;
193 static BOOL quiet = FALSE;
194 static BOOL silent = FALSE;
195 static BOOL utf8 = FALSE;
196
197 /* Structure for options and list of them */
198
199 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
200 OP_OP_NUMBER, OP_PATLIST };
201
202 typedef struct option_item {
203 int type;
204 int one_char;
205 void *dataptr;
206 const char *long_name;
207 const char *help_text;
208 } option_item;
209
210 /* Options without a single-letter equivalent get a negative value. This can be
211 used to identify them. */
212
213 #define N_COLOUR (-1)
214 #define N_EXCLUDE (-2)
215 #define N_EXCLUDE_DIR (-3)
216 #define N_HELP (-4)
217 #define N_INCLUDE (-5)
218 #define N_INCLUDE_DIR (-6)
219 #define N_LABEL (-7)
220 #define N_LOCALE (-8)
221 #define N_NULL (-9)
222 #define N_LOFFSETS (-10)
223 #define N_FOFFSETS (-11)
224 #define N_LBUFFER (-12)
225 #define N_M_LIMIT (-13)
226 #define N_M_LIMIT_REC (-14)
227 #define N_BUFSIZE (-15)
228 #define N_NOJIT (-16)
229 #define N_FILE_LIST (-17)
230
231 static option_item optionlist[] = {
232 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
233 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
234 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
235 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
236 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
237 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
238 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
239 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
240 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
241 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
242 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
243 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
244 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
245 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
246 { OP_STRING, N_FILE_LIST, &file_list, "file-list=path","read files to search from file" },
247 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
248 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
249 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
250 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
251 #ifdef SUPPORT_PCREGREP_JIT
252 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
253 #else
254 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
255 #endif
256 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
257 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
258 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
259 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
260 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
261 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
262 { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
263 { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
264 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
265 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
266 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
267 { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
268 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
269 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
270 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
271 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
272 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
273 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
274
275 /* These two were accidentally implemented with underscores instead of
276 hyphens in the option names. As this was not discovered for several releases,
277 the incorrect versions are left in the table for compatibility. However, the
278 --help function misses out any option that has an underscore in its name. */
279
280 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
281 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
282
283 #ifdef JFRIEDL_DEBUG
284 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
285 #endif
286 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
287 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
288 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
289 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
290 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
291 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
292 { OP_NODATA, 0, NULL, NULL, NULL }
293 };
294
295 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
296 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
297 that the combination of -w and -x has the same effect as -x on its own, so we
298 can treat them as the same. */
299
300 static const char *prefix[] = {
301 "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
302
303 static const char *suffix[] = {
304 "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
305
306 /* UTF-8 tables - used only when the newline setting is "any". */
307
308 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
309
310 const char utf8_table4[] = {
311 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
312 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
313 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
314 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
315
316
317
318 /*************************************************
319 * Exit from the program *
320 *************************************************/
321
322 /* If there has been a resource error, give a suitable message.
323
324 Argument: the return code
325 Returns: does not return
326 */
327
328 static void
329 pcregrep_exit(int rc)
330 {
331 if (resource_error)
332 {
333 fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
334 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
335 PCRE_ERROR_JIT_STACKLIMIT);
336 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
337 }
338
339 exit(rc);
340 }
341
342
343 /*************************************************
344 * OS-specific functions *
345 *************************************************/
346
347 /* These functions are defined so that they can be made system specific,
348 although at present the only ones are for Unix, Win32, and for "no support". */
349
350
351 /************* Directory scanning in Unix ***********/
352
353 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
354 #include <sys/types.h>
355 #include <sys/stat.h>
356 #include <dirent.h>
357
358 typedef DIR directory_type;
359
/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory
            0 otherwise, including when stat() fails
*/

static int
isdirectory(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 0;        /* In the expectation that opening as a file will fail */

/* S_ISDIR() is the base POSIX test; the S_IFMT/S_IFDIR masks are an XSI
extension that strict compilation modes may hide. */

return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
368
369 static directory_type *
370 opendirectory(char *filename)
371 {
372 return opendir(filename);
373 }
374
375 static char *
376 readdirectory(directory_type *dir)
377 {
378 for (;;)
379 {
380 struct dirent *dent = readdir(dir);
381 if (dent == NULL) return NULL;
382 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
383 return dent->d_name;
384 }
385 /* Control never reaches here */
386 }
387
388 static void
389 closedirectory(directory_type *dir)
390 {
391 closedir(dir);
392 }
393
394
395 /************* Test for regular file in Unix **********/
396
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    non-zero if stat() succeeds and reports a regular file
            1 if stat() fails
            0 otherwise
*/

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */

/* S_ISREG() is the base POSIX test; the S_IFMT/S_IFREG masks are an XSI
extension that strict compilation modes may hide. */

return S_ISREG(statbuf.st_mode) != 0;
}
405
406
407 /************* Test for a terminal in Unix **********/
408
409 static BOOL
410 is_stdout_tty(void)
411 {
412 return isatty(fileno(stdout));
413 }
414
415 static BOOL
416 is_file_tty(FILE *f)
417 {
418 return isatty(fileno(f));
419 }
420
421
422 /************* Directory scanning in Win32 ***********/
423
424 /* I (Philip Hazel) have no means of testing this code. It was contributed by
425 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
426 when it did not exist. David Byron added a patch that moved the #include of
427 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
428 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
429 undefined when it is indeed undefined. */
430
431 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
432
433 #ifndef STRICT
434 # define STRICT
435 #endif
436 #ifndef WIN32_LEAN_AND_MEAN
437 # define WIN32_LEAN_AND_MEAN
438 #endif
439
440 #include <windows.h>
441
442 #ifndef INVALID_FILE_ATTRIBUTES
443 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
444 #endif
445
446 typedef struct directory_type
447 {
448 HANDLE handle;
449 BOOL first;
450 WIN32_FIND_DATA data;
451 } directory_type;
452
453 int
454 isdirectory(char *filename)
455 {
456 DWORD attr = GetFileAttributes(filename);
457 if (attr == INVALID_FILE_ATTRIBUTES)
458 return 0;
459 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
460 }
461
462 directory_type *
463 opendirectory(char *filename)
464 {
465 size_t len;
466 char *pattern;
467 directory_type *dir;
468 DWORD err;
469 len = strlen(filename);
470 pattern = (char *) malloc(len + 3);
471 dir = (directory_type *) malloc(sizeof(*dir));
472 if ((pattern == NULL) || (dir == NULL))
473 {
474 fprintf(stderr, "pcregrep: malloc failed\n");
475 pcregrep_exit(2);
476 }
477 memcpy(pattern, filename, len);
478 memcpy(&(pattern[len]), "\\*", 3);
479 dir->handle = FindFirstFile(pattern, &(dir->data));
480 if (dir->handle != INVALID_HANDLE_VALUE)
481 {
482 free(pattern);
483 dir->first = TRUE;
484 return dir;
485 }
486 err = GetLastError();
487 free(pattern);
488 free(dir);
489 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
490 return NULL;
491 }
492
493 char *
494 readdirectory(directory_type *dir)
495 {
496 for (;;)
497 {
498 if (!dir->first)
499 {
500 if (!FindNextFile(dir->handle, &(dir->data)))
501 return NULL;
502 }
503 else
504 {
505 dir->first = FALSE;
506 }
507 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
508 return dir->data.cFileName;
509 }
510 #ifndef _MSC_VER
511 return NULL; /* Keep compiler happy; never executed */
512 #endif
513 }
514
515 void
516 closedirectory(directory_type *dir)
517 {
518 FindClose(dir->handle);
519 free(dir);
520 }
521
522
523 /************* Test for regular file in Win32 **********/
524
525 /* I don't know how to do this, or if it can be done; assume all paths are
526 regular if they are not directories. */
527
/* Win32: anything that isdirectory() does not report as a directory is
treated as a regular file. */

int isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
532
533
534 /************* Test for a terminal in Win32 **********/
535
536 /* I don't know how to do this; assume never */
537
538 static BOOL
539 is_stdout_tty(void)
540 {
541 return FALSE;
542 }
543
544 static BOOL
545 is_file_tty(FILE *f)
546 {
547 return FALSE;
548 }
549
550
551 /************* Directory scanning when we can't do it ***********/
552
553 /* The type is void, and apart from isdirectory(), the functions do nothing. */
554
555 #else
556
557 typedef void directory_type;
558
559 int isdirectory(char *filename) { return 0; }
560 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
561 char *readdirectory(directory_type *dir) { return (char*)0;}
562 void closedirectory(directory_type *dir) {}
563
564
565 /************* Test for regular when we can't do it **********/
566
567 /* Assume all files are regular. */
568
569 int isregfile(char *filename) { return 1; }
570
571
572 /************* Test for a terminal when we can't do it **********/
573
574 static BOOL
575 is_stdout_tty(void)
576 {
577 return FALSE;
578 }
579
580 static BOOL
581 is_file_tty(FILE *f)
582 {
583 return FALSE;
584 }
585
586 #endif
587
588
589
590 #ifndef HAVE_STRERROR
591 /*************************************************
592 * Provide strerror() for non-ANSI libraries *
593 *************************************************/
594
595 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
596 in their libraries, but can provide the same facility by this simple
597 alternative function. */
598
599 extern int sys_nerr;
600 extern char *sys_errlist[];
601
602 char *
603 strerror(int n)
604 {
605 if (n < 0 || n >= sys_nerr) return "unknown error number";
606 return sys_errlist[n];
607 }
608 #endif /* HAVE_STRERROR */
609
610
611
612 /*************************************************
613 * Read one line of input *
614 *************************************************/
615
616 /* Normally, input is read using fread() into a large buffer, so many lines may
617 be read at once. However, doing this for tty input means that no output appears
618 until a lot of input has been typed. Instead, tty input is handled line by
619 line. We cannot use fgets() for this, because it does not stop at a binary
620 zero, and therefore there is no way of telling how many characters it has read,
621 because there may be binary zeros embedded in the data.
622
623 Arguments:
624 buffer the buffer to read into
625 length the maximum number of characters to read
626 f the file
627
628 Returns: the number of characters read, zero at end of file
629 */
630
/* Read bytes one at a time with fgetc() until a '\n' has been stored, the
buffer is full, or EOF is reached. Binary zeros are stored like any other
byte, and no terminating zero is added.

Arguments:
  buffer      the buffer to read into
  length      the maximum number of characters to read
  f           the file

Returns:      the number of characters read; zero at end of file, or when
              length is not positive (nothing is then read or stored)
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;
if (length <= 0) return 0;     /* No room: leave buffer and stream alone */
while ((c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n' || yield >= length) break;
  }
return yield;
}
643
644
645
646 /*************************************************
647 * Find end of line *
648 *************************************************/
649
650 /* The length of the endline sequence that is found is set via lenptr. This may
651 be zero at the very end of the file if there is no line-ending sequence there.
652
653 Arguments:
654 p current position in line
655 endptr end of available data
656 lenptr where to put the length of the eol sequence
657
658 Returns: pointer after the last byte of the line,
659 including the newline byte(s)
660 */
661
662 static char *
663 end_of_line(char *p, char *endptr, int *lenptr)
664 {
665 switch(endlinetype)
666 {
667 default: /* Just in case */
668 case EL_LF:
669 while (p < endptr && *p != '\n') p++;
670 if (p < endptr)
671 {
672 *lenptr = 1;
673 return p + 1;
674 }
675 *lenptr = 0;
676 return endptr;
677
678 case EL_CR:
679 while (p < endptr && *p != '\r') p++;
680 if (p < endptr)
681 {
682 *lenptr = 1;
683 return p + 1;
684 }
685 *lenptr = 0;
686 return endptr;
687
688 case EL_CRLF:
689 for (;;)
690 {
691 while (p < endptr && *p != '\r') p++;
692 if (++p >= endptr)
693 {
694 *lenptr = 0;
695 return endptr;
696 }
697 if (*p == '\n')
698 {
699 *lenptr = 2;
700 return p + 1;
701 }
702 }
703 break;
704
705 case EL_ANYCRLF:
706 while (p < endptr)
707 {
708 int extra = 0;
709 register int c = *((unsigned char *)p);
710
711 if (utf8 && c >= 0xc0)
712 {
713 int gcii, gcss;
714 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
715 gcss = 6*extra;
716 c = (c & utf8_table3[extra]) << gcss;
717 for (gcii = 1; gcii <= extra; gcii++)
718 {
719 gcss -= 6;
720 c |= (p[gcii] & 0x3f) << gcss;
721 }
722 }
723
724 p += 1 + extra;
725
726 switch (c)
727 {
728 case 0x0a: /* LF */
729 *lenptr = 1;
730 return p;
731
732 case 0x0d: /* CR */
733 if (p < endptr && *p == 0x0a)
734 {
735 *lenptr = 2;
736 p++;
737 }
738 else *lenptr = 1;
739 return p;
740
741 default:
742 break;
743 }
744 } /* End of loop for ANYCRLF case */
745
746 *lenptr = 0; /* Must have hit the end */
747 return endptr;
748
749 case EL_ANY:
750 while (p < endptr)
751 {
752 int extra = 0;
753 register int c = *((unsigned char *)p);
754
755 if (utf8 && c >= 0xc0)
756 {
757 int gcii, gcss;
758 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
759 gcss = 6*extra;
760 c = (c & utf8_table3[extra]) << gcss;
761 for (gcii = 1; gcii <= extra; gcii++)
762 {
763 gcss -= 6;
764 c |= (p[gcii] & 0x3f) << gcss;
765 }
766 }
767
768 p += 1 + extra;
769
770 switch (c)
771 {
772 case 0x0a: /* LF */
773 case 0x0b: /* VT */
774 case 0x0c: /* FF */
775 *lenptr = 1;
776 return p;
777
778 case 0x0d: /* CR */
779 if (p < endptr && *p == 0x0a)
780 {
781 *lenptr = 2;
782 p++;
783 }
784 else *lenptr = 1;
785 return p;
786
787 case 0x85: /* NEL */
788 *lenptr = utf8? 2 : 1;
789 return p;
790
791 case 0x2028: /* LS */
792 case 0x2029: /* PS */
793 *lenptr = 3;
794 return p;
795
796 default:
797 break;
798 }
799 } /* End of loop for ANY case */
800
801 *lenptr = 0; /* Must have hit the end */
802 return endptr;
803 } /* End of overall switch */
804 }
805
806
807
808 /*************************************************
809 * Find start of previous line *
810 *************************************************/
811
812 /* This is called when looking back for before lines to print.
813
814 Arguments:
815 p start of the subsequent line
816 startptr start of available data
817
818 Returns: pointer to the start of the previous line
819 */
820
821 static char *
822 previous_line(char *p, char *startptr)
823 {
824 switch(endlinetype)
825 {
826 default: /* Just in case */
827 case EL_LF:
828 p--;
829 while (p > startptr && p[-1] != '\n') p--;
830 return p;
831
832 case EL_CR:
833 p--;
834 while (p > startptr && p[-1] != '\n') p--;
835 return p;
836
837 case EL_CRLF:
838 for (;;)
839 {
840 p -= 2;
841 while (p > startptr && p[-1] != '\n') p--;
842 if (p <= startptr + 1 || p[-2] == '\r') return p;
843 }
844 return p; /* But control should never get here */
845
846 case EL_ANY:
847 case EL_ANYCRLF:
848 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
849 if (utf8) while ((*p & 0xc0) == 0x80) p--;
850
851 while (p > startptr)
852 {
853 register int c;
854 char *pp = p - 1;
855
856 if (utf8)
857 {
858 int extra = 0;
859 while ((*pp & 0xc0) == 0x80) pp--;
860 c = *((unsigned char *)pp);
861 if (c >= 0xc0)
862 {
863 int gcii, gcss;
864 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
865 gcss = 6*extra;
866 c = (c & utf8_table3[extra]) << gcss;
867 for (gcii = 1; gcii <= extra; gcii++)
868 {
869 gcss -= 6;
870 c |= (pp[gcii] & 0x3f) << gcss;
871 }
872 }
873 }
874 else c = *((unsigned char *)pp);
875
876 if (endlinetype == EL_ANYCRLF) switch (c)
877 {
878 case 0x0a: /* LF */
879 case 0x0d: /* CR */
880 return p;
881
882 default:
883 break;
884 }
885
886 else switch (c)
887 {
888 case 0x0a: /* LF */
889 case 0x0b: /* VT */
890 case 0x0c: /* FF */
891 case 0x0d: /* CR */
892 case 0x85: /* NEL */
893 case 0x2028: /* LS */
894 case 0x2029: /* PS */
895 return p;
896
897 default:
898 break;
899 }
900
901 p = pp; /* Back one character */
902 } /* End of loop for ANY case */
903
904 return startptr; /* Hit start of data */
905 } /* End of overall switch */
906 }
907
908
909
910
911
912 /*************************************************
913 * Print the previous "after" lines *
914 *************************************************/
915
916 /* This is called if we are about to lose said lines because of buffer filling,
917 and at the end of the file. The data in the line is written using fwrite() so
918 that a binary zero does not terminate it.
919
920 Arguments:
921 lastmatchnumber the number of the last matching line, plus one
922 lastmatchrestart where we restarted after the last match
923 endptr end of available data
924 printname filename for printing
925
926 Returns: nothing
927 */
928
929 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
930 char *endptr, char *printname)
931 {
932 if (after_context > 0 && lastmatchnumber > 0)
933 {
934 int count = 0;
935 while (lastmatchrestart < endptr && count++ < after_context)
936 {
937 int ellength;
938 char *pp = lastmatchrestart;
939 if (printname != NULL) fprintf(stdout, "%s-", printname);
940 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
941 pp = end_of_line(pp, endptr, &ellength);
942 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
943 lastmatchrestart = pp;
944 }
945 hyphenpending = TRUE;
946 }
947 }
948
949
950
951 /*************************************************
952 * Apply patterns to subject till one matches *
953 *************************************************/
954
955 /* This function is called to run through all patterns, looking for a match. It
956 is used multiple times for the same subject when colouring is enabled, in order
957 to find all possible matches.
958
959 Arguments:
960 matchptr the start of the subject
961 length the length of the subject to match
962 startoffset where to start matching
963 offsets the offsets vector to fill in
964 mrc address of where to put the result of pcre_exec()
965
966 Returns: TRUE if there was a match
967 FALSE if there was no match
968 invert if there was a non-fatal error
969 */
970
971 static BOOL
972 match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
973 int *mrc)
974 {
975 int i;
976 size_t slen = length;
977 const char *msg = "this text:\n\n";
978 if (slen > 200)
979 {
980 slen = 200;
981 msg = "text that starts:\n\n";
982 }
983 for (i = 0; i < pattern_count; i++)
984 {
985 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
986 startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
987 if (*mrc >= 0) return TRUE;
988 if (*mrc == PCRE_ERROR_NOMATCH) continue;
989 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
990 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
991 fprintf(stderr, "%s", msg);
992 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
993 fprintf(stderr, "\n\n");
994 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
995 *mrc == PCRE_ERROR_JIT_STACKLIMIT)
996 resource_error = TRUE;
997 if (error_count++ > 20)
998 {
999 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1000 pcregrep_exit(2);
1001 }
1002 return invert; /* No more matching; don't show the line again */
1003 }
1004
1005 return FALSE; /* No match, no errors */
1006 }
1007
1008
1009
1010 /*************************************************
1011 * Grep an individual file *
1012 *************************************************/
1013
1014 /* This is called from grep_or_recurse() below. It uses a buffer that is three
1015 times the value of bufthird. The matching point is never allowed to stray into
1016 the top third of the buffer, thus keeping more of the file available for
1017 context printing or for multiline scanning. For large files, the pointer will
1018 be in the middle third most of the time, so the bottom third is available for
1019 "before" context printing.
1020
1021 Arguments:
1022 handle the fopened FILE stream for a normal file
1023 the gzFile pointer when reading is via libz
1024 the BZFILE pointer when reading is via libbz2
1025 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1026 filename the file name or NULL (for errors)
1027 printname the file name if it is to be printed for each match
1028 or NULL if the file name is not to be printed
1029 it cannot be NULL if filenames[_nomatch]_only is set
1030
1031 Returns: 0 if there was at least one match
1032 1 otherwise (no matches)
1033 2 if an overlong line is encountered
1034 3 if there is a read error on a .bz2 file
1035 */
1036
1037 static int
1038 pcregrep(void *handle, int frtype, char *filename, char *printname)
1039 {
1040 int rc = 1;
1041 int linenumber = 1;
1042 int lastmatchnumber = 0;
1043 int count = 0;
1044 int filepos = 0;
1045 int offsets[OFFSET_SIZE];
1046 char *lastmatchrestart = NULL;
1047 char *ptr = main_buffer;
1048 char *endptr;
1049 size_t bufflength;
1050 BOOL endhyphenpending = FALSE;
1051 BOOL input_line_buffered = line_buffered;
1052 FILE *in = NULL; /* Ensure initialized */
1053
1054 #ifdef SUPPORT_LIBZ
1055 gzFile ingz = NULL;
1056 #endif
1057
1058 #ifdef SUPPORT_LIBBZ2
1059 BZFILE *inbz2 = NULL;
1060 #endif
1061
1062
1063 /* Do the first read into the start of the buffer and set up the pointer to end
1064 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1065 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1066 fail. */
1067
1068 #ifdef SUPPORT_LIBZ
1069 if (frtype == FR_LIBZ)
1070 {
1071 ingz = (gzFile)handle;
1072 bufflength = gzread (ingz, main_buffer, bufsize);
1073 }
1074 else
1075 #endif
1076
1077 #ifdef SUPPORT_LIBBZ2
1078 if (frtype == FR_LIBBZ2)
1079 {
1080 inbz2 = (BZFILE *)handle;
1081 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1082 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1083 } /* without the cast it is unsigned. */
1084 else
1085 #endif
1086
1087 {
1088 in = (FILE *)handle;
1089 if (is_file_tty(in)) input_line_buffered = TRUE;
1090 bufflength = input_line_buffered?
1091 read_one_line(main_buffer, bufsize, in) :
1092 fread(main_buffer, 1, bufsize, in);
1093 }
1094
1095 endptr = main_buffer + bufflength;
1096
1097 /* Loop while the current pointer is not at the end of the file. For large
1098 files, endptr will be at the end of the buffer when we are in the middle of the
1099 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1100 way, the buffer is shifted left and re-filled. */
1101
1102 while (ptr < endptr)
1103 {
1104 int endlinelength;
1105 int mrc = 0;
1106 int startoffset = 0;
1107 BOOL match;
1108 char *matchptr = ptr;
1109 char *t = ptr;
1110 size_t length, linelength;
1111
1112 /* At this point, ptr is at the start of a line. We need to find the length
1113 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1114 length remainder of the data in the buffer. Otherwise, it is the length of
1115 the next line, excluding the terminating newline. After matching, we always
1116 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1117 option is used for compiling, so that any match is constrained to be in the
1118 first line. */
1119
1120 t = end_of_line(t, endptr, &endlinelength);
1121 linelength = t - ptr - endlinelength;
1122 length = multiline? (size_t)(endptr - ptr) : linelength;
1123
1124 /* Check to see if the line we are looking at extends right to the very end
1125 of the buffer without a line terminator. This means the line is too long to
1126 handle. */
1127
1128 if (endlinelength == 0 && t == main_buffer + bufsize)
1129 {
1130 fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1131 "pcregrep: check the --buffer-size option\n",
1132 linenumber,
1133 (filename == NULL)? "" : " of file ",
1134 (filename == NULL)? "" : filename);
1135 return 2;
1136 }
1137
1138 /* Extra processing for Jeffrey Friedl's debugging. */
1139
1140 #ifdef JFRIEDL_DEBUG
1141 if (jfriedl_XT || jfriedl_XR)
1142 {
1143 #include <sys/time.h>
1144 #include <time.h>
1145 struct timeval start_time, end_time;
1146 struct timezone dummy;
1147 int i;
1148
1149 if (jfriedl_XT)
1150 {
1151 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1152 const char *orig = ptr;
1153 ptr = malloc(newlen + 1);
1154 if (!ptr) {
1155 printf("out of memory");
1156 pcregrep_exit(2);
1157 }
1158 endptr = ptr;
1159 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1160 for (i = 0; i < jfriedl_XT; i++) {
1161 strncpy(endptr, orig, length);
1162 endptr += length;
1163 }
1164 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1165 length = newlen;
1166 }
1167
1168 if (gettimeofday(&start_time, &dummy) != 0)
1169 perror("bad gettimeofday");
1170
1171
1172 for (i = 0; i < jfriedl_XR; i++)
1173 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1174 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1175
1176 if (gettimeofday(&end_time, &dummy) != 0)
1177 perror("bad gettimeofday");
1178
1179 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1180 -
1181 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1182
1183 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1184 return 0;
1185 }
1186 #endif
1187
1188 /* We come back here after a match when the -o option (only_matching) is set,
1189 in order to find any further matches in the same line. */
1190
1191 ONLY_MATCHING_RESTART:
1192
1193 /* Run through all the patterns until one matches or there is an error other
1194 than NOMATCH. This code is in a subroutine so that it can be re-used for
1195 finding subsequent matches when colouring matched lines. */
1196
1197 match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1198
1199 /* If it's a match or a not-match (as required), do what's wanted. */
1200
1201 if (match != invert)
1202 {
1203 BOOL hyphenprinted = FALSE;
1204
1205 /* We've failed if we want a file that doesn't have any matches. */
1206
1207 if (filenames == FN_NOMATCH_ONLY) return 1;
1208
1209 /* Just count if just counting is wanted. */
1210
1211 if (count_only) count++;
1212
1213 /* If all we want is a file name, there is no need to scan any more lines
1214 in the file. */
1215
1216 else if (filenames == FN_MATCH_ONLY)
1217 {
1218 fprintf(stdout, "%s\n", printname);
1219 return 0;
1220 }
1221
1222 /* Likewise, if all we want is a yes/no answer. */
1223
1224 else if (quiet) return 0;
1225
1226 /* The --only-matching option prints just the substring that matched, or a
1227 captured portion of it, as long as this string is not empty, and the
1228 --file-offsets and --line-offsets options output offsets for the matching
1229 substring (they both force --only-matching = 0). None of these options
1230 prints any context. Afterwards, adjust the start and then jump back to look
1231 for further matches in the same line. If we are in invert mode, however,
1232 nothing is printed and we do not restart - this could still be useful
1233 because the return code is set. */
1234
1235 else if (only_matching >= 0)
1236 {
1237 if (!invert)
1238 {
1239 if (printname != NULL) fprintf(stdout, "%s:", printname);
1240 if (number) fprintf(stdout, "%d:", linenumber);
1241 if (line_offsets)
1242 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1243 offsets[1] - offsets[0]);
1244 else if (file_offsets)
1245 fprintf(stdout, "%d,%d\n",
1246 (int)(filepos + matchptr + offsets[0] - ptr),
1247 offsets[1] - offsets[0]);
1248 else if (only_matching < mrc)
1249 {
1250 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1251 if (plen > 0)
1252 {
1253 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1254 FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1255 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1256 fprintf(stdout, "\n");
1257 }
1258 }
1259 else if (printname != NULL || number) fprintf(stdout, "\n");
1260 match = FALSE;
1261 if (line_buffered) fflush(stdout);
1262 rc = 0; /* Had some success */
1263 startoffset = offsets[1]; /* Restart after the match */
1264 goto ONLY_MATCHING_RESTART;
1265 }
1266 }
1267
1268 /* This is the default case when none of the above options is set. We print
1269 the matching lines(s), possibly preceded and/or followed by other lines of
1270 context. */
1271
1272 else
1273 {
1274 /* See if there is a requirement to print some "after" lines from a
1275 previous match. We never print any overlaps. */
1276
1277 if (after_context > 0 && lastmatchnumber > 0)
1278 {
1279 int ellength;
1280 int linecount = 0;
1281 char *p = lastmatchrestart;
1282
1283 while (p < ptr && linecount < after_context)
1284 {
1285 p = end_of_line(p, ptr, &ellength);
1286 linecount++;
1287 }
1288
1289 /* It is important to advance lastmatchrestart during this printing so
1290 that it interacts correctly with any "before" printing below. Print
1291 each line's data using fwrite() in case there are binary zeroes. */
1292
1293 while (lastmatchrestart < p)
1294 {
1295 char *pp = lastmatchrestart;
1296 if (printname != NULL) fprintf(stdout, "%s-", printname);
1297 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1298 pp = end_of_line(pp, endptr, &ellength);
1299 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1300 lastmatchrestart = pp;
1301 }
1302 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1303 }
1304
1305 /* If there were non-contiguous lines printed above, insert hyphens. */
1306
1307 if (hyphenpending)
1308 {
1309 fprintf(stdout, "--\n");
1310 hyphenpending = FALSE;
1311 hyphenprinted = TRUE;
1312 }
1313
1314 /* See if there is a requirement to print some "before" lines for this
1315 match. Again, don't print overlaps. */
1316
1317 if (before_context > 0)
1318 {
1319 int linecount = 0;
1320 char *p = ptr;
1321
1322 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1323 linecount < before_context)
1324 {
1325 linecount++;
1326 p = previous_line(p, main_buffer);
1327 }
1328
1329 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1330 fprintf(stdout, "--\n");
1331
1332 while (p < ptr)
1333 {
1334 int ellength;
1335 char *pp = p;
1336 if (printname != NULL) fprintf(stdout, "%s-", printname);
1337 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1338 pp = end_of_line(pp, endptr, &ellength);
1339 FWRITE(p, 1, pp - p, stdout);
1340 p = pp;
1341 }
1342 }
1343
1344 /* Now print the matching line(s); ensure we set hyphenpending at the end
1345 of the file if any context lines are being output. */
1346
1347 if (after_context > 0 || before_context > 0)
1348 endhyphenpending = TRUE;
1349
1350 if (printname != NULL) fprintf(stdout, "%s:", printname);
1351 if (number) fprintf(stdout, "%d:", linenumber);
1352
1353 /* In multiline mode, we want to print to the end of the line in which
1354 the end of the matched string is found, so we adjust linelength and the
1355 line number appropriately, but only when there actually was a match
1356 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1357 the match will always be before the first newline sequence. */
1358
1359 if (multiline & !invert)
1360 {
1361 char *endmatch = ptr + offsets[1];
1362 t = ptr;
1363 while (t < endmatch)
1364 {
1365 t = end_of_line(t, endptr, &endlinelength);
1366 if (t < endmatch) linenumber++; else break;
1367 }
1368 linelength = t - ptr - endlinelength;
1369 }
1370
1371 /*** NOTE: Use only fwrite() to output the data line, so that binary
1372 zeroes are treated as just another data character. */
1373
1374 /* This extra option, for Jeffrey Friedl's debugging requirements,
1375 replaces the matched string, or a specific captured string if it exists,
1376 with X. When this happens, colouring is ignored. */
1377
1378 #ifdef JFRIEDL_DEBUG
1379 if (S_arg >= 0 && S_arg < mrc)
1380 {
1381 int first = S_arg * 2;
1382 int last = first + 1;
1383 FWRITE(ptr, 1, offsets[first], stdout);
1384 fprintf(stdout, "X");
1385 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1386 }
1387 else
1388 #endif
1389
1390 /* We have to split the line(s) up if colouring, and search for further
1391 matches, but not of course if the line is a non-match. */
1392
1393 if (do_colour && !invert)
1394 {
1395 int plength;
1396 FWRITE(ptr, 1, offsets[0], stdout);
1397 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1398 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1399 fprintf(stdout, "%c[00m", 0x1b);
1400 for (;;)
1401 {
1402 startoffset = offsets[1];
1403 if (startoffset >= (int)linelength + endlinelength ||
1404 !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1405 break;
1406 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1407 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1408 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1409 fprintf(stdout, "%c[00m", 0x1b);
1410 }
1411
1412 /* In multiline mode, we may have already printed the complete line
1413 and its line-ending characters (if they matched the pattern), so there
1414 may be no more to print. */
1415
1416 plength = (int)((linelength + endlinelength) - startoffset);
1417 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1418 }
1419
1420 /* Not colouring; no need to search for further matches */
1421
1422 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1423 }
1424
1425 /* End of doing what has to be done for a match. If --line-buffered was
1426 given, flush the output. */
1427
1428 if (line_buffered) fflush(stdout);
1429 rc = 0; /* Had some success */
1430
1431 /* Remember where the last match happened for after_context. We remember
1432 where we are about to restart, and that line's number. */
1433
1434 lastmatchrestart = ptr + linelength + endlinelength;
1435 lastmatchnumber = linenumber + 1;
1436 }
1437
1438 /* For a match in multiline inverted mode (which of course did not cause
1439 anything to be printed), we have to move on to the end of the match before
1440 proceeding. */
1441
1442 if (multiline && invert && match)
1443 {
1444 int ellength;
1445 char *endmatch = ptr + offsets[1];
1446 t = ptr;
1447 while (t < endmatch)
1448 {
1449 t = end_of_line(t, endptr, &ellength);
1450 if (t <= endmatch) linenumber++; else break;
1451 }
1452 endmatch = end_of_line(endmatch, endptr, &ellength);
1453 linelength = endmatch - ptr - ellength;
1454 }
1455
1456 /* Advance to after the newline and increment the line number. The file
1457 offset to the current line is maintained in filepos. */
1458
1459 ptr += linelength + endlinelength;
1460 filepos += (int)(linelength + endlinelength);
1461 linenumber++;
1462
1463 /* If input is line buffered, and the buffer is not yet full, read another
1464 line and add it into the buffer. */
1465
1466 if (input_line_buffered && bufflength < (size_t)bufsize)
1467 {
1468 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1469 bufflength += add;
1470 endptr += add;
1471 }
1472
1473 /* If we haven't yet reached the end of the file (the buffer is full), and
1474 the current point is in the top 1/3 of the buffer, slide the buffer down by
1475 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1476 about to be lost, print them. */
1477
1478 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1479 {
1480 if (after_context > 0 &&
1481 lastmatchnumber > 0 &&
1482 lastmatchrestart < main_buffer + bufthird)
1483 {
1484 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1485 lastmatchnumber = 0;
1486 }
1487
1488 /* Now do the shuffle */
1489
1490 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1491 ptr -= bufthird;
1492
1493 #ifdef SUPPORT_LIBZ
1494 if (frtype == FR_LIBZ)
1495 bufflength = 2*bufthird +
1496 gzread (ingz, main_buffer + 2*bufthird, bufthird);
1497 else
1498 #endif
1499
1500 #ifdef SUPPORT_LIBBZ2
1501 if (frtype == FR_LIBBZ2)
1502 bufflength = 2*bufthird +
1503 BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1504 else
1505 #endif
1506
1507 bufflength = 2*bufthird +
1508 (input_line_buffered?
1509 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1510 fread(main_buffer + 2*bufthird, 1, bufthird, in));
1511 endptr = main_buffer + bufflength;
1512
1513 /* Adjust any last match point */
1514
1515 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1516 }
1517 } /* Loop through the whole file */
1518
1519 /* End of file; print final "after" lines if wanted; do_after_lines sets
1520 hyphenpending if it prints something. */
1521
1522 if (only_matching < 0 && !count_only)
1523 {
1524 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1525 hyphenpending |= endhyphenpending;
1526 }
1527
1528 /* Print the file name if we are looking for those without matches and there
1529 were none. If we found a match, we won't have got this far. */
1530
1531 if (filenames == FN_NOMATCH_ONLY)
1532 {
1533 fprintf(stdout, "%s\n", printname);
1534 return 0;
1535 }
1536
1537 /* Print the match count if wanted */
1538
1539 if (count_only)
1540 {
1541 if (count > 0 || !omit_zero_count)
1542 {
1543 if (printname != NULL && filenames != FN_NONE)
1544 fprintf(stdout, "%s:", printname);
1545 fprintf(stdout, "%d\n", count);
1546 }
1547 }
1548
1549 return rc;
1550 }
1551
1552
1553
1554 /*************************************************
1555 * Grep a file or recurse into a directory *
1556 *************************************************/
1557
1558 /* Given a path name, if it's a directory, scan all the files if we are
1559 recursing; if it's a file, grep it.
1560
1561 Arguments:
1562 pathname the path to investigate
1563 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1564 only_one_at_top TRUE if the path is the only one at toplevel
1565
1566 Returns: 0 if there was at least one match
1567 1 if there were no matches
1568 2 there was some kind of error
1569
1570 However, file opening failures are suppressed if "silent" is set.
1571 */
1572
1573 static int
1574 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1575 {
1576 int rc = 1;
1577 int sep;
1578 int frtype;
1579 void *handle;
1580 FILE *in = NULL; /* Ensure initialized */
1581
1582 #ifdef SUPPORT_LIBZ
1583 gzFile ingz = NULL;
1584 #endif
1585
1586 #ifdef SUPPORT_LIBBZ2
1587 BZFILE *inbz2 = NULL;
1588 #endif
1589
1590 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBZ2
1591 int pathlen;
1592 #endif
1593
1594 /* If the file name is "-" we scan stdin */
1595
1596 if (strcmp(pathname, "-") == 0)
1597 {
1598 return pcregrep(stdin, FR_PLAIN, stdin_name,
1599 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1600 stdin_name : NULL);
1601 }
1602
1603 /* If the file is a directory, skip if skipping or if we are recursing, scan
1604 each file and directory within it, subject to any include or exclude patterns
1605 that were set. The scanning code is localized so it can be made
1606 system-specific. */
1607
1608 if ((sep = isdirectory(pathname)) != 0)
1609 {
1610 if (dee_action == dee_SKIP) return 1;
1611 if (dee_action == dee_RECURSE)
1612 {
1613 char buffer[1024];
1614 char *nextfile;
1615 directory_type *dir = opendirectory(pathname);
1616
1617 if (dir == NULL)
1618 {
1619 if (!silent)
1620 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1621 strerror(errno));
1622 return 2;
1623 }
1624
1625 while ((nextfile = readdirectory(dir)) != NULL)
1626 {
1627 int frc, nflen;
1628 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1629 nflen = (int)(strlen(nextfile));
1630
1631 if (isdirectory(buffer))
1632 {
1633 if (exclude_dir_compiled != NULL &&
1634 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1635 continue;
1636
1637 if (include_dir_compiled != NULL &&
1638 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1639 continue;
1640 }
1641 else
1642 {
1643 if (exclude_compiled != NULL &&
1644 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1645 continue;
1646
1647 if (include_compiled != NULL &&
1648 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1649 continue;
1650 }
1651
1652 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1653 if (frc > 1) rc = frc;
1654 else if (frc == 0 && rc == 1) rc = 0;
1655 }
1656
1657 closedirectory(dir);
1658 return rc;
1659 }
1660 }
1661
1662 /* If the file is not a directory and not a regular file, skip it if that's
1663 been requested. */
1664
1665 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1666
1667 /* Control reaches here if we have a regular file, or if we have a directory
1668 and recursion or skipping was not requested, or if we have anything else and
1669 skipping was not requested. The scan proceeds. If this is the first and only
1670 argument at top level, we don't show the file name, unless we are only showing
1671 the file name, or the filename was forced (-H). */
1672
1673 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBZ2
1674 pathlen = (int)(strlen(pathname));
1675 #endif
1676
1677 /* Open using zlib if it is supported and the file name ends with .gz. */
1678
1679 #ifdef SUPPORT_LIBZ
1680 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1681 {
1682 ingz = gzopen(pathname, "rb");
1683 if (ingz == NULL)
1684 {
1685 if (!silent)
1686 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1687 strerror(errno));
1688 return 2;
1689 }
1690 handle = (void *)ingz;
1691 frtype = FR_LIBZ;
1692 }
1693 else
1694 #endif
1695
1696 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1697
1698 #ifdef SUPPORT_LIBBZ2
1699 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1700 {
1701 inbz2 = BZ2_bzopen(pathname, "rb");
1702 handle = (void *)inbz2;
1703 frtype = FR_LIBBZ2;
1704 }
1705 else
1706 #endif
1707
1708 /* Otherwise use plain fopen(). The label is so that we can come back here if
1709 an attempt to read a .bz2 file indicates that it really is a plain file. */
1710
1711 #ifdef SUPPORT_LIBBZ2
1712 PLAIN_FILE:
1713 #endif
1714 {
1715 in = fopen(pathname, "rb");
1716 handle = (void *)in;
1717 frtype = FR_PLAIN;
1718 }
1719
1720 /* All the opening methods return errno when they fail. */
1721
1722 if (handle == NULL)
1723 {
1724 if (!silent)
1725 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1726 strerror(errno));
1727 return 2;
1728 }
1729
1730 /* Now grep the file */
1731
1732 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1733 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1734
1735 /* Close in an appropriate manner. */
1736
1737 #ifdef SUPPORT_LIBZ
1738 if (frtype == FR_LIBZ)
1739 gzclose(ingz);
1740 else
1741 #endif
1742
1743 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1744 read failed. If the error indicates that the file isn't in fact bzipped, try
1745 again as a normal file. */
1746
1747 #ifdef SUPPORT_LIBBZ2
1748 if (frtype == FR_LIBBZ2)
1749 {
1750 if (rc == 3)
1751 {
1752 int errnum;
1753 const char *err = BZ2_bzerror(inbz2, &errnum);
1754 if (errnum == BZ_DATA_ERROR_MAGIC)
1755 {
1756 BZ2_bzclose(inbz2);
1757 goto PLAIN_FILE;
1758 }
1759 else if (!silent)
1760 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1761 pathname, err);
1762 rc = 2; /* The normal "something went wrong" code */
1763 }
1764 BZ2_bzclose(inbz2);
1765 }
1766 else
1767 #endif
1768
1769 /* Normal file close */
1770
1771 fclose(in);
1772
1773 /* Pass back the yield from pcregrep(). */
1774
1775 return rc;
1776 }
1777
1778
1779
1780
1781 /*************************************************
1782 * Usage function *
1783 *************************************************/
1784
1785 static int
1786 usage(int rc)
1787 {
1788 option_item *op;
1789 fprintf(stderr, "Usage: pcregrep [-");
1790 for (op = optionlist; op->one_char != 0; op++)
1791 {
1792 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1793 }
1794 fprintf(stderr, "] [long options] [pattern] [files]\n");
1795 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1796 "options.\n");
1797 return rc;
1798 }
1799
1800
1801
1802
1803 /*************************************************
1804 * Help function *
1805 *************************************************/
1806
1807 static void
1808 help(void)
1809 {
1810 option_item *op;
1811
1812 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1813 printf("Search for PATTERN in each FILE or standard input.\n");
1814 printf("PATTERN must be present if neither -e nor -f is used.\n");
1815 printf("\"-\" can be used as a file name to mean STDIN.\n");
1816
1817 #ifdef SUPPORT_LIBZ
1818 printf("Files whose names end in .gz are read using zlib.\n");
1819 #endif
1820
1821 #ifdef SUPPORT_LIBBZ2
1822 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1823 #endif
1824
1825 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1826 printf("Other files and the standard input are read as plain files.\n\n");
1827 #else
1828 printf("All files are read as plain files, without any interpretation.\n\n");
1829 #endif
1830
1831 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1832 printf("Options:\n");
1833
1834 for (op = optionlist; op->one_char != 0; op++)
1835 {
1836 int n;
1837 char s[4];
1838
1839 /* Two options were accidentally implemented and documented with underscores
1840 instead of hyphens in their names, something that was not noticed for quite a
1841 few releases. When fixing this, I left the underscored versions in the list
1842 in case people were using them. However, we don't want to display them in the
1843 help data. There are no other options that contain underscores, and we do not
1844 expect ever to implement such options. Therefore, just omit any option that
1845 contains an underscore. */
1846
1847 if (strchr(op->long_name, '_') != NULL) continue;
1848
1849 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1850 n = 31 - printf(" %s --%s", s, op->long_name);
1851 if (n < 1) n = 1;
1852 printf("%.*s%s\n", n, " ", op->help_text);
1853 }
1854
1855 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1856 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1857 printf("When reading patterns or file names from a file, trailing white\n");
1858 printf("space is removed and blank lines are ignored.\n");
1859 printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1860 MAX_PATTERN_COUNT, PATBUFSIZE);
1861
1862 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1863 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1864 }
1865
1866
1867
1868
1869 /*************************************************
1870 * Handle a single-letter, no data option *
1871 *************************************************/
1872
1873 static int
1874 handle_option(int letter, int options)
1875 {
1876 switch(letter)
1877 {
1878 case N_FOFFSETS: file_offsets = TRUE; break;
1879 case N_HELP: help(); pcregrep_exit(0);
1880 case N_LBUFFER: line_buffered = TRUE; break;
1881 case N_LOFFSETS: line_offsets = number = TRUE; break;
1882 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
1883 case 'c': count_only = TRUE; break;
1884 case 'F': process_options |= PO_FIXED_STRINGS; break;
1885 case 'H': filenames = FN_FORCE; break;
1886 case 'h': filenames = FN_NONE; break;
1887 case 'i': options |= PCRE_CASELESS; break;
1888 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1889 case 'L': filenames = FN_NOMATCH_ONLY; break;
1890 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1891 case 'n': number = TRUE; break;
1892 case 'o': only_matching = 0; break;
1893 case 'q': quiet = TRUE; break;
1894 case 'r': dee_action = dee_RECURSE; break;
1895 case 's': silent = TRUE; break;
1896 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1897 case 'v': invert = TRUE; break;
1898 case 'w': process_options |= PO_WORD_MATCH; break;
1899 case 'x': process_options |= PO_LINE_MATCH; break;
1900
1901 case 'V':
1902 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1903 pcregrep_exit(0);
1904 break;
1905
1906 default:
1907 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1908 pcregrep_exit(usage(2));
1909 }
1910
1911 return options;
1912 }
1913
1914
1915
1916
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12, or 13 take "th" ("11th", "112th"), not the suffix that their final
digit would otherwise select.

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; the buffer is
              overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[16];     /* Room for any int, a suffix, and the zero */
const char *ending = "th";
int lasttwo = n % 100;

/* For a negative n both remainders are negative or zero, so no case below
matches and the suffix stays as "th". */

if (lasttwo < 11 || lasttwo > 13) switch (n%10)
  {
  case 1:  ending = "st"; break;
  case 2:  ending = "nd"; break;
  case 3:  ending = "rd"; break;
  default: break;
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1939
1940
1941
1942 /*************************************************
1943 * Compile a single pattern *
1944 *************************************************/
1945
1946 /* When the -F option has been used, this is called for each substring.
1947 Otherwise it's called for each supplied pattern.
1948
1949 Arguments:
1950 pattern the pattern string
1951 options the PCRE options
1952 filename the file name, or NULL for a command-line pattern
1953 count 0 if this is the only command line pattern, or
1954 number of the command line pattern, or
1955 linenumber for a pattern from a file
1956
1957 Returns: TRUE on success, FALSE after an error
1958 */
1959
1960 static BOOL
1961 compile_single_pattern(char *pattern, int options, char *filename, int count)
1962 {
1963 char buffer[PATBUFSIZE];
1964 const char *error;
1965 int errptr;
1966
1967 if (pattern_count >= MAX_PATTERN_COUNT)
1968 {
1969 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1970 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1971 return FALSE;
1972 }
1973
1974 sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
1975 suffix[process_options]);
1976 pattern_list[pattern_count] =
1977 pcre_compile(buffer, options, &error, &errptr, pcretables);
1978 if (pattern_list[pattern_count] != NULL)
1979 {
1980 pattern_count++;
1981 return TRUE;
1982 }
1983
1984 /* Handle compile errors */
1985
1986 errptr -= (int)strlen(prefix[process_options]);
1987 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1988
1989 if (filename == NULL)
1990 {
1991 if (count == 0)
1992 fprintf(stderr, "pcregrep: Error in command-line regex "
1993 "at offset %d: %s\n", errptr, error);
1994 else
1995 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1996 "at offset %d: %s\n", ordin(count), errptr, error);
1997 }
1998 else
1999 {
2000 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2001 "at offset %d: %s\n", count, filename, errptr, error);
2002 }
2003
2004 return FALSE;
2005 }
2006
2007
2008
2009 /*************************************************
2010 * Compile one supplied pattern *
2011 *************************************************/
2012
2013 /* When the -F option has been used, each string may be a list of strings,
2014 separated by line breaks. They will be matched literally.
2015
2016 Arguments:
2017 pattern the pattern string
2018 options the PCRE options
2019 filename the file name, or NULL for a command-line pattern
2020 count 0 if this is the only command line pattern, or
2021 number of the command line pattern, or
2022 linenumber for a pattern from a file
2023
2024 Returns: TRUE on success, FALSE after an error
2025 */
2026
2027 static BOOL
2028 compile_pattern(char *pattern, int options, char *filename, int count)
2029 {
2030 if ((process_options & PO_FIXED_STRINGS) != 0)
2031 {
2032 char *eop = pattern + strlen(pattern);
2033 char buffer[PATBUFSIZE];
2034 for(;;)
2035 {
2036 int ellength;
2037 char *p = end_of_line(pattern, eop, &ellength);
2038 if (ellength == 0)
2039 return compile_single_pattern(pattern, options, filename, count);
2040 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
2041 pattern = p;
2042 if (!compile_single_pattern(buffer, options, filename, count))
2043 return FALSE;
2044 }
2045 }
2046 else return compile_single_pattern(pattern, options, filename, count);
2047 }
2048
2049
2050
2051 /*************************************************
2052 * Main program *
2053 *************************************************/
2054
2055 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2056
2057 int
2058 main(int argc, char **argv)
2059 {
2060 int i, j;
2061 int rc = 1;
2062 int pcre_options = 0;
2063 int cmd_pattern_count = 0;
2064 int hint_count = 0;
2065 int errptr;
2066 BOOL only_one_at_top;
2067 char *patterns[MAX_PATTERN_COUNT];
2068 const char *locale_from = "--locale";
2069 const char *error;
2070
2071 #ifdef SUPPORT_PCREGREP_JIT
2072 pcre_jit_stack *jit_stack = NULL;
2073 #endif
2074
2075 /* Set the default line ending value from the default in the PCRE library;
2076 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2077 Note that the return values from pcre_config(), though derived from the ASCII
2078 codes, are the same in EBCDIC environments, so we must use the actual values
2079 rather than escapes such as as '\r'. */
2080
2081 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2082 switch(i)
2083 {
2084 default: newline = (char *)"lf"; break;
2085 case 13: newline = (char *)"cr"; break;
2086 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2087 case -1: newline = (char *)"any"; break;
2088 case -2: newline = (char *)"anycrlf"; break;
2089 }
2090
2091 /* Process the options */
2092
2093 for (i = 1; i < argc; i++)
2094 {
2095 option_item *op = NULL;
2096 char *option_data = (char *)""; /* default to keep compiler happy */
2097 BOOL longop;
2098 BOOL longopwasequals = FALSE;
2099
2100 if (argv[i][0] != '-') break;
2101
2102 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2103 but only if we have previously had -e or -f to define the patterns. */
2104
2105 if (argv[i][1] == 0)
2106 {
2107 if (pattern_filename != NULL || pattern_count > 0) break;
2108 else pcregrep_exit(usage(2));
2109 }
2110
2111 /* Handle a long name option, or -- to terminate the options */
2112
2113 if (argv[i][1] == '-')
2114 {
2115 char *arg = argv[i] + 2;
2116 char *argequals = strchr(arg, '=');
2117
2118 if (*arg == 0) /* -- terminates options */
2119 {
2120 i++;
2121 break; /* out of the options-handling loop */
2122 }
2123
2124 longop = TRUE;
2125
2126 /* Some long options have data that follows after =, for example file=name.
2127 Some options have variations in the long name spelling: specifically, we
2128 allow "regexp" because GNU grep allows it, though I personally go along
2129 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2130 These options are entered in the table as "regex(p)". Options can be in
2131 both these categories. */
2132
2133 for (op = optionlist; op->one_char != 0; op++)
2134 {
2135 char *opbra = strchr(op->long_name, '(');
2136 char *equals = strchr(op->long_name, '=');
2137
2138 /* Handle options with only one spelling of the name */
2139
2140 if (opbra == NULL) /* Does not contain '(' */
2141 {
2142 if (equals == NULL) /* Not thing=data case */
2143 {
2144 if (strcmp(arg, op->long_name) == 0) break;
2145 }
2146 else /* Special case xxx=data */
2147 {
2148 int oplen = (int)(equals - op->long_name);
2149 int arglen = (argequals == NULL)?
2150 (int)strlen(arg) : (int)(argequals - arg);
2151 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2152 {
2153 option_data = arg + arglen;
2154 if (*option_data == '=')
2155 {
2156 option_data++;
2157 longopwasequals = TRUE;
2158 }
2159 break;
2160 }
2161 }
2162 }
2163
2164 /* Handle options with an alternate spelling of the name */
2165
2166 else
2167 {
2168 char buff1[24];
2169 char buff2[24];
2170
2171 int baselen = (int)(opbra - op->long_name);
2172 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2173 int arglen = (argequals == NULL || equals == NULL)?
2174 (int)strlen(arg) : (int)(argequals - arg);
2175
2176 sprintf(buff1, "%.*s", baselen, op->long_name);
2177 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2178
2179 if (strncmp(arg, buff1, arglen) == 0 ||
2180 strncmp(arg, buff2, arglen) == 0)
2181 {
2182 if (equals != NULL && argequals != NULL)
2183 {
2184 option_data = argequals;
2185 if (*option_data == '=')
2186 {
2187 option_data++;
2188 longopwasequals = TRUE;
2189 }
2190 }
2191 break;
2192 }
2193 }
2194 }
2195
2196 if (op->one_char == 0)
2197 {
2198 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2199 pcregrep_exit(usage(2));
2200 }
2201 }
2202
2203 /* Jeffrey Friedl's debugging harness uses these additional options which
2204 are not in the right form for putting in the option table because they use
2205 only one hyphen, yet are more than one character long. By putting them
2206 separately here, they will not get displayed as part of the help() output,
2207 but I don't think Jeffrey will care about that. */
2208
2209 #ifdef JFRIEDL_DEBUG
2210 else if (strcmp(argv[i], "-pre") == 0) {
2211 jfriedl_prefix = argv[++i];
2212 continue;
2213 } else if (strcmp(argv[i], "-post") == 0) {
2214 jfriedl_postfix = argv[++i];
2215 continue;
2216 } else if (strcmp(argv[i], "-XT") == 0) {
2217 sscanf(argv[++i], "%d", &jfriedl_XT);
2218 continue;
2219 } else if (strcmp(argv[i], "-XR") == 0) {
2220 sscanf(argv[++i], "%d", &jfriedl_XR);
2221 continue;
2222 }
2223 #endif
2224
2225
2226 /* One-char options; many that have no data may be in a single argument; we
2227 continue till we hit the last one or one that needs data. */
2228
2229 else
2230 {
2231 char *s = argv[i] + 1;
2232 longop = FALSE;
2233 while (*s != 0)
2234 {
2235 for (op = optionlist; op->one_char != 0; op++)
2236 {
2237 if (*s == op->one_char) break;
2238 }
2239 if (op->one_char == 0)
2240 {
2241 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2242 *s, argv[i]);
2243 pcregrep_exit(usage(2));
2244 }
2245
2246 /* Check for a single-character option that has data: OP_OP_NUMBER
2247 is used for one that either has a numerical number or defaults, i.e. the
2248 data is optional. If a digit follows, there is data; if not, carry on
2249 with other single-character options in the same string. */
2250
2251 option_data = s+1;
2252 if (op->type == OP_OP_NUMBER)
2253 {
2254 if (isdigit((unsigned char)s[1])) break;
2255 }
2256 else /* Check for end or a dataless option */
2257 {
2258 if (op->type != OP_NODATA || s[1] == 0) break;
2259 }
2260
2261 /* Handle a single-character option with no data, then loop for the
2262 next character in the string. */
2263
2264 pcre_options = handle_option(*s++, pcre_options);
2265 }
2266 }
2267
2268 /* At this point we should have op pointing to a matched option. If the type
2269 is NO_DATA, it means that there is no data, and the option might set
2270 something in the PCRE options. */
2271
2272 if (op->type == OP_NODATA)
2273 {
2274 pcre_options = handle_option(op->one_char, pcre_options);
2275 continue;
2276 }
2277
2278 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2279 either has a value or defaults to something. It cannot have data in a
2280 separate item. At the moment, the only such options are "colo(u)r",
2281 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2282
2283 if (*option_data == 0 &&
2284 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2285 {
2286 switch (op->one_char)
2287 {
2288 case N_COLOUR:
2289 colour_option = (char *)"auto";
2290 break;
2291
2292 case 'o':
2293 only_matching = 0;
2294 break;
2295
2296 #ifdef JFRIEDL_DEBUG
2297 case 'S':
2298 S_arg = 0;
2299 break;
2300 #endif
2301 }
2302 continue;
2303 }
2304
2305 /* Otherwise, find the data string for the option. */
2306
2307 if (*option_data == 0)
2308 {
2309 if (i >= argc - 1 || longopwasequals)
2310 {
2311 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2312 pcregrep_exit(usage(2));
2313 }
2314 option_data = argv[++i];
2315 }
2316
2317 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2318 multiple times to create a list of patterns. */
2319
2320 if (op->type == OP_PATLIST)
2321 {
2322 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2323 {
2324 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2325 MAX_PATTERN_COUNT);
2326 return 2;
2327 }
2328 patterns[cmd_pattern_count++] = option_data;
2329 }
2330
2331 /* Otherwise, deal with single string or numeric data values. */
2332
2333 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2334 op->type != OP_OP_NUMBER)
2335 {
2336 *((char **)op->dataptr) = option_data;
2337 }
2338
2339 /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2340 only for unpicking arguments, so just keep it simple. */
2341
2342 else
2343 {
2344 unsigned long int n = 0;
2345 char *endptr = option_data;
2346 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2347 while (isdigit((unsigned char)(*endptr)))
2348 n = n * 10 + (int)(*endptr++ - '0');
2349 if (toupper(*endptr) == 'K')
2350 {
2351 n *= 1024;
2352 endptr++;
2353 }
2354 else if (toupper(*endptr) == 'M')
2355 {
2356 n *= 1024*1024;
2357 endptr++;
2358 }
2359 if (*endptr != 0)
2360 {
2361 if (longop)
2362 {
2363 char *equals = strchr(op->long_name, '=');
2364 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2365 (int)(equals - op->long_name);
2366 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2367 option_data, nlen, op->long_name);
2368 }
2369 else
2370 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2371 option_data, op->one_char);
2372 pcregrep_exit(usage(2));
2373 }
2374 if (op->type == OP_LONGNUMBER)
2375 *((unsigned long int *)op->dataptr) = n;
2376 else
2377 *((int *)op->dataptr) = n;
2378 }
2379 }
2380
2381 /* Options have been decoded. If -C was used, its value is used as a default
2382 for -A and -B. */
2383
2384 if (both_context > 0)
2385 {
2386 if (after_context == 0) after_context = both_context;
2387 if (before_context == 0) before_context = both_context;
2388 }
2389
2390 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2391 However, the latter two set only_matching. */
2392
2393 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2394 (file_offsets && line_offsets))
2395 {
2396 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2397 "and/or --line-offsets\n");
2398 pcregrep_exit(usage(2));
2399 }
2400
2401 if (file_offsets || line_offsets) only_matching = 0;
2402
2403 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2404 LC_ALL environment variable is set, and if so, use it. */
2405
2406 if (locale == NULL)
2407 {
2408 locale = getenv("LC_ALL");
2409 locale_from = "LCC_ALL";
2410 }
2411
2412 if (locale == NULL)
2413 {
2414 locale = getenv("LC_CTYPE");
2415 locale_from = "LC_CTYPE";
2416 }
2417
2418 /* If a locale has been provided, set it, and generate the tables the PCRE
2419 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2420
2421 if (locale != NULL)
2422 {
2423 if (setlocale(LC_CTYPE, locale) == NULL)
2424 {
2425 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2426 locale, locale_from);
2427 return 2;
2428 }
2429 pcretables = pcre_maketables();
2430 }
2431
2432 /* Sort out colouring */
2433
2434 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2435 {
2436 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2437 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2438 else
2439 {
2440 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2441 colour_option);
2442 return 2;
2443 }
2444 if (do_colour)
2445 {
2446 char *cs = getenv("PCREGREP_COLOUR");
2447 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2448 if (cs != NULL) colour_string = cs;
2449 }
2450 }
2451
2452 /* Interpret the newline type; the default settings are Unix-like. */
2453
2454 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2455 {
2456 pcre_options |= PCRE_NEWLINE_CR;
2457 endlinetype = EL_CR;
2458 }
2459 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2460 {
2461 pcre_options |= PCRE_NEWLINE_LF;
2462 endlinetype = EL_LF;
2463 }
2464 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2465 {
2466 pcre_options |= PCRE_NEWLINE_CRLF;
2467 endlinetype = EL_CRLF;
2468 }
2469 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2470 {
2471 pcre_options |= PCRE_NEWLINE_ANY;
2472 endlinetype = EL_ANY;
2473 }
2474 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2475 {
2476 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2477 endlinetype = EL_ANYCRLF;
2478 }
2479 else
2480 {
2481 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2482 return 2;
2483 }
2484
2485 /* Interpret the text values for -d and -D */
2486
2487 if (dee_option != NULL)
2488 {
2489 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2490 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2491 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2492 else
2493 {
2494 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2495 return 2;
2496 }
2497 }
2498
2499 if (DEE_option != NULL)
2500 {
2501 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2502 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2503 else
2504 {
2505 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2506 return 2;
2507 }
2508 }
2509
2510 /* Check the values for Jeffrey Friedl's debugging options. */
2511
2512 #ifdef JFRIEDL_DEBUG
2513 if (S_arg > 9)
2514 {
2515 fprintf(stderr, "pcregrep: bad value for -S option\n");
2516 return 2;
2517 }
2518 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2519 {
2520 if (jfriedl_XT == 0) jfriedl_XT = 1;
2521 if (jfriedl_XR == 0) jfriedl_XR = 1;
2522 }
2523 #endif
2524
2525 /* Get memory for the main buffer, and to store the pattern and hints lists. */
2526
2527 bufsize = 3*bufthird;
2528 main_buffer = (char *)malloc(bufsize);
2529 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2530 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2531
2532 if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2533 {
2534 fprintf(stderr, "pcregrep: malloc failed\n");
2535 goto EXIT2;
2536 }
2537
2538 /* If no patterns were provided by -e, and there is no file provided by -f,
2539 the first argument is the one and only pattern, and it must exist. */
2540
2541 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2542 {
2543 if (i >= argc) return usage(2);
2544 patterns[cmd_pattern_count++] = argv[i++];
2545 }
2546
2547 /* Compile the patterns that were provided on the command line, either by
2548 multiple uses of -e or as a single unkeyed pattern. */
2549
2550 for (j = 0; j < cmd_pattern_count; j++)
2551 {
2552 if (!compile_pattern(patterns[j], pcre_options, NULL,
2553 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2554 goto EXIT2;
2555 }
2556
2557 /* Compile the regular expressions that are provided in a file. */
2558
2559 if (pattern_filename != NULL)
2560 {
2561 int linenumber = 0;
2562 FILE *f;
2563 char *filename;
2564 char buffer[PATBUFSIZE];
2565
2566 if (strcmp(pattern_filename, "-") == 0)
2567 {
2568 f = stdin;
2569 filename = stdin_name;
2570 }
2571 else
2572 {
2573 f = fopen(pattern_filename, "r");
2574 if (f == NULL)
2575 {
2576 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2577 strerror(errno));
2578 goto EXIT2;
2579 }
2580 filename = pattern_filename;
2581 }
2582
2583 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2584 {
2585 char *s = buffer + (int)strlen(buffer);
2586 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2587 *s = 0;
2588 linenumber++;
2589 if (buffer[0] == 0) continue; /* Skip blank lines */
2590 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2591 goto EXIT2;
2592 }
2593
2594 if (f != stdin) fclose(f);
2595 }
2596
2597 /* Study the regular expressions, as we will be running them many times. Unless
2598 JIT has been explicitly disabled, arrange a stack for it to use. */
2599
2600 #ifdef SUPPORT_PCREGREP_JIT
2601 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2602 jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2603 #endif
2604
2605 for (j = 0; j < pattern_count; j++)
2606 {
2607 hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
2608 if (error != NULL)
2609 {
2610 char s[16];
2611 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2612 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2613 goto EXIT2;
2614 }
2615 hint_count++;
2616 #ifdef SUPPORT_PCREGREP_JIT
2617 if (jit_stack != NULL && hints_list[j] != NULL)
2618 pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);
2619 #endif
2620 }
2621
2622 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2623 pcre_extra block for each pattern. */
2624
2625 if (match_limit > 0 || match_limit_recursion > 0)
2626 {
2627 for (j = 0; j < pattern_count; j++)
2628 {
2629 if (hints_list[j] == NULL)
2630 {
2631 hints_list[j] = malloc(sizeof(pcre_extra));
2632 if (hints_list[j] == NULL)
2633 {
2634 fprintf(stderr, "pcregrep: malloc failed\n");
2635 pcregrep_exit(2);
2636 }
2637 }
2638 if (match_limit > 0)
2639 {
2640 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2641 hints_list[j]->match_limit = match_limit;
2642 }
2643 if (match_limit_recursion > 0)
2644 {
2645 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2646 hints_list[j]->match_limit_recursion = match_limit_recursion;
2647 }
2648 }
2649 }
2650
2651 /* If there are include or exclude patterns, compile them. */
2652
2653 if (exclude_pattern != NULL)
2654 {
2655 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2656 pcretables);
2657 if (exclude_compiled == NULL)
2658 {
2659 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2660 errptr, error);
2661 goto EXIT2;
2662 }
2663 }
2664
2665 if (include_pattern != NULL)
2666 {
2667 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2668 pcretables);
2669 if (include_compiled == NULL)
2670 {
2671 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2672 errptr, error);
2673 goto EXIT2;
2674 }
2675 }
2676
2677 if (exclude_dir_pattern != NULL)
2678 {
2679 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2680 pcretables);
2681 if (exclude_dir_compiled == NULL)
2682 {
2683 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2684 errptr, error);
2685 goto EXIT2;
2686 }
2687 }
2688
2689 if (include_dir_pattern != NULL)
2690 {
2691 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2692 pcretables);
2693 if (include_dir_compiled == NULL)
2694 {
2695 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2696 errptr, error);
2697 goto EXIT2;
2698 }
2699 }
2700
2701 /* If a file that contains a list of files to search has been specified, read
2702 it line by line and search the given files. Otherwise, if there are no further
2703 arguments, do the business on stdin and exit. */
2704
2705 if (file_list != NULL)
2706 {
2707 char buffer[PATBUFSIZE];
2708 FILE *fl;
2709 if (strcmp(file_list, "-") == 0) fl = stdin; else
2710 {
2711 fl = fopen(file_list, "rb");
2712 if (fl == NULL)
2713 {
2714 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", file_list,
2715 strerror(errno));
2716 goto EXIT2;
2717 }
2718 }
2719 while (fgets(buffer, PATBUFSIZE, fl) != NULL)
2720 {
2721 int frc;
2722 char *end = buffer + (int)strlen(buffer);
2723 while (end > buffer && isspace(end[-1])) end--;
2724 *end = 0;
2725 if (*buffer != 0)
2726 {
2727 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
2728 if (frc > 1) rc = frc;
2729 else if (frc == 0 && rc == 1) rc = 0;
2730 }
2731 }
2732 if (fl != stdin) fclose (fl);
2733 }
2734
2735 /* Do this only if there was no file list (and no file arguments). */
2736
2737 else if (i >= argc)
2738 {
2739 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2740 (filenames > FN_DEFAULT)? stdin_name : NULL);
2741 goto EXIT;
2742 }
2743
2744 /* After handling file-list or if there are remaining arguments, work through
2745 them as files or directories. Pass in the fact that there is only one argument
2746 at top level - this suppresses the file name if the argument is not a directory
2747 and filenames are not otherwise forced. */
2748
2749 only_one_at_top = i == argc - 1 && file_list == NULL;
2750
2751 for (; i < argc; i++)
2752 {
2753 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2754 only_one_at_top);
2755 if (frc > 1) rc = frc;
2756 else if (frc == 0 && rc == 1) rc = 0;
2757 }
2758
2759 EXIT:
2760 #ifdef SUPPORT_PCREGREP_JIT
2761 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2762 #endif
2763 if (main_buffer != NULL) free(main_buffer);
2764 if (pattern_list != NULL)
2765 {
2766 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2767 free(pattern_list);
2768 }
2769 if (hints_list != NULL)
2770 {
2771 for (i = 0; i < hint_count; i++)
2772 {
2773 if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);
2774 }
2775 free(hints_list);
2776 }
2777 pcregrep_exit(rc);
2778
2779 EXIT2:
2780 rc = 2;
2781 goto EXIT;
2782 }
2783
2784 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5