/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory | Revision Log


Revision 971 - (show annotations)
Fri Jun 1 16:29:43 2012 UTC (7 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 83075 byte(s)
Fix pcregrep build problem when bzip2 but not gzip was enabled.
1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2012 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
68 #define FALSE 0
69 #define TRUE 1
70
71 typedef int BOOL;
72
73 #define MAX_PATTERN_COUNT 100
74 #define OFFSET_SIZE 99
75
76 #if BUFSIZ > 8192
77 #define PATBUFSIZE BUFSIZ
78 #else
79 #define PATBUFSIZE 8192
80 #endif
81
82 /* Values for the "filenames" variable, which specifies options for file name
83 output. The order is important; it is assumed that a file name is wanted for
84 all values greater than FN_DEFAULT. */
85
86 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87
88 /* File reading styles */
89
90 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91
92 /* Actions for the -d and -D options */
93
94 enum { dee_READ, dee_SKIP, dee_RECURSE };
95 enum { DEE_READ, DEE_SKIP };
96
97 /* Actions for special processing options (flag bits) */
98
99 #define PO_WORD_MATCH 0x0001
100 #define PO_LINE_MATCH 0x0002
101 #define PO_FIXED_STRINGS 0x0004
102
103 /* Line ending types */
104
105 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106
107 /* Binary file options */
108
109 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
110
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that consumes the result in a dummy test. Oddly, this does
not also seem to apply to fprintf().

The expansion is wrapped in do { } while (0) so that "FWRITE(...);" is exactly
one statement. A bare "if" expansion cannot safely be used as the body of an
if/else: the trailing semicolon detaches the caller's "else", or, without it,
the "else" binds to the hidden "if". */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
118
119
120
121 /*************************************************
122 * Global variables *
123 *************************************************/
124
125 /* Jeffrey Friedl has some debugging requirements that are not part of the
126 regular code. */
127
128 #ifdef JFRIEDL_DEBUG
129 static int S_arg = -1;
130 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
131 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
132 static const char *jfriedl_prefix = "";
133 static const char *jfriedl_postfix = "";
134 #endif
135
136 static int endlinetype;
137
138 static char *colour_string = (char *)"1;31";
139 static char *colour_option = NULL;
140 static char *dee_option = NULL;
141 static char *DEE_option = NULL;
142 static char *main_buffer = NULL;
143 static char *newline = NULL;
144 static char *pattern_filename = NULL;
145 static char *stdin_name = (char *)"(standard input)";
146 static char *locale = NULL;
147
148 static const unsigned char *pcretables = NULL;
149
150 static int pattern_count = 0;
151 static pcre **pattern_list = NULL;
152 static pcre_extra **hints_list = NULL;
153
154 static char *file_list = NULL;
155 static char *include_pattern = NULL;
156 static char *exclude_pattern = NULL;
157 static char *include_dir_pattern = NULL;
158 static char *exclude_dir_pattern = NULL;
159
160 static pcre *include_compiled = NULL;
161 static pcre *exclude_compiled = NULL;
162 static pcre *include_dir_compiled = NULL;
163 static pcre *exclude_dir_compiled = NULL;
164
165 static int after_context = 0;
166 static int before_context = 0;
167 static int binary_files = BIN_BINARY;
168 static int both_context = 0;
169 static int bufthird = PCREGREP_BUFSIZE;
170 static int bufsize = 3*PCREGREP_BUFSIZE;
171 static int dee_action = dee_READ;
172 static int DEE_action = DEE_READ;
173 static int error_count = 0;
174 static int filenames = FN_DEFAULT;
175 static int only_matching = -1;
176 static int process_options = 0;
177
178 #ifdef SUPPORT_PCREGREP_JIT
179 static int study_options = PCRE_STUDY_JIT_COMPILE;
180 #else
181 static int study_options = 0;
182 #endif
183
184 static unsigned long int match_limit = 0;
185 static unsigned long int match_limit_recursion = 0;
186
187 static BOOL count_only = FALSE;
188 static BOOL do_colour = FALSE;
189 static BOOL file_offsets = FALSE;
190 static BOOL hyphenpending = FALSE;
191 static BOOL invert = FALSE;
192 static BOOL line_buffered = FALSE;
193 static BOOL line_offsets = FALSE;
194 static BOOL multiline = FALSE;
195 static BOOL number = FALSE;
196 static BOOL omit_zero_count = FALSE;
197 static BOOL resource_error = FALSE;
198 static BOOL quiet = FALSE;
199 static BOOL silent = FALSE;
200 static BOOL utf8 = FALSE;
201
202 /* Structure for options and list of them */
203
204 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
205 OP_OP_NUMBER, OP_PATLIST, OP_BINFILES };
206
207 typedef struct option_item {
208 int type;
209 int one_char;
210 void *dataptr;
211 const char *long_name;
212 const char *help_text;
213 } option_item;
214
215 /* Options without a single-letter equivalent get a negative value. This can be
216 used to identify them. */
217
218 #define N_COLOUR (-1)
219 #define N_EXCLUDE (-2)
220 #define N_EXCLUDE_DIR (-3)
221 #define N_HELP (-4)
222 #define N_INCLUDE (-5)
223 #define N_INCLUDE_DIR (-6)
224 #define N_LABEL (-7)
225 #define N_LOCALE (-8)
226 #define N_NULL (-9)
227 #define N_LOFFSETS (-10)
228 #define N_FOFFSETS (-11)
229 #define N_LBUFFER (-12)
230 #define N_M_LIMIT (-13)
231 #define N_M_LIMIT_REC (-14)
232 #define N_BUFSIZE (-15)
233 #define N_NOJIT (-16)
234 #define N_FILE_LIST (-17)
235 #define N_BINARY_FILES (-18)
236
237 static option_item optionlist[] = {
238 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
239 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
240 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
241 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
242 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
243 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
244 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
245 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
246 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
247 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
248 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
249 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
250 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
251 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
252 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
253 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
254 { OP_STRING, N_FILE_LIST, &file_list, "file-list=path","read files to search from file" },
255 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
256 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
257 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
258 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
259 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
260 #ifdef SUPPORT_PCREGREP_JIT
261 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
262 #else
263 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
264 #endif
265 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
266 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
267 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
268 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
269 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
270 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
271 { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
272 { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
273 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
274 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
275 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
276 { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
277 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
278 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
279 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
280 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
281 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
282 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
283
284 /* These two were accidentally implemented with underscores instead of
285 hyphens in the option names. As this was not discovered for several releases,
286 the incorrect versions are left in the table for compatibility. However, the
287 --help function misses out any option that has an underscore in its name. */
288
289 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
290 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
291
292 #ifdef JFRIEDL_DEBUG
293 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
294 #endif
295 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
296 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
297 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
298 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
299 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
300 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
301 { OP_NODATA, 0, NULL, NULL, NULL }
302 };
303
304 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
305 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
306 that the combination of -w and -x has the same effect as -x on its own, so we
307 can treat them as the same. */
308
309 static const char *prefix[] = {
310 "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
311
312 static const char *suffix[] = {
313 "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
314
/* UTF-8 decoding tables. They are used by end_of_line() and previous_line()
when UTF-8 mode is on and the newline setting is "any" or "anycrlf". Both are
private to this file, hence static.

utf8_table3 is indexed by the number of additional bytes in a character and
gives the mask that extracts the data bits from the first byte. */

static const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

/* utf8_table4 is indexed by the bottom 6 bits of a first byte that is >= 0xc0
and gives the number of additional bytes in the character. */

static const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
324
325
326
327 /*************************************************
328 * Exit from the program *
329 *************************************************/
330
331 /* If there has been a resource error, give a suitable message.
332
333 Argument: the return code
334 Returns: does not return
335 */
336
337 static void
338 pcregrep_exit(int rc)
339 {
340 if (resource_error)
341 {
342 fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
343 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
344 PCRE_ERROR_JIT_STACKLIMIT);
345 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
346 }
347
348 exit(rc);
349 }
350
351
352 /*************************************************
353 * OS-specific functions *
354 *************************************************/
355
356 /* These functions are defined so that they can be made system specific,
357 although at present the only ones are for Unix, Win32, and for "no support". */
358
359
360 /************* Directory scanning in Unix ***********/
361
362 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
363 #include <sys/types.h>
364 #include <sys/stat.h>
365 #include <dirent.h>
366
367 typedef DIR directory_type;
368
/* Test whether a path names a directory (Unix version).

Argument:  filename   the path to test
Returns:   '/' if stat() succeeds and reports a directory, otherwise 0
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  /* A failed stat() is reported as "not a directory", in the expectation that
  opening the path as a file will then fail. */

  if (stat(filename, &info) < 0) return 0;
  if (!S_ISDIR(info.st_mode)) return 0;
  return '/';
}
377
378 static directory_type *
379 opendirectory(char *filename)
380 {
381 return opendir(filename);
382 }
383
384 static char *
385 readdirectory(directory_type *dir)
386 {
387 for (;;)
388 {
389 struct dirent *dent = readdir(dir);
390 if (dent == NULL) return NULL;
391 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
392 return dent->d_name;
393 }
394 /* Control never reaches here */
395 }
396
397 static void
398 closedirectory(directory_type *dir)
399 {
400 closedir(dir);
401 }
402
403
/************* Test for regular file in Unix **********/

/* Argument:  filename   the path to test
   Returns:   non-zero if the path is a regular file, or if stat() fails;
              zero if stat() succeeds and reports some other file type
*/

static int
isregfile(char *filename)
{
  struct stat info;

  /* A failed stat() is reported as "regular", in the expectation that opening
  the path as a file will then fail. */

  if (stat(filename, &info) < 0) return 1;
  return S_ISREG(info.st_mode)? 1 : 0;
}
414
415
416 /************* Test for a terminal in Unix **********/
417
418 static BOOL
419 is_stdout_tty(void)
420 {
421 return isatty(fileno(stdout));
422 }
423
424 static BOOL
425 is_file_tty(FILE *f)
426 {
427 return isatty(fileno(f));
428 }
429
430
431 /************* Directory scanning in Win32 ***********/
432
433 /* I (Philip Hazel) have no means of testing this code. It was contributed by
434 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
435 when it did not exist. David Byron added a patch that moved the #include of
436 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
437 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
438 undefined when it is indeed undefined. */
439
440 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
441
442 #ifndef STRICT
443 # define STRICT
444 #endif
445 #ifndef WIN32_LEAN_AND_MEAN
446 # define WIN32_LEAN_AND_MEAN
447 #endif
448
449 #include <windows.h>
450
451 #ifndef INVALID_FILE_ATTRIBUTES
452 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
453 #endif
454
455 typedef struct directory_type
456 {
457 HANDLE handle;
458 BOOL first;
459 WIN32_FIND_DATA data;
460 } directory_type;
461
462 int
463 isdirectory(char *filename)
464 {
465 DWORD attr = GetFileAttributes(filename);
466 if (attr == INVALID_FILE_ATTRIBUTES)
467 return 0;
468 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
469 }
470
471 directory_type *
472 opendirectory(char *filename)
473 {
474 size_t len;
475 char *pattern;
476 directory_type *dir;
477 DWORD err;
478 len = strlen(filename);
479 pattern = (char *) malloc(len + 3);
480 dir = (directory_type *) malloc(sizeof(*dir));
481 if ((pattern == NULL) || (dir == NULL))
482 {
483 fprintf(stderr, "pcregrep: malloc failed\n");
484 pcregrep_exit(2);
485 }
486 memcpy(pattern, filename, len);
487 memcpy(&(pattern[len]), "\\*", 3);
488 dir->handle = FindFirstFile(pattern, &(dir->data));
489 if (dir->handle != INVALID_HANDLE_VALUE)
490 {
491 free(pattern);
492 dir->first = TRUE;
493 return dir;
494 }
495 err = GetLastError();
496 free(pattern);
497 free(dir);
498 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
499 return NULL;
500 }
501
502 char *
503 readdirectory(directory_type *dir)
504 {
505 for (;;)
506 {
507 if (!dir->first)
508 {
509 if (!FindNextFile(dir->handle, &(dir->data)))
510 return NULL;
511 }
512 else
513 {
514 dir->first = FALSE;
515 }
516 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
517 return dir->data.cFileName;
518 }
519 #ifndef _MSC_VER
520 return NULL; /* Keep compiler happy; never executed */
521 #endif
522 }
523
524 void
525 closedirectory(directory_type *dir)
526 {
527 FindClose(dir->handle);
528 free(dir);
529 }
530
531
532 /************* Test for regular file in Win32 **********/
533
534 /* I don't know how to do this, or if it can be done; assume all paths are
535 regular if they are not directories. */
536
/* Win32 version: anything isdirectory() does not report as a directory is
treated as a regular file. */

int isregfile(char *filename)
{
  if (isdirectory(filename)) return 0;
  return 1;
}
541
542
543 /************* Test for a terminal in Win32 **********/
544
545 /* I don't know how to do this; assume never */
546
547 static BOOL
548 is_stdout_tty(void)
549 {
550 return FALSE;
551 }
552
553 static BOOL
554 is_file_tty(FILE *f)
555 {
556 return FALSE;
557 }
558
559
560 /************* Directory scanning when we can't do it ***********/
561
562 /* The type is void, and apart from isdirectory(), the functions do nothing. */
563
564 #else
565
566 typedef void directory_type;
567
568 int isdirectory(char *filename) { return 0; }
569 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
570 char *readdirectory(directory_type *dir) { return (char*)0;}
571 void closedirectory(directory_type *dir) {}
572
573
574 /************* Test for regular when we can't do it **********/
575
576 /* Assume all files are regular. */
577
/* Fallback version: every path is reported as a regular file. */

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
579
580
581 /************* Test for a terminal when we can't do it **********/
582
583 static BOOL
584 is_stdout_tty(void)
585 {
586 return FALSE;
587 }
588
589 static BOOL
590 is_file_tty(FILE *f)
591 {
592 return FALSE;
593 }
594
595 #endif
596
597
598
599 #ifndef HAVE_STRERROR
600 /*************************************************
601 * Provide strerror() for non-ANSI libraries *
602 *************************************************/
603
604 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
605 in their libraries, but can provide the same facility by this simple
606 alternative function. */
607
extern int   sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): looks the number up in sys_errlist[], and returns a
fixed string for numbers outside the range 0 .. sys_nerr-1. */

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr) return sys_errlist[n];
  return "unknown error number";
}
617 #endif /* HAVE_STRERROR */
618
619
620
/*************************************************
*            Read one line of input              *
*************************************************/

/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a '\n' has been stored, when "length" bytes have been
stored, or at end of file, whichever comes first. No terminating zero is added.
The space remaining is checked before each character is fetched, so a length of
zero (or less) stores nothing and consumes no input.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
  int c;
  int yield = 0;
  while (yield < length && (c = fgetc(f)) != EOF)
  {
    buffer[yield++] = c;
    if (c == '\n') break;
  }
  return yield;
}
652
653
654
655 /*************************************************
656 * Find end of line *
657 *************************************************/
658
659 /* The length of the endline sequence that is found is set via lenptr. This may
660 be zero at the very end of the file if there is no line-ending sequence there.
661
662 Arguments:
663 p current position in line
664 endptr end of available data
665 lenptr where to put the length of the eol sequence
666
667 Returns: pointer after the last byte of the line,
668 including the newline byte(s)
669 */
670
671 static char *
672 end_of_line(char *p, char *endptr, int *lenptr)
673 {
674 switch(endlinetype)
675 {
676 default: /* Just in case */
677 case EL_LF:
678 while (p < endptr && *p != '\n') p++;
679 if (p < endptr)
680 {
681 *lenptr = 1;
682 return p + 1;
683 }
684 *lenptr = 0;
685 return endptr;
686
687 case EL_CR:
688 while (p < endptr && *p != '\r') p++;
689 if (p < endptr)
690 {
691 *lenptr = 1;
692 return p + 1;
693 }
694 *lenptr = 0;
695 return endptr;
696
697 case EL_CRLF:
698 for (;;)
699 {
700 while (p < endptr && *p != '\r') p++;
701 if (++p >= endptr)
702 {
703 *lenptr = 0;
704 return endptr;
705 }
706 if (*p == '\n')
707 {
708 *lenptr = 2;
709 return p + 1;
710 }
711 }
712 break;
713
714 case EL_ANYCRLF:
715 while (p < endptr)
716 {
717 int extra = 0;
718 register int c = *((unsigned char *)p);
719
720 if (utf8 && c >= 0xc0)
721 {
722 int gcii, gcss;
723 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
724 gcss = 6*extra;
725 c = (c & utf8_table3[extra]) << gcss;
726 for (gcii = 1; gcii <= extra; gcii++)
727 {
728 gcss -= 6;
729 c |= (p[gcii] & 0x3f) << gcss;
730 }
731 }
732
733 p += 1 + extra;
734
735 switch (c)
736 {
737 case 0x0a: /* LF */
738 *lenptr = 1;
739 return p;
740
741 case 0x0d: /* CR */
742 if (p < endptr && *p == 0x0a)
743 {
744 *lenptr = 2;
745 p++;
746 }
747 else *lenptr = 1;
748 return p;
749
750 default:
751 break;
752 }
753 } /* End of loop for ANYCRLF case */
754
755 *lenptr = 0; /* Must have hit the end */
756 return endptr;
757
758 case EL_ANY:
759 while (p < endptr)
760 {
761 int extra = 0;
762 register int c = *((unsigned char *)p);
763
764 if (utf8 && c >= 0xc0)
765 {
766 int gcii, gcss;
767 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
768 gcss = 6*extra;
769 c = (c & utf8_table3[extra]) << gcss;
770 for (gcii = 1; gcii <= extra; gcii++)
771 {
772 gcss -= 6;
773 c |= (p[gcii] & 0x3f) << gcss;
774 }
775 }
776
777 p += 1 + extra;
778
779 switch (c)
780 {
781 case 0x0a: /* LF */
782 case 0x0b: /* VT */
783 case 0x0c: /* FF */
784 *lenptr = 1;
785 return p;
786
787 case 0x0d: /* CR */
788 if (p < endptr && *p == 0x0a)
789 {
790 *lenptr = 2;
791 p++;
792 }
793 else *lenptr = 1;
794 return p;
795
796 case 0x85: /* NEL */
797 *lenptr = utf8? 2 : 1;
798 return p;
799
800 case 0x2028: /* LS */
801 case 0x2029: /* PS */
802 *lenptr = 3;
803 return p;
804
805 default:
806 break;
807 }
808 } /* End of loop for ANY case */
809
810 *lenptr = 0; /* Must have hit the end */
811 return endptr;
812 } /* End of overall switch */
813 }
814
815
816
817 /*************************************************
818 * Find start of previous line *
819 *************************************************/
820
821 /* This is called when looking back for before lines to print.
822
823 Arguments:
824 p start of the subsequent line
825 startptr start of available data
826
827 Returns: pointer to the start of the previous line
828 */
829
830 static char *
831 previous_line(char *p, char *startptr)
832 {
833 switch(endlinetype)
834 {
835 default: /* Just in case */
836 case EL_LF:
837 p--;
838 while (p > startptr && p[-1] != '\n') p--;
839 return p;
840
841 case EL_CR:
842 p--;
843 while (p > startptr && p[-1] != '\n') p--;
844 return p;
845
846 case EL_CRLF:
847 for (;;)
848 {
849 p -= 2;
850 while (p > startptr && p[-1] != '\n') p--;
851 if (p <= startptr + 1 || p[-2] == '\r') return p;
852 }
853 return p; /* But control should never get here */
854
855 case EL_ANY:
856 case EL_ANYCRLF:
857 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
858 if (utf8) while ((*p & 0xc0) == 0x80) p--;
859
860 while (p > startptr)
861 {
862 register int c;
863 char *pp = p - 1;
864
865 if (utf8)
866 {
867 int extra = 0;
868 while ((*pp & 0xc0) == 0x80) pp--;
869 c = *((unsigned char *)pp);
870 if (c >= 0xc0)
871 {
872 int gcii, gcss;
873 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
874 gcss = 6*extra;
875 c = (c & utf8_table3[extra]) << gcss;
876 for (gcii = 1; gcii <= extra; gcii++)
877 {
878 gcss -= 6;
879 c |= (pp[gcii] & 0x3f) << gcss;
880 }
881 }
882 }
883 else c = *((unsigned char *)pp);
884
885 if (endlinetype == EL_ANYCRLF) switch (c)
886 {
887 case 0x0a: /* LF */
888 case 0x0d: /* CR */
889 return p;
890
891 default:
892 break;
893 }
894
895 else switch (c)
896 {
897 case 0x0a: /* LF */
898 case 0x0b: /* VT */
899 case 0x0c: /* FF */
900 case 0x0d: /* CR */
901 case 0x85: /* NEL */
902 case 0x2028: /* LS */
903 case 0x2029: /* PS */
904 return p;
905
906 default:
907 break;
908 }
909
910 p = pp; /* Back one character */
911 } /* End of loop for ANY case */
912
913 return startptr; /* Hit start of data */
914 } /* End of overall switch */
915 }
916
917
918
919
920
921 /*************************************************
922 * Print the previous "after" lines *
923 *************************************************/
924
925 /* This is called if we are about to lose said lines because of buffer filling,
926 and at the end of the file. The data in the line is written using fwrite() so
927 that a binary zero does not terminate it.
928
929 Arguments:
930 lastmatchnumber the number of the last matching line, plus one
931 lastmatchrestart where we restarted after the last match
932 endptr end of available data
933 printname filename for printing
934
935 Returns: nothing
936 */
937
938 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
939 char *endptr, char *printname)
940 {
941 if (after_context > 0 && lastmatchnumber > 0)
942 {
943 int count = 0;
944 while (lastmatchrestart < endptr && count++ < after_context)
945 {
946 int ellength;
947 char *pp = lastmatchrestart;
948 if (printname != NULL) fprintf(stdout, "%s-", printname);
949 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
950 pp = end_of_line(pp, endptr, &ellength);
951 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
952 lastmatchrestart = pp;
953 }
954 hyphenpending = TRUE;
955 }
956 }
957
958
959
960 /*************************************************
961 * Apply patterns to subject till one matches *
962 *************************************************/
963
964 /* This function is called to run through all patterns, looking for a match. It
965 is used multiple times for the same subject when colouring is enabled, in order
966 to find all possible matches.
967
968 Arguments:
969 matchptr the start of the subject
970 length the length of the subject to match
971 startoffset where to start matching
972 offsets the offets vector to fill in
973 mrc address of where to put the result of pcre_exec()
974
975 Returns: TRUE if there was a match
976 FALSE if there was no match
977 invert if there was a non-fatal error
978 */
979
980 static BOOL
981 match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
982 int *mrc)
983 {
984 int i;
985 size_t slen = length;
986 const char *msg = "this text:\n\n";
987 if (slen > 200)
988 {
989 slen = 200;
990 msg = "text that starts:\n\n";
991 }
992 for (i = 0; i < pattern_count; i++)
993 {
994 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
995 startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
996 if (*mrc >= 0) return TRUE;
997 if (*mrc == PCRE_ERROR_NOMATCH) continue;
998 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
999 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
1000 fprintf(stderr, "%s", msg);
1001 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
1002 fprintf(stderr, "\n\n");
1003 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1004 *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1005 resource_error = TRUE;
1006 if (error_count++ > 20)
1007 {
1008 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1009 pcregrep_exit(2);
1010 }
1011 return invert; /* No more matching; don't show the line again */
1012 }
1013
1014 return FALSE; /* No match, no errors */
1015 }
1016
1017
1018
1019 /*************************************************
1020 * Grep an individual file *
1021 *************************************************/
1022
1023 /* This is called from grep_or_recurse() below. It uses a buffer that is three
1024 times the value of bufthird. The matching point is never allowed to stray into
1025 the top third of the buffer, thus keeping more of the file available for
1026 context printing or for multiline scanning. For large files, the pointer will
1027 be in the middle third most of the time, so the bottom third is available for
1028 "before" context printing.
1029
1030 Arguments:
1031 handle the fopened FILE stream for a normal file
1032 the gzFile pointer when reading is via libz
1033 the BZFILE pointer when reading is via libbz2
1034 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1035 filename the file name or NULL (for errors)
1036 printname the file name if it is to be printed for each match
1037 or NULL if the file name is not to be printed
1038 it cannot be NULL if filenames[_nomatch]_only is set
1039
1040 Returns: 0 if there was at least one match
1041 1 otherwise (no matches)
1042 2 if an overlong line is encountered
1043 3 if there is a read error on a .bz2 file
1044 */
1045
1046 static int
1047 pcregrep(void *handle, int frtype, char *filename, char *printname)
1048 {
1049 int rc = 1;
1050 int linenumber = 1;
1051 int lastmatchnumber = 0;
1052 int count = 0;
1053 int filepos = 0;
1054 int offsets[OFFSET_SIZE];
1055 char *lastmatchrestart = NULL;
1056 char *ptr = main_buffer;
1057 char *endptr;
1058 size_t bufflength;
1059 BOOL binary = FALSE;
1060 BOOL endhyphenpending = FALSE;
1061 BOOL input_line_buffered = line_buffered;
1062 FILE *in = NULL; /* Ensure initialized */
1063
1064 #ifdef SUPPORT_LIBZ
1065 gzFile ingz = NULL;
1066 #endif
1067
1068 #ifdef SUPPORT_LIBBZ2
1069 BZFILE *inbz2 = NULL;
1070 #endif
1071
1072
1073 /* Do the first read into the start of the buffer and set up the pointer to end
1074 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1075 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1076 fail. */
1077
1078 #ifdef SUPPORT_LIBZ
1079 if (frtype == FR_LIBZ)
1080 {
1081 ingz = (gzFile)handle;
1082 bufflength = gzread (ingz, main_buffer, bufsize);
1083 }
1084 else
1085 #endif
1086
1087 #ifdef SUPPORT_LIBBZ2
1088 if (frtype == FR_LIBBZ2)
1089 {
1090 inbz2 = (BZFILE *)handle;
1091 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1092 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1093 } /* without the cast it is unsigned. */
1094 else
1095 #endif
1096
1097 {
1098 in = (FILE *)handle;
1099 if (is_file_tty(in)) input_line_buffered = TRUE;
1100 bufflength = input_line_buffered?
1101 read_one_line(main_buffer, bufsize, in) :
1102 fread(main_buffer, 1, bufsize, in);
1103 }
1104
1105 endptr = main_buffer + bufflength;
1106
1107 /* Unless binary-files=text, see if we have a binary file. This uses the same
1108 rule as GNU grep, namely, a search for a binary zero byte near the start of the
1109 file. */
1110
1111 if (binary_files != BIN_TEXT)
1112 {
1113 binary =
1114 memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1115 if (binary && binary_files == BIN_NOMATCH) return 1;
1116 }
1117
1118 /* Loop while the current pointer is not at the end of the file. For large
1119 files, endptr will be at the end of the buffer when we are in the middle of the
1120 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1121 way, the buffer is shifted left and re-filled. */
1122
1123 while (ptr < endptr)
1124 {
1125 int endlinelength;
1126 int mrc = 0;
1127 int startoffset = 0;
1128 BOOL match;
1129 char *matchptr = ptr;
1130 char *t = ptr;
1131 size_t length, linelength;
1132
1133 /* At this point, ptr is at the start of a line. We need to find the length
1134 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1135 length remainder of the data in the buffer. Otherwise, it is the length of
1136 the next line, excluding the terminating newline. After matching, we always
1137 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1138 option is used for compiling, so that any match is constrained to be in the
1139 first line. */
1140
1141 t = end_of_line(t, endptr, &endlinelength);
1142 linelength = t - ptr - endlinelength;
1143 length = multiline? (size_t)(endptr - ptr) : linelength;
1144
1145 /* Check to see if the line we are looking at extends right to the very end
1146 of the buffer without a line terminator. This means the line is too long to
1147 handle. */
1148
1149 if (endlinelength == 0 && t == main_buffer + bufsize)
1150 {
1151 fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1152 "pcregrep: check the --buffer-size option\n",
1153 linenumber,
1154 (filename == NULL)? "" : " of file ",
1155 (filename == NULL)? "" : filename);
1156 return 2;
1157 }
1158
1159 /* Extra processing for Jeffrey Friedl's debugging. */
1160
1161 #ifdef JFRIEDL_DEBUG
1162 if (jfriedl_XT || jfriedl_XR)
1163 {
1164 #include <sys/time.h>
1165 #include <time.h>
1166 struct timeval start_time, end_time;
1167 struct timezone dummy;
1168 int i;
1169
1170 if (jfriedl_XT)
1171 {
1172 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1173 const char *orig = ptr;
1174 ptr = malloc(newlen + 1);
1175 if (!ptr) {
1176 printf("out of memory");
1177 pcregrep_exit(2);
1178 }
1179 endptr = ptr;
1180 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1181 for (i = 0; i < jfriedl_XT; i++) {
1182 strncpy(endptr, orig, length);
1183 endptr += length;
1184 }
1185 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1186 length = newlen;
1187 }
1188
1189 if (gettimeofday(&start_time, &dummy) != 0)
1190 perror("bad gettimeofday");
1191
1192
1193 for (i = 0; i < jfriedl_XR; i++)
1194 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1195 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1196
1197 if (gettimeofday(&end_time, &dummy) != 0)
1198 perror("bad gettimeofday");
1199
1200 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1201 -
1202 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1203
1204 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1205 return 0;
1206 }
1207 #endif
1208
1209 /* We come back here after a match when the -o option (only_matching) is set,
1210 in order to find any further matches in the same line. */
1211
1212 ONLY_MATCHING_RESTART:
1213
1214 /* Run through all the patterns until one matches or there is an error other
1215 than NOMATCH. This code is in a subroutine so that it can be re-used for
1216 finding subsequent matches when colouring matched lines. */
1217
1218 match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1219
1220 /* If it's a match or a not-match (as required), do what's wanted. */
1221
1222 if (match != invert)
1223 {
1224 BOOL hyphenprinted = FALSE;
1225
1226 /* We've failed if we want a file that doesn't have any matches. */
1227
1228 if (filenames == FN_NOMATCH_ONLY) return 1;
1229
1230 /* Just count if just counting is wanted. */
1231
1232 if (count_only) count++;
1233
1234 /* When handling a binary file and binary-files==binary, the "binary"
1235 variable will be set true (it's false in all other cases). In this
1236 situation we just want to output the file name. No need to scan further. */
1237
1238 else if (binary)
1239 {
1240 fprintf(stdout, "Binary file %s matches\n", filename);
1241 return 0;
1242 }
1243
1244 /* If all we want is a file name, there is no need to scan any more lines
1245 in the file. */
1246
1247 else if (filenames == FN_MATCH_ONLY)
1248 {
1249 fprintf(stdout, "%s\n", printname);
1250 return 0;
1251 }
1252
1253 /* Likewise, if all we want is a yes/no answer. */
1254
1255 else if (quiet) return 0;
1256
1257 /* The --only-matching option prints just the substring that matched, or a
1258 captured portion of it, as long as this string is not empty, and the
1259 --file-offsets and --line-offsets options output offsets for the matching
1260 substring (they both force --only-matching = 0). None of these options
1261 prints any context. Afterwards, adjust the start and then jump back to look
1262 for further matches in the same line. If we are in invert mode, however,
1263 nothing is printed and we do not restart - this could still be useful
1264 because the return code is set. */
1265
1266 else if (only_matching >= 0)
1267 {
1268 if (!invert)
1269 {
1270 if (printname != NULL) fprintf(stdout, "%s:", printname);
1271 if (number) fprintf(stdout, "%d:", linenumber);
1272 if (line_offsets)
1273 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1274 offsets[1] - offsets[0]);
1275 else if (file_offsets)
1276 fprintf(stdout, "%d,%d\n",
1277 (int)(filepos + matchptr + offsets[0] - ptr),
1278 offsets[1] - offsets[0]);
1279 else if (only_matching < mrc)
1280 {
1281 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1282 if (plen > 0)
1283 {
1284 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1285 FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1286 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1287 fprintf(stdout, "\n");
1288 }
1289 }
1290 else if (printname != NULL || number) fprintf(stdout, "\n");
1291 match = FALSE;
1292 if (line_buffered) fflush(stdout);
1293 rc = 0; /* Had some success */
1294 startoffset = offsets[1]; /* Restart after the match */
1295 goto ONLY_MATCHING_RESTART;
1296 }
1297 }
1298
1299 /* This is the default case when none of the above options is set. We print
1300 the matching lines(s), possibly preceded and/or followed by other lines of
1301 context. */
1302
1303 else
1304 {
1305 /* See if there is a requirement to print some "after" lines from a
1306 previous match. We never print any overlaps. */
1307
1308 if (after_context > 0 && lastmatchnumber > 0)
1309 {
1310 int ellength;
1311 int linecount = 0;
1312 char *p = lastmatchrestart;
1313
1314 while (p < ptr && linecount < after_context)
1315 {
1316 p = end_of_line(p, ptr, &ellength);
1317 linecount++;
1318 }
1319
1320 /* It is important to advance lastmatchrestart during this printing so
1321 that it interacts correctly with any "before" printing below. Print
1322 each line's data using fwrite() in case there are binary zeroes. */
1323
1324 while (lastmatchrestart < p)
1325 {
1326 char *pp = lastmatchrestart;
1327 if (printname != NULL) fprintf(stdout, "%s-", printname);
1328 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1329 pp = end_of_line(pp, endptr, &ellength);
1330 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1331 lastmatchrestart = pp;
1332 }
1333 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1334 }
1335
1336 /* If there were non-contiguous lines printed above, insert hyphens. */
1337
1338 if (hyphenpending)
1339 {
1340 fprintf(stdout, "--\n");
1341 hyphenpending = FALSE;
1342 hyphenprinted = TRUE;
1343 }
1344
1345 /* See if there is a requirement to print some "before" lines for this
1346 match. Again, don't print overlaps. */
1347
1348 if (before_context > 0)
1349 {
1350 int linecount = 0;
1351 char *p = ptr;
1352
1353 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1354 linecount < before_context)
1355 {
1356 linecount++;
1357 p = previous_line(p, main_buffer);
1358 }
1359
1360 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1361 fprintf(stdout, "--\n");
1362
1363 while (p < ptr)
1364 {
1365 int ellength;
1366 char *pp = p;
1367 if (printname != NULL) fprintf(stdout, "%s-", printname);
1368 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1369 pp = end_of_line(pp, endptr, &ellength);
1370 FWRITE(p, 1, pp - p, stdout);
1371 p = pp;
1372 }
1373 }
1374
1375 /* Now print the matching line(s); ensure we set hyphenpending at the end
1376 of the file if any context lines are being output. */
1377
1378 if (after_context > 0 || before_context > 0)
1379 endhyphenpending = TRUE;
1380
1381 if (printname != NULL) fprintf(stdout, "%s:", printname);
1382 if (number) fprintf(stdout, "%d:", linenumber);
1383
1384 /* In multiline mode, we want to print to the end of the line in which
1385 the end of the matched string is found, so we adjust linelength and the
1386 line number appropriately, but only when there actually was a match
1387 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1388 the match will always be before the first newline sequence. */
1389
1390 if (multiline & !invert)
1391 {
1392 char *endmatch = ptr + offsets[1];
1393 t = ptr;
1394 while (t < endmatch)
1395 {
1396 t = end_of_line(t, endptr, &endlinelength);
1397 if (t < endmatch) linenumber++; else break;
1398 }
1399 linelength = t - ptr - endlinelength;
1400 }
1401
1402 /*** NOTE: Use only fwrite() to output the data line, so that binary
1403 zeroes are treated as just another data character. */
1404
1405 /* This extra option, for Jeffrey Friedl's debugging requirements,
1406 replaces the matched string, or a specific captured string if it exists,
1407 with X. When this happens, colouring is ignored. */
1408
1409 #ifdef JFRIEDL_DEBUG
1410 if (S_arg >= 0 && S_arg < mrc)
1411 {
1412 int first = S_arg * 2;
1413 int last = first + 1;
1414 FWRITE(ptr, 1, offsets[first], stdout);
1415 fprintf(stdout, "X");
1416 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1417 }
1418 else
1419 #endif
1420
1421 /* We have to split the line(s) up if colouring, and search for further
1422 matches, but not of course if the line is a non-match. */
1423
1424 if (do_colour && !invert)
1425 {
1426 int plength;
1427 FWRITE(ptr, 1, offsets[0], stdout);
1428 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1429 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1430 fprintf(stdout, "%c[00m", 0x1b);
1431 for (;;)
1432 {
1433 startoffset = offsets[1];
1434 if (startoffset >= (int)linelength + endlinelength ||
1435 !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1436 break;
1437 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1438 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1439 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1440 fprintf(stdout, "%c[00m", 0x1b);
1441 }
1442
1443 /* In multiline mode, we may have already printed the complete line
1444 and its line-ending characters (if they matched the pattern), so there
1445 may be no more to print. */
1446
1447 plength = (int)((linelength + endlinelength) - startoffset);
1448 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1449 }
1450
1451 /* Not colouring; no need to search for further matches */
1452
1453 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1454 }
1455
1456 /* End of doing what has to be done for a match. If --line-buffered was
1457 given, flush the output. */
1458
1459 if (line_buffered) fflush(stdout);
1460 rc = 0; /* Had some success */
1461
1462 /* Remember where the last match happened for after_context. We remember
1463 where we are about to restart, and that line's number. */
1464
1465 lastmatchrestart = ptr + linelength + endlinelength;
1466 lastmatchnumber = linenumber + 1;
1467 }
1468
1469 /* For a match in multiline inverted mode (which of course did not cause
1470 anything to be printed), we have to move on to the end of the match before
1471 proceeding. */
1472
1473 if (multiline && invert && match)
1474 {
1475 int ellength;
1476 char *endmatch = ptr + offsets[1];
1477 t = ptr;
1478 while (t < endmatch)
1479 {
1480 t = end_of_line(t, endptr, &ellength);
1481 if (t <= endmatch) linenumber++; else break;
1482 }
1483 endmatch = end_of_line(endmatch, endptr, &ellength);
1484 linelength = endmatch - ptr - ellength;
1485 }
1486
1487 /* Advance to after the newline and increment the line number. The file
1488 offset to the current line is maintained in filepos. */
1489
1490 ptr += linelength + endlinelength;
1491 filepos += (int)(linelength + endlinelength);
1492 linenumber++;
1493
1494 /* If input is line buffered, and the buffer is not yet full, read another
1495 line and add it into the buffer. */
1496
1497 if (input_line_buffered && bufflength < (size_t)bufsize)
1498 {
1499 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1500 bufflength += add;
1501 endptr += add;
1502 }
1503
1504 /* If we haven't yet reached the end of the file (the buffer is full), and
1505 the current point is in the top 1/3 of the buffer, slide the buffer down by
1506 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1507 about to be lost, print them. */
1508
1509 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1510 {
1511 if (after_context > 0 &&
1512 lastmatchnumber > 0 &&
1513 lastmatchrestart < main_buffer + bufthird)
1514 {
1515 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1516 lastmatchnumber = 0;
1517 }
1518
1519 /* Now do the shuffle */
1520
1521 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1522 ptr -= bufthird;
1523
1524 #ifdef SUPPORT_LIBZ
1525 if (frtype == FR_LIBZ)
1526 bufflength = 2*bufthird +
1527 gzread (ingz, main_buffer + 2*bufthird, bufthird);
1528 else
1529 #endif
1530
1531 #ifdef SUPPORT_LIBBZ2
1532 if (frtype == FR_LIBBZ2)
1533 bufflength = 2*bufthird +
1534 BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1535 else
1536 #endif
1537
1538 bufflength = 2*bufthird +
1539 (input_line_buffered?
1540 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1541 fread(main_buffer + 2*bufthird, 1, bufthird, in));
1542 endptr = main_buffer + bufflength;
1543
1544 /* Adjust any last match point */
1545
1546 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1547 }
1548 } /* Loop through the whole file */
1549
1550 /* End of file; print final "after" lines if wanted; do_after_lines sets
1551 hyphenpending if it prints something. */
1552
1553 if (only_matching < 0 && !count_only)
1554 {
1555 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1556 hyphenpending |= endhyphenpending;
1557 }
1558
1559 /* Print the file name if we are looking for those without matches and there
1560 were none. If we found a match, we won't have got this far. */
1561
1562 if (filenames == FN_NOMATCH_ONLY)
1563 {
1564 fprintf(stdout, "%s\n", printname);
1565 return 0;
1566 }
1567
1568 /* Print the match count if wanted */
1569
1570 if (count_only)
1571 {
1572 if (count > 0 || !omit_zero_count)
1573 {
1574 if (printname != NULL && filenames != FN_NONE)
1575 fprintf(stdout, "%s:", printname);
1576 fprintf(stdout, "%d\n", count);
1577 }
1578 }
1579
1580 return rc;
1581 }
1582
1583
1584
1585 /*************************************************
1586 * Grep a file or recurse into a directory *
1587 *************************************************/
1588
1589 /* Given a path name, if it's a directory, scan all the files if we are
1590 recursing; if it's a file, grep it.
1591
1592 Arguments:
1593 pathname the path to investigate
1594 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1595 only_one_at_top TRUE if the path is the only one at toplevel
1596
1597 Returns: 0 if there was at least one match
1598 1 if there were no matches
1599 2 there was some kind of error
1600
1601 However, file opening failures are suppressed if "silent" is set.
1602 */
1603
1604 static int
1605 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1606 {
1607 int rc = 1;
1608 int sep;
1609 int frtype;
1610 void *handle;
1611 FILE *in = NULL; /* Ensure initialized */
1612
1613 #ifdef SUPPORT_LIBZ
1614 gzFile ingz = NULL;
1615 #endif
1616
1617 #ifdef SUPPORT_LIBBZ2
1618 BZFILE *inbz2 = NULL;
1619 #endif
1620
1621 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1622 int pathlen;
1623 #endif
1624
1625 /* If the file name is "-" we scan stdin */
1626
1627 if (strcmp(pathname, "-") == 0)
1628 {
1629 return pcregrep(stdin, FR_PLAIN, stdin_name,
1630 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1631 stdin_name : NULL);
1632 }
1633
1634 /* If the file is a directory, skip if skipping or if we are recursing, scan
1635 each file and directory within it, subject to any include or exclude patterns
1636 that were set. The scanning code is localized so it can be made
1637 system-specific. */
1638
1639 if ((sep = isdirectory(pathname)) != 0)
1640 {
1641 if (dee_action == dee_SKIP) return 1;
1642 if (dee_action == dee_RECURSE)
1643 {
1644 char buffer[1024];
1645 char *nextfile;
1646 directory_type *dir = opendirectory(pathname);
1647
1648 if (dir == NULL)
1649 {
1650 if (!silent)
1651 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1652 strerror(errno));
1653 return 2;
1654 }
1655
1656 while ((nextfile = readdirectory(dir)) != NULL)
1657 {
1658 int frc, nflen;
1659 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1660 nflen = (int)(strlen(nextfile));
1661
1662 if (isdirectory(buffer))
1663 {
1664 if (exclude_dir_compiled != NULL &&
1665 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1666 continue;
1667
1668 if (include_dir_compiled != NULL &&
1669 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1670 continue;
1671 }
1672 else
1673 {
1674 if (exclude_compiled != NULL &&
1675 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1676 continue;
1677
1678 if (include_compiled != NULL &&
1679 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1680 continue;
1681 }
1682
1683 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1684 if (frc > 1) rc = frc;
1685 else if (frc == 0 && rc == 1) rc = 0;
1686 }
1687
1688 closedirectory(dir);
1689 return rc;
1690 }
1691 }
1692
1693 /* If the file is not a directory and not a regular file, skip it if that's
1694 been requested. */
1695
1696 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1697
1698 /* Control reaches here if we have a regular file, or if we have a directory
1699 and recursion or skipping was not requested, or if we have anything else and
1700 skipping was not requested. The scan proceeds. If this is the first and only
1701 argument at top level, we don't show the file name, unless we are only showing
1702 the file name, or the filename was forced (-H). */
1703
1704 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1705 pathlen = (int)(strlen(pathname));
1706 #endif
1707
1708 /* Open using zlib if it is supported and the file name ends with .gz. */
1709
1710 #ifdef SUPPORT_LIBZ
1711 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1712 {
1713 ingz = gzopen(pathname, "rb");
1714 if (ingz == NULL)
1715 {
1716 if (!silent)
1717 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1718 strerror(errno));
1719 return 2;
1720 }
1721 handle = (void *)ingz;
1722 frtype = FR_LIBZ;
1723 }
1724 else
1725 #endif
1726
1727 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1728
1729 #ifdef SUPPORT_LIBBZ2
1730 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1731 {
1732 inbz2 = BZ2_bzopen(pathname, "rb");
1733 handle = (void *)inbz2;
1734 frtype = FR_LIBBZ2;
1735 }
1736 else
1737 #endif
1738
1739 /* Otherwise use plain fopen(). The label is so that we can come back here if
1740 an attempt to read a .bz2 file indicates that it really is a plain file. */
1741
1742 #ifdef SUPPORT_LIBBZ2
1743 PLAIN_FILE:
1744 #endif
1745 {
1746 in = fopen(pathname, "rb");
1747 handle = (void *)in;
1748 frtype = FR_PLAIN;
1749 }
1750
1751 /* All the opening methods return errno when they fail. */
1752
1753 if (handle == NULL)
1754 {
1755 if (!silent)
1756 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1757 strerror(errno));
1758 return 2;
1759 }
1760
1761 /* Now grep the file */
1762
1763 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1764 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1765
1766 /* Close in an appropriate manner. */
1767
1768 #ifdef SUPPORT_LIBZ
1769 if (frtype == FR_LIBZ)
1770 gzclose(ingz);
1771 else
1772 #endif
1773
1774 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1775 read failed. If the error indicates that the file isn't in fact bzipped, try
1776 again as a normal file. */
1777
1778 #ifdef SUPPORT_LIBBZ2
1779 if (frtype == FR_LIBBZ2)
1780 {
1781 if (rc == 3)
1782 {
1783 int errnum;
1784 const char *err = BZ2_bzerror(inbz2, &errnum);
1785 if (errnum == BZ_DATA_ERROR_MAGIC)
1786 {
1787 BZ2_bzclose(inbz2);
1788 goto PLAIN_FILE;
1789 }
1790 else if (!silent)
1791 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1792 pathname, err);
1793 rc = 2; /* The normal "something went wrong" code */
1794 }
1795 BZ2_bzclose(inbz2);
1796 }
1797 else
1798 #endif
1799
1800 /* Normal file close */
1801
1802 fclose(in);
1803
1804 /* Pass back the yield from pcregrep(). */
1805
1806 return rc;
1807 }
1808
1809
1810
1811
1812 /*************************************************
1813 * Usage function *
1814 *************************************************/
1815
1816 static int
1817 usage(int rc)
1818 {
1819 option_item *op;
1820 fprintf(stderr, "Usage: pcregrep [-");
1821 for (op = optionlist; op->one_char != 0; op++)
1822 {
1823 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1824 }
1825 fprintf(stderr, "] [long options] [pattern] [files]\n");
1826 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1827 "options.\n");
1828 return rc;
1829 }
1830
1831
1832
1833
1834 /*************************************************
1835 * Help function *
1836 *************************************************/
1837
1838 static void
1839 help(void)
1840 {
1841 option_item *op;
1842
1843 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1844 printf("Search for PATTERN in each FILE or standard input.\n");
1845 printf("PATTERN must be present if neither -e nor -f is used.\n");
1846 printf("\"-\" can be used as a file name to mean STDIN.\n");
1847
1848 #ifdef SUPPORT_LIBZ
1849 printf("Files whose names end in .gz are read using zlib.\n");
1850 #endif
1851
1852 #ifdef SUPPORT_LIBBZ2
1853 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1854 #endif
1855
1856 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1857 printf("Other files and the standard input are read as plain files.\n\n");
1858 #else
1859 printf("All files are read as plain files, without any interpretation.\n\n");
1860 #endif
1861
1862 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1863 printf("Options:\n");
1864
1865 for (op = optionlist; op->one_char != 0; op++)
1866 {
1867 int n;
1868 char s[4];
1869
1870 /* Two options were accidentally implemented and documented with underscores
1871 instead of hyphens in their names, something that was not noticed for quite a
1872 few releases. When fixing this, I left the underscored versions in the list
1873 in case people were using them. However, we don't want to display them in the
1874 help data. There are no other options that contain underscores, and we do not
1875 expect ever to implement such options. Therefore, just omit any option that
1876 contains an underscore. */
1877
1878 if (strchr(op->long_name, '_') != NULL) continue;
1879
1880 if (op->one_char > 0 && (op->long_name)[0] == 0)
1881 n = 31 - printf(" -%c", op->one_char);
1882 else
1883 {
1884 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1885 else strcpy(s, " ");
1886 n = 31 - printf(" %s --%s", s, op->long_name);
1887 }
1888
1889 if (n < 1) n = 1;
1890 printf("%.*s%s\n", n, " ", op->help_text);
1891 }
1892
1893 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1894 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1895 printf("When reading patterns or file names from a file, trailing white\n");
1896 printf("space is removed and blank lines are ignored.\n");
1897 printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1898 MAX_PATTERN_COUNT, PATBUFSIZE);
1899
1900 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1901 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1902 }
1903
1904
1905
1906
1907 /*************************************************
1908 * Handle a single-letter, no data option *
1909 *************************************************/
1910
1911 static int
1912 handle_option(int letter, int options)
1913 {
1914 switch(letter)
1915 {
1916 case N_FOFFSETS: file_offsets = TRUE; break;
1917 case N_HELP: help(); pcregrep_exit(0);
1918 case N_LBUFFER: line_buffered = TRUE; break;
1919 case N_LOFFSETS: line_offsets = number = TRUE; break;
1920 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
1921 case 'a': binary_files = BIN_TEXT; break;
1922 case 'c': count_only = TRUE; break;
1923 case 'F': process_options |= PO_FIXED_STRINGS; break;
1924 case 'H': filenames = FN_FORCE; break;
1925 case 'I': binary_files = BIN_NOMATCH; break;
1926 case 'h': filenames = FN_NONE; break;
1927 case 'i': options |= PCRE_CASELESS; break;
1928 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1929 case 'L': filenames = FN_NOMATCH_ONLY; break;
1930 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1931 case 'n': number = TRUE; break;
1932 case 'o': only_matching = 0; break;
1933 case 'q': quiet = TRUE; break;
1934 case 'r': dee_action = dee_RECURSE; break;
1935 case 's': silent = TRUE; break;
1936 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1937 case 'v': invert = TRUE; break;
1938 case 'w': process_options |= PO_WORD_MATCH; break;
1939 case 'x': process_options |= PO_LINE_MATCH; break;
1940
1941 case 'V':
1942 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1943 pcregrep_exit(0);
1944 break;
1945
1946 default:
1947 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1948 pcregrep_exit(usage(2));
1949 }
1950
1951 return options;
1952 }
1953
1954
1955
1956
1957 /*************************************************
1958 * Construct printed ordinal *
1959 *************************************************/
1960
/* This turns a number into "1st", "2nd", "3rd", "4th", etc. Numbers whose
last two digits are 11, 12 or 13 take "th" ("11th", "112th"), not the suffix
that their final digit alone would suggest.

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; the contents are
            overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[16];   /* Room for any 32-bit int, the suffix, and a zero */
const char *suffix = "th";
int last_two = n % 100;
int len = sprintf(buffer, "%d", n);

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

strcpy(buffer + len, suffix);
return buffer;
}
1979
1980
1981
1982 /*************************************************
1983 * Compile a single pattern *
1984 *************************************************/
1985
1986 /* When the -F option has been used, this is called for each substring.
1987 Otherwise it's called for each supplied pattern.
1988
1989 Arguments:
1990 pattern the pattern string
1991 options the PCRE options
1992 filename the file name, or NULL for a command-line pattern
1993 count 0 if this is the only command line pattern, or
1994 number of the command line pattern, or
1995 linenumber for a pattern from a file
1996
1997 Returns: TRUE on success, FALSE after an error
1998 */
1999
2000 static BOOL
2001 compile_single_pattern(char *pattern, int options, char *filename, int count)
2002 {
2003 char buffer[PATBUFSIZE];
2004 const char *error;
2005 int errptr;
2006
2007 if (pattern_count >= MAX_PATTERN_COUNT)
2008 {
2009 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
2010 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
2011 return FALSE;
2012 }
2013
2014 sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
2015 suffix[process_options]);
2016 pattern_list[pattern_count] =
2017 pcre_compile(buffer, options, &error, &errptr, pcretables);
2018 if (pattern_list[pattern_count] != NULL)
2019 {
2020 pattern_count++;
2021 return TRUE;
2022 }
2023
2024 /* Handle compile errors */
2025
2026 errptr -= (int)strlen(prefix[process_options]);
2027 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
2028
2029 if (filename == NULL)
2030 {
2031 if (count == 0)
2032 fprintf(stderr, "pcregrep: Error in command-line regex "
2033 "at offset %d: %s\n", errptr, error);
2034 else
2035 fprintf(stderr, "pcregrep: Error in %s command-line regex "
2036 "at offset %d: %s\n", ordin(count), errptr, error);
2037 }
2038 else
2039 {
2040 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2041 "at offset %d: %s\n", count, filename, errptr, error);
2042 }
2043
2044 return FALSE;
2045 }
2046
2047
2048
2049 /*************************************************
2050 * Compile one supplied pattern *
2051 *************************************************/
2052
2053 /* When the -F option has been used, each string may be a list of strings,
2054 separated by line breaks. They will be matched literally.
2055
2056 Arguments:
2057 pattern the pattern string
2058 options the PCRE options
2059 filename the file name, or NULL for a command-line pattern
2060 count 0 if this is the only command line pattern, or
2061 number of the command line pattern, or
2062 linenumber for a pattern from a file
2063
2064 Returns: TRUE on success, FALSE after an error
2065 */
2066
2067 static BOOL
2068 compile_pattern(char *pattern, int options, char *filename, int count)
2069 {
2070 if ((process_options & PO_FIXED_STRINGS) != 0)
2071 {
2072 char *eop = pattern + strlen(pattern);
2073 char buffer[PATBUFSIZE];
2074 for(;;)
2075 {
2076 int ellength;
2077 char *p = end_of_line(pattern, eop, &ellength);
2078 if (ellength == 0)
2079 return compile_single_pattern(pattern, options, filename, count);
2080 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
2081 pattern = p;
2082 if (!compile_single_pattern(buffer, options, filename, count))
2083 return FALSE;
2084 }
2085 }
2086 else return compile_single_pattern(pattern, options, filename, count);
2087 }
2088
2089
2090
2091 /*************************************************
2092 * Main program *
2093 *************************************************/
2094
2095 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2096
2097 int
2098 main(int argc, char **argv)
2099 {
2100 int i, j;
2101 int rc = 1;
2102 int pcre_options = 0;
2103 int cmd_pattern_count = 0;
2104 int hint_count = 0;
2105 int errptr;
2106 BOOL only_one_at_top;
2107 char *patterns[MAX_PATTERN_COUNT];
2108 const char *locale_from = "--locale";
2109 const char *error;
2110
2111 #ifdef SUPPORT_PCREGREP_JIT
2112 pcre_jit_stack *jit_stack = NULL;
2113 #endif
2114
2115 /* Set the default line ending value from the default in the PCRE library;
2116 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2117 Note that the return values from pcre_config(), though derived from the ASCII
2118 codes, are the same in EBCDIC environments, so we must use the actual values
2119 rather than escapes such as as '\r'. */
2120
2121 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2122 switch(i)
2123 {
2124 default: newline = (char *)"lf"; break;
2125 case 13: newline = (char *)"cr"; break;
2126 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2127 case -1: newline = (char *)"any"; break;
2128 case -2: newline = (char *)"anycrlf"; break;
2129 }
2130
2131 /* Process the options */
2132
2133 for (i = 1; i < argc; i++)
2134 {
2135 option_item *op = NULL;
2136 char *option_data = (char *)""; /* default to keep compiler happy */
2137 BOOL longop;
2138 BOOL longopwasequals = FALSE;
2139
2140 if (argv[i][0] != '-') break;
2141
2142 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2143 but only if we have previously had -e or -f to define the patterns. */
2144
2145 if (argv[i][1] == 0)
2146 {
2147 if (pattern_filename != NULL || pattern_count > 0) break;
2148 else pcregrep_exit(usage(2));
2149 }
2150
2151 /* Handle a long name option, or -- to terminate the options */
2152
2153 if (argv[i][1] == '-')
2154 {
2155 char *arg = argv[i] + 2;
2156 char *argequals = strchr(arg, '=');
2157
2158 if (*arg == 0) /* -- terminates options */
2159 {
2160 i++;
2161 break; /* out of the options-handling loop */
2162 }
2163
2164 longop = TRUE;
2165
2166 /* Some long options have data that follows after =, for example file=name.
2167 Some options have variations in the long name spelling: specifically, we
2168 allow "regexp" because GNU grep allows it, though I personally go along
2169 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2170 These options are entered in the table as "regex(p)". Options can be in
2171 both these categories. */
2172
2173 for (op = optionlist; op->one_char != 0; op++)
2174 {
2175 char *opbra = strchr(op->long_name, '(');
2176 char *equals = strchr(op->long_name, '=');
2177
2178 /* Handle options with only one spelling of the name */
2179
2180 if (opbra == NULL) /* Does not contain '(' */
2181 {
2182 if (equals == NULL) /* Not thing=data case */
2183 {
2184 if (strcmp(arg, op->long_name) == 0) break;
2185 }
2186 else /* Special case xxx=data */
2187 {
2188 int oplen = (int)(equals - op->long_name);
2189 int arglen = (argequals == NULL)?
2190 (int)strlen(arg) : (int)(argequals - arg);
2191 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2192 {
2193 option_data = arg + arglen;
2194 if (*option_data == '=')
2195 {
2196 option_data++;
2197 longopwasequals = TRUE;
2198 }
2199 break;
2200 }
2201 }
2202 }
2203
2204 /* Handle options with an alternate spelling of the name */
2205
2206 else
2207 {
2208 char buff1[24];
2209 char buff2[24];
2210
2211 int baselen = (int)(opbra - op->long_name);
2212 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2213 int arglen = (argequals == NULL || equals == NULL)?
2214 (int)strlen(arg) : (int)(argequals - arg);
2215
2216 sprintf(buff1, "%.*s", baselen, op->long_name);
2217 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2218
2219 if (strncmp(arg, buff1, arglen) == 0 ||
2220 strncmp(arg, buff2, arglen) == 0)
2221 {
2222 if (equals != NULL && argequals != NULL)
2223 {
2224 option_data = argequals;
2225 if (*option_data == '=')
2226 {
2227 option_data++;
2228 longopwasequals = TRUE;
2229 }
2230 }
2231 break;
2232 }
2233 }
2234 }
2235
2236 if (op->one_char == 0)
2237 {
2238 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2239 pcregrep_exit(usage(2));
2240 }
2241 }
2242
2243 /* Jeffrey Friedl's debugging harness uses these additional options which
2244 are not in the right form for putting in the option table because they use
2245 only one hyphen, yet are more than one character long. By putting them
2246 separately here, they will not get displayed as part of the help() output,
2247 but I don't think Jeffrey will care about that. */
2248
2249 #ifdef JFRIEDL_DEBUG
2250 else if (strcmp(argv[i], "-pre") == 0) {
2251 jfriedl_prefix = argv[++i];
2252 continue;
2253 } else if (strcmp(argv[i], "-post") == 0) {
2254 jfriedl_postfix = argv[++i];
2255 continue;
2256 } else if (strcmp(argv[i], "-XT") == 0) {
2257 sscanf(argv[++i], "%d", &jfriedl_XT);
2258 continue;
2259 } else if (strcmp(argv[i], "-XR") == 0) {
2260 sscanf(argv[++i], "%d", &jfriedl_XR);
2261 continue;
2262 }
2263 #endif
2264
2265
2266 /* One-char options; many that have no data may be in a single argument; we
2267 continue till we hit the last one or one that needs data. */
2268
2269 else
2270 {
2271 char *s = argv[i] + 1;
2272 longop = FALSE;
2273 while (*s != 0)
2274 {
2275 for (op = optionlist; op->one_char != 0; op++)
2276 {
2277 if (*s == op->one_char) break;
2278 }
2279 if (op->one_char == 0)
2280 {
2281 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2282 *s, argv[i]);
2283 pcregrep_exit(usage(2));
2284 }
2285
2286 /* Check for a single-character option that has data: OP_OP_NUMBER
2287 is used for one that either has a numerical number or defaults, i.e. the
2288 data is optional. If a digit follows, there is data; if not, carry on
2289 with other single-character options in the same string. */
2290
2291 option_data = s+1;
2292 if (op->type == OP_OP_NUMBER)
2293 {
2294 if (isdigit((unsigned char)s[1])) break;
2295 }
2296 else /* Check for end or a dataless option */
2297 {
2298 if (op->type != OP_NODATA || s[1] == 0) break;
2299 }
2300
2301 /* Handle a single-character option with no data, then loop for the
2302 next character in the string. */
2303
2304 pcre_options = handle_option(*s++, pcre_options);
2305 }
2306 }
2307
2308 /* At this point we should have op pointing to a matched option. If the type
2309 is NO_DATA, it means that there is no data, and the option might set
2310 something in the PCRE options. */
2311
2312 if (op->type == OP_NODATA)
2313 {
2314 pcre_options = handle_option(op->one_char, pcre_options);
2315 continue;
2316 }
2317
2318 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2319 either has a value or defaults to something. It cannot have data in a
2320 separate item. At the moment, the only such options are "colo(u)r",
2321 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2322
2323 if (*option_data == 0 &&
2324 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2325 {
2326 switch (op->one_char)
2327 {
2328 case N_COLOUR:
2329 colour_option = (char *)"auto";
2330 break;
2331
2332 case 'o':
2333 only_matching = 0;
2334 break;
2335
2336 #ifdef JFRIEDL_DEBUG
2337 case 'S':
2338 S_arg = 0;
2339 break;
2340 #endif
2341 }
2342 continue;
2343 }
2344
2345 /* Otherwise, find the data string for the option. */
2346
2347 if (*option_data == 0)
2348 {
2349 if (i >= argc - 1 || longopwasequals)
2350 {
2351 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2352 pcregrep_exit(usage(2));
2353 }
2354 option_data = argv[++i];
2355 }
2356
2357 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2358 multiple times to create a list of patterns. */
2359
2360 if (op->type == OP_PATLIST)
2361 {
2362 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2363 {
2364 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2365 MAX_PATTERN_COUNT);
2366 return 2;
2367 }
2368 patterns[cmd_pattern_count++] = option_data;
2369 }
2370
2371 /* Handle OP_BINARY_FILES */
2372
2373 else if (op->type == OP_BINFILES)
2374 {
2375 if (strcmp(option_data, "binary") == 0)
2376 binary_files = BIN_BINARY;
2377 else if (strcmp(option_data, "without-match") == 0)
2378 binary_files = BIN_NOMATCH;
2379 else if (strcmp(option_data, "text") == 0)
2380 binary_files = BIN_TEXT;
2381 else
2382 {
2383 fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2384 option_data);
2385 pcregrep_exit(usage(2));
2386 }
2387 }
2388
2389 /* Otherwise, deal with single string or numeric data values. */
2390
2391 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2392 op->type != OP_OP_NUMBER)
2393 {
2394 *((char **)op->dataptr) = option_data;
2395 }
2396
2397 /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2398 only for unpicking arguments, so just keep it simple. */
2399
2400 else
2401 {
2402 unsigned long int n = 0;
2403 char *endptr = option_data;
2404 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2405 while (isdigit((unsigned char)(*endptr)))
2406 n = n * 10 + (int)(*endptr++ - '0');
2407 if (toupper(*endptr) == 'K')
2408 {
2409 n *= 1024;
2410 endptr++;
2411 }
2412 else if (toupper(*endptr) == 'M')
2413 {
2414 n *= 1024*1024;
2415 endptr++;
2416 }
2417 if (*endptr != 0)
2418 {
2419 if (longop)
2420 {
2421 char *equals = strchr(op->long_name, '=');
2422 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2423 (int)(equals - op->long_name);
2424 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2425 option_data, nlen, op->long_name);
2426 }
2427 else
2428 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2429 option_data, op->one_char);
2430 pcregrep_exit(usage(2));
2431 }
2432 if (op->type == OP_LONGNUMBER)
2433 *((unsigned long int *)op->dataptr) = n;
2434 else
2435 *((int *)op->dataptr) = n;
2436 }
2437 }
2438
2439 /* Options have been decoded. If -C was used, its value is used as a default
2440 for -A and -B. */
2441
2442 if (both_context > 0)
2443 {
2444 if (after_context == 0) after_context = both_context;
2445 if (before_context == 0) before_context = both_context;
2446 }
2447
2448 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2449 However, the latter two set only_matching. */
2450
2451 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2452 (file_offsets && line_offsets))
2453 {
2454 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2455 "and/or --line-offsets\n");
2456 pcregrep_exit(usage(2));
2457 }
2458
2459 if (file_offsets || line_offsets) only_matching = 0;
2460
2461 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2462 LC_ALL environment variable is set, and if so, use it. */
2463
2464 if (locale == NULL)
2465 {
2466 locale = getenv("LC_ALL");
2467 locale_from = "LCC_ALL";
2468 }
2469
2470 if (locale == NULL)
2471 {
2472 locale = getenv("LC_CTYPE");
2473 locale_from = "LC_CTYPE";
2474 }
2475
2476 /* If a locale has been provided, set it, and generate the tables the PCRE
2477 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2478
2479 if (locale != NULL)
2480 {
2481 if (setlocale(LC_CTYPE, locale) == NULL)
2482 {
2483 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2484 locale, locale_from);
2485 return 2;
2486 }
2487 pcretables = pcre_maketables();
2488 }
2489
2490 /* Sort out colouring */
2491
2492 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2493 {
2494 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2495 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2496 else
2497 {
2498 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2499 colour_option);
2500 return 2;
2501 }
2502 if (do_colour)
2503 {
2504 char *cs = getenv("PCREGREP_COLOUR");
2505 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2506 if (cs != NULL) colour_string = cs;
2507 }
2508 }
2509
2510 /* Interpret the newline type; the default settings are Unix-like. */
2511
2512 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2513 {
2514 pcre_options |= PCRE_NEWLINE_CR;
2515 endlinetype = EL_CR;
2516 }
2517 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2518 {
2519 pcre_options |= PCRE_NEWLINE_LF;
2520 endlinetype = EL_LF;
2521 }
2522 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2523 {
2524 pcre_options |= PCRE_NEWLINE_CRLF;
2525 endlinetype = EL_CRLF;
2526 }
2527 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2528 {
2529 pcre_options |= PCRE_NEWLINE_ANY;
2530 endlinetype = EL_ANY;
2531 }
2532 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2533 {
2534 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2535 endlinetype = EL_ANYCRLF;
2536 }
2537 else
2538 {
2539 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2540 return 2;
2541 }
2542
2543 /* Interpret the text values for -d and -D */
2544
2545 if (dee_option != NULL)
2546 {
2547 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2548 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2549 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2550 else
2551 {
2552 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2553 return 2;
2554 }
2555 }
2556
2557 if (DEE_option != NULL)
2558 {
2559 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2560 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2561 else
2562 {
2563 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2564 return 2;
2565 }
2566 }
2567
2568 /* Check the values for Jeffrey Friedl's debugging options. */
2569
2570 #ifdef JFRIEDL_DEBUG
2571 if (S_arg > 9)
2572 {
2573 fprintf(stderr, "pcregrep: bad value for -S option\n");
2574 return 2;
2575 }
2576 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2577 {
2578 if (jfriedl_XT == 0) jfriedl_XT = 1;
2579 if (jfriedl_XR == 0) jfriedl_XR = 1;
2580 }
2581 #endif
2582
2583 /* Get memory for the main buffer, and to store the pattern and hints lists. */
2584
2585 bufsize = 3*bufthird;
2586 main_buffer = (char *)malloc(bufsize);
2587 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2588 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2589
2590 if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2591 {
2592 fprintf(stderr, "pcregrep: malloc failed\n");
2593 goto EXIT2;
2594 }
2595
2596 /* If no patterns were provided by -e, and there is no file provided by -f,
2597 the first argument is the one and only pattern, and it must exist. */
2598
2599 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2600 {
2601 if (i >= argc) return usage(2);
2602 patterns[cmd_pattern_count++] = argv[i++];
2603 }
2604
2605 /* Compile the patterns that were provided on the command line, either by
2606 multiple uses of -e or as a single unkeyed pattern. */
2607
2608 for (j = 0; j < cmd_pattern_count; j++)
2609 {
2610 if (!compile_pattern(patterns[j], pcre_options, NULL,
2611 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2612 goto EXIT2;
2613 }
2614
2615 /* Compile the regular expressions that are provided in a file. */
2616
2617 if (pattern_filename != NULL)
2618 {
2619 int linenumber = 0;
2620 FILE *f;
2621 char *filename;
2622 char buffer[PATBUFSIZE];
2623
2624 if (strcmp(pattern_filename, "-") == 0)
2625 {
2626 f = stdin;
2627 filename = stdin_name;
2628 }
2629 else
2630 {
2631 f = fopen(pattern_filename, "r");
2632 if (f == NULL)
2633 {
2634 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2635 strerror(errno));
2636 goto EXIT2;
2637 }
2638 filename = pattern_filename;
2639 }
2640
2641 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2642 {
2643 char *s = buffer + (int)strlen(buffer);
2644 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2645 *s = 0;
2646 linenumber++;
2647 if (buffer[0] == 0) continue; /* Skip blank lines */
2648 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2649 goto EXIT2;
2650 }
2651
2652 if (f != stdin) fclose(f);
2653 }
2654
2655 /* Study the regular expressions, as we will be running them many times. Unless
2656 JIT has been explicitly disabled, arrange a stack for it to use. */
2657
2658 #ifdef SUPPORT_PCREGREP_JIT
2659 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2660 jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2661 #endif
2662
2663 for (j = 0; j < pattern_count; j++)
2664 {
2665 hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
2666 if (error != NULL)
2667 {
2668 char s[16];
2669 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2670 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2671 goto EXIT2;
2672 }
2673 hint_count++;
2674 #ifdef SUPPORT_PCREGREP_JIT
2675 if (jit_stack != NULL && hints_list[j] != NULL)
2676 pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);
2677 #endif
2678 }
2679
2680 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2681 pcre_extra block for each pattern. */
2682
2683 if (match_limit > 0 || match_limit_recursion > 0)
2684 {
2685 for (j = 0; j < pattern_count; j++)
2686 {
2687 if (hints_list[j] == NULL)
2688 {
2689 hints_list[j] = malloc(sizeof(pcre_extra));
2690 if (hints_list[j] == NULL)
2691 {
2692 fprintf(stderr, "pcregrep: malloc failed\n");
2693 pcregrep_exit(2);
2694 }
2695 }
2696 if (match_limit > 0)
2697 {
2698 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2699 hints_list[j]->match_limit = match_limit;
2700 }
2701 if (match_limit_recursion > 0)
2702 {
2703 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2704 hints_list[j]->match_limit_recursion = match_limit_recursion;
2705 }
2706 }
2707 }
2708
2709 /* If there are include or exclude patterns, compile them. */
2710
2711 if (exclude_pattern != NULL)
2712 {
2713 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2714 pcretables);
2715 if (exclude_compiled == NULL)
2716 {
2717 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2718 errptr, error);
2719 goto EXIT2;
2720 }
2721 }
2722
2723 if (include_pattern != NULL)
2724 {
2725 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2726 pcretables);
2727 if (include_compiled == NULL)
2728 {
2729 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2730 errptr, error);
2731 goto EXIT2;
2732 }
2733 }
2734
2735 if (exclude_dir_pattern != NULL)
2736 {
2737 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2738 pcretables);
2739 if (exclude_dir_compiled == NULL)
2740 {
2741 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2742 errptr, error);
2743 goto EXIT2;
2744 }
2745 }
2746
2747 if (include_dir_pattern != NULL)
2748 {
2749 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2750 pcretables);
2751 if (include_dir_compiled == NULL)
2752 {
2753 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2754 errptr, error);
2755 goto EXIT2;
2756 }
2757 }
2758
2759 /* If a file that contains a list of files to search has been specified, read
2760 it line by line and search the given files. Otherwise, if there are no further
2761 arguments, do the business on stdin and exit. */
2762
2763 if (file_list != NULL)
2764 {
2765 char buffer[PATBUFSIZE];
2766 FILE *fl;
2767 if (strcmp(file_list, "-") == 0) fl = stdin; else
2768 {
2769 fl = fopen(file_list, "rb");
2770 if (fl == NULL)
2771 {
2772 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", file_list,
2773 strerror(errno));
2774 goto EXIT2;
2775 }
2776 }
2777 while (fgets(buffer, PATBUFSIZE, fl) != NULL)
2778 {
2779 int frc;
2780 char *end = buffer + (int)strlen(buffer);
2781 while (end > buffer && isspace(end[-1])) end--;
2782 *end = 0;
2783 if (*buffer != 0)
2784 {
2785 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
2786 if (frc > 1) rc = frc;
2787 else if (frc == 0 && rc == 1) rc = 0;
2788 }
2789 }
2790 if (fl != stdin) fclose (fl);
2791 }
2792
2793 /* Do this only if there was no file list (and no file arguments). */
2794
2795 else if (i >= argc)
2796 {
2797 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2798 (filenames > FN_DEFAULT)? stdin_name : NULL);
2799 goto EXIT;
2800 }
2801
2802 /* After handling file-list or if there are remaining arguments, work through
2803 them as files or directories. Pass in the fact that there is only one argument
2804 at top level - this suppresses the file name if the argument is not a directory
2805 and filenames are not otherwise forced. */
2806
2807 only_one_at_top = i == argc - 1 && file_list == NULL;
2808
2809 for (; i < argc; i++)
2810 {
2811 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2812 only_one_at_top);
2813 if (frc > 1) rc = frc;
2814 else if (frc == 0 && rc == 1) rc = 0;
2815 }
2816
2817 EXIT:
2818 #ifdef SUPPORT_PCREGREP_JIT
2819 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2820 #endif
2821 if (main_buffer != NULL) free(main_buffer);
2822 if (pattern_list != NULL)
2823 {
2824 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2825 free(pattern_list);
2826 }
2827 if (hints_list != NULL)
2828 {
2829 for (i = 0; i < hint_count; i++)
2830 {
2831 if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);
2832 }
2833 free(hints_list);
2834 }
2835 pcregrep_exit(rc);
2836
2837 EXIT2:
2838 rc = 2;
2839 goto EXIT;
2840 }
2841
2842 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5