/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1353 - (show annotations)
Mon Aug 5 16:24:02 2013 UTC (6 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 92465 byte(s)
Error occurred while calculating annotation data.
Fix pcregrep looping bug for multiline empty string match.
1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2012 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
68 #define FALSE 0
69 #define TRUE 1
70
71 typedef int BOOL;
72
73 #define OFFSET_SIZE 99
74
75 #if BUFSIZ > 8192
76 #define MAXPATLEN BUFSIZ
77 #else
78 #define MAXPATLEN 8192
79 #endif
80
81 #define PATBUFSIZE (MAXPATLEN + 10) /* Allows for prefix+suffix */
82
83 /* Values for the "filenames" variable, which specifies options for file name
84 output. The order is important; it is assumed that a file name is wanted for
85 all values greater than FN_DEFAULT. */
86
87 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
88
89 /* File reading styles */
90
91 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
92
93 /* Actions for the -d and -D options */
94
95 enum { dee_READ, dee_SKIP, dee_RECURSE };
96 enum { DEE_READ, DEE_SKIP };
97
98 /* Actions for special processing options (flag bits) */
99
100 #define PO_WORD_MATCH 0x0001
101 #define PO_LINE_MATCH 0x0002
102 #define PO_FIXED_STRINGS 0x0004
103
104 /* Line ending types */
105
106 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
107
108 /* Binary file options */
109
110 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
111
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored, and
casting to (void) does not suppress it. The macro therefore tests the result
in an empty "if". Oddly, this does not also seem to apply to fprintf().

The "if" is wrapped in do { ... } while (0) so that the expansion is always
exactly one statement. A bare "if (...) {}" cannot be used as the unbraced body
of an if/else, because the caller's trailing semicolon detaches the "else".
Callers must write FWRITE(...); with the semicolon. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
119
120
121
122 /*************************************************
123 * Global variables *
124 *************************************************/
125
126 /* Jeffrey Friedl has some debugging requirements that are not part of the
127 regular code. */
128
129 #ifdef JFRIEDL_DEBUG
130 static int S_arg = -1;
131 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
132 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
133 static const char *jfriedl_prefix = "";
134 static const char *jfriedl_postfix = "";
135 #endif
136
137 static int endlinetype;
138
139 static char *colour_string = (char *)"1;31";
140 static char *colour_option = NULL;
141 static char *dee_option = NULL;
142 static char *DEE_option = NULL;
143 static char *locale = NULL;
144 static char *main_buffer = NULL;
145 static char *newline = NULL;
146 static char *om_separator = (char *)"";
147 static char *stdin_name = (char *)"(standard input)";
148
149 static const unsigned char *pcretables = NULL;
150
151 static int after_context = 0;
152 static int before_context = 0;
153 static int binary_files = BIN_BINARY;
154 static int both_context = 0;
155 static int bufthird = PCREGREP_BUFSIZE;
156 static int bufsize = 3*PCREGREP_BUFSIZE;
157
158 #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
159 static int dee_action = dee_SKIP;
160 #else
161 static int dee_action = dee_READ;
162 #endif
163
164 static int DEE_action = DEE_READ;
165 static int error_count = 0;
166 static int filenames = FN_DEFAULT;
167 static int pcre_options = 0;
168 static int process_options = 0;
169
170 #ifdef SUPPORT_PCREGREP_JIT
171 static int study_options = PCRE_STUDY_JIT_COMPILE;
172 #else
173 static int study_options = 0;
174 #endif
175
176 static unsigned long int match_limit = 0;
177 static unsigned long int match_limit_recursion = 0;
178
179 static BOOL count_only = FALSE;
180 static BOOL do_colour = FALSE;
181 static BOOL file_offsets = FALSE;
182 static BOOL hyphenpending = FALSE;
183 static BOOL invert = FALSE;
184 static BOOL line_buffered = FALSE;
185 static BOOL line_offsets = FALSE;
186 static BOOL multiline = FALSE;
187 static BOOL number = FALSE;
188 static BOOL omit_zero_count = FALSE;
189 static BOOL resource_error = FALSE;
190 static BOOL quiet = FALSE;
191 static BOOL show_only_matching = FALSE;
192 static BOOL silent = FALSE;
193 static BOOL utf8 = FALSE;
194
195 /* Structure for list of --only-matching capturing numbers. */
196
typedef struct omstr {
  struct omstr *next;   /* next number in the chain; NULL at the end */
  int groupnum;         /* the number itself, as stored by add_number() */
} omstr;
201
202 static omstr *only_matching = NULL;
203 static omstr *only_matching_last = NULL;
204
205 /* Structure for holding the two variables that describe a number chain. */
206
typedef struct omdatastr {
  omstr **anchor;    /* address of the variable that holds the chain head */
  omstr **lastptr;   /* address of the variable that holds the last item */
} omdatastr;
211
212 static omdatastr only_matching_data = { &only_matching, &only_matching_last };
213
214 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
215
typedef struct fnstr {
  struct fnstr *next;   /* next entry in the chain; NULL at the end */
  char *name;           /* the file name; free_file_chain() does not free it */
} fnstr;
220
221 static fnstr *exclude_from = NULL;
222 static fnstr *exclude_from_last = NULL;
223 static fnstr *include_from = NULL;
224 static fnstr *include_from_last = NULL;
225
226 static fnstr *file_lists = NULL;
227 static fnstr *file_lists_last = NULL;
228 static fnstr *pattern_files = NULL;
229 static fnstr *pattern_files_last = NULL;
230
231 /* Structure for holding the two variables that describe a file name chain. */
232
typedef struct fndatastr {
  fnstr **anchor;    /* address of the variable that holds the chain head */
  fnstr **lastptr;   /* address of the variable that holds the last item */
} fndatastr;
237
238 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
239 static fndatastr include_from_data = { &include_from, &include_from_last };
240 static fndatastr file_lists_data = { &file_lists, &file_lists_last };
241 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
242
243 /* Structure for pattern and its compiled form; used for matching patterns and
244 also for include/exclude patterns. */
245
typedef struct patstr {
  struct patstr *next;   /* next pattern in the chain; NULL at the end */
  char *string;          /* pattern text; add_pattern() stores the pointer, it does not copy */
  pcre *compiled;        /* compiled pattern; add_pattern() initialises it to NULL */
  pcre_extra *hint;      /* study data; initialised to NULL, released with pcre_free_study() */
} patstr;
252
253 static patstr *patterns = NULL;
254 static patstr *patterns_last = NULL;
255 static patstr *include_patterns = NULL;
256 static patstr *include_patterns_last = NULL;
257 static patstr *exclude_patterns = NULL;
258 static patstr *exclude_patterns_last = NULL;
259 static patstr *include_dir_patterns = NULL;
260 static patstr *include_dir_patterns_last = NULL;
261 static patstr *exclude_dir_patterns = NULL;
262 static patstr *exclude_dir_patterns_last = NULL;
263
264 /* Structure holding the two variables that describe a pattern chain. A pointer
265 to such structures is used for each appropriate option. */
266
typedef struct patdatastr {
  patstr **anchor;    /* address of the variable that holds the chain head */
  patstr **lastptr;   /* address of the variable that holds the last item */
} patdatastr;
271
272 static patdatastr match_patdata = { &patterns, &patterns_last };
273 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
274 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
275 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
276 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
277
278 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
279 &include_dir_patterns, &exclude_dir_patterns };
280
281 static const char *incexname[4] = { "--include", "--exclude",
282 "--include-dir", "--exclude-dir" };
283
284 /* Structure for options and list of them */
285
286 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
287 OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
288
typedef struct option_item {
  int type;                /* one of the OP_xxx values */
  int one_char;            /* option letter, or a negative N_xxx value if there is none;
                              0 marks the end of optionlist */
  void *dataptr;           /* variable or chain descriptor set by the option; NULL when
                              the table entry has none */
  const char *long_name;   /* long option name, optionally followed by "=..." text;
                              may be "" */
  const char *help_text;   /* description printed by help() */
} option_item;
296
297 /* Options without a single-letter equivalent get a negative value. This can be
298 used to identify them. */
299
300 #define N_COLOUR (-1)
301 #define N_EXCLUDE (-2)
302 #define N_EXCLUDE_DIR (-3)
303 #define N_HELP (-4)
304 #define N_INCLUDE (-5)
305 #define N_INCLUDE_DIR (-6)
306 #define N_LABEL (-7)
307 #define N_LOCALE (-8)
308 #define N_NULL (-9)
309 #define N_LOFFSETS (-10)
310 #define N_FOFFSETS (-11)
311 #define N_LBUFFER (-12)
312 #define N_M_LIMIT (-13)
313 #define N_M_LIMIT_REC (-14)
314 #define N_BUFSIZE (-15)
315 #define N_NOJIT (-16)
316 #define N_FILE_LIST (-17)
317 #define N_BINARY_FILES (-18)
318 #define N_EXCLUDE_FROM (-19)
319 #define N_INCLUDE_FROM (-20)
320 #define N_OM_SEPARATOR (-21)
321
322 static option_item optionlist[] = {
323 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
324 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
325 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
326 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
327 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
328 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
329 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
330 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
331 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
332 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
333 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
334 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
335 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
336 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
337 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
338 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
339 { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
340 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
341 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
342 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
343 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
344 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
345 #ifdef SUPPORT_PCREGREP_JIT
346 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
347 #else
348 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
349 #endif
350 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
351 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
352 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
353 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
354 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
355 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
356 { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
357 { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
358 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
359 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
360 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
361 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
362 { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
363 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
364 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
365 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
366 { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
367 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
368 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
369 { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
370 { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
371
372 /* These two were accidentally implemented with underscores instead of
373 hyphens in the option names. As this was not discovered for several releases,
374 the incorrect versions are left in the table for compatibility. However, the
375 --help function misses out any option that has an underscore in its name. */
376
377 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
378 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
379
380 #ifdef JFRIEDL_DEBUG
381 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
382 #endif
383 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
384 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
385 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
386 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
387 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
388 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
389 { OP_NODATA, 0, NULL, NULL, NULL }
390 };
391
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively. Note
that the combination of -w and -x has the same effect as -x on its own, so we
can treat them as the same. Note that the PATBUFSIZE macro assumes the longest
prefix+suffix is 10 characters; if anything longer is added, it must be
adjusted. */
398
/* Eight entries each, one per combination of the PO_WORD_MATCH (1),
PO_LINE_MATCH (2) and PO_FIXED_STRINGS (4) flag bits; presumably indexed
directly by process_options - confirm against the code that uses them. Entries
2 and 3 (and 6 and 7) are identical because PO_LINE_MATCH overrides
PO_WORD_MATCH. */

static const char *prefix[] = {
  "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };

static const char *suffix[] = {
  "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
404
405 /* UTF-8 tables - used only when the newline setting is "any". */
406
407 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
408
409 const char utf8_table4[] = {
410 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
411 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
412 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
413 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
414
415
416
417 /*************************************************
418 * Exit from the program *
419 *************************************************/
420
421 /* If there has been a resource error, give a suitable message.
422
423 Argument: the return code
424 Returns: does not return
425 */
426
427 static void
428 pcregrep_exit(int rc)
429 {
430 if (resource_error)
431 {
432 fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
433 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
434 PCRE_ERROR_JIT_STACKLIMIT);
435 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
436 }
437 exit(rc);
438 }
439
440
441 /*************************************************
442 * Add item to chain of patterns *
443 *************************************************/
444
445 /* Used to add an item onto a chain, or just return an unconnected item if the
446 "after" argument is NULL.
447
448 Arguments:
449 s pattern string to add
450 after if not NULL points to item to insert after
451
452 Returns: new pattern block
453 */
454
455 static patstr *
456 add_pattern(char *s, patstr *after)
457 {
458 patstr *p = (patstr *)malloc(sizeof(patstr));
459 if (p == NULL)
460 {
461 fprintf(stderr, "pcregrep: malloc failed\n");
462 pcregrep_exit(2);
463 }
464 if (strlen(s) > MAXPATLEN)
465 {
466 fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
467 MAXPATLEN);
468 return NULL;
469 }
470 p->next = NULL;
471 p->string = s;
472 p->compiled = NULL;
473 p->hint = NULL;
474
475 if (after != NULL)
476 {
477 p->next = after->next;
478 after->next = p;
479 }
480 return p;
481 }
482
483
484 /*************************************************
485 * Free chain of patterns *
486 *************************************************/
487
488 /* Used for several chains of patterns.
489
490 Argument: pointer to start of chain
491 Returns: nothing
492 */
493
494 static void
495 free_pattern_chain(patstr *pc)
496 {
497 while (pc != NULL)
498 {
499 patstr *p = pc;
500 pc = p->next;
501 if (p->hint != NULL) pcre_free_study(p->hint);
502 if (p->compiled != NULL) pcre_free(p->compiled);
503 free(p);
504 }
505 }
506
507
508 /*************************************************
509 * Free chain of file names *
510 *************************************************/
511
512 /*
513 Argument: pointer to start of chain
514 Returns: nothing
515 */
516
517 static void
518 free_file_chain(fnstr *fn)
519 {
520 while (fn != NULL)
521 {
522 fnstr *f = fn;
523 fn = f->next;
524 free(f);
525 }
526 }
527
528
529 /*************************************************
530 * OS-specific functions *
531 *************************************************/
532
533 /* These functions are defined so that they can be made system specific,
534 although at present the only ones are for Unix, Win32, and for "no support". */
535
536
537 /************* Directory scanning in Unix ***********/
538
539 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
540 #include <sys/types.h>
541 #include <sys/stat.h>
542 #include <dirent.h>
543
544 typedef DIR directory_type;
545 #define FILESEP '/'
546
/* Returns 1 if the path names a directory, otherwise 0. A path that cannot be
stat()ed also yields 0, in the expectation that opening it as a file will then
fail. */

static int
isdirectory(char *filename)
{
  struct stat sb;

  if (stat(filename, &sb) >= 0 && S_ISDIR(sb.st_mode)) return 1;
  return 0;
}
555
556 static directory_type *
557 opendirectory(char *filename)
558 {
559 return opendir(filename);
560 }
561
562 static char *
563 readdirectory(directory_type *dir)
564 {
565 for (;;)
566 {
567 struct dirent *dent = readdir(dir);
568 if (dent == NULL) return NULL;
569 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
570 return dent->d_name;
571 }
572 /* Control never reaches here */
573 }
574
575 static void
576 closedirectory(directory_type *dir)
577 {
578 closedir(dir);
579 }
580
581
582 /************* Test for regular file in Unix **********/
583
/* Returns 1 if the path names a regular file, otherwise 0. A path that cannot
be stat()ed is reported as regular (1), in the expectation that opening it as
a file will then fail. */

static int
isregfile(char *filename)
{
  struct stat sb;

  if (stat(filename, &sb) < 0) return 1;
  return S_ISREG(sb.st_mode) ? 1 : 0;
}
592
593
594 /************* Test for a terminal in Unix **********/
595
596 static BOOL
597 is_stdout_tty(void)
598 {
599 return isatty(fileno(stdout));
600 }
601
602 static BOOL
603 is_file_tty(FILE *f)
604 {
605 return isatty(fileno(f));
606 }
607
608
609 /************* Directory scanning in Win32 ***********/
610
611 /* I (Philip Hazel) have no means of testing this code. It was contributed by
612 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
613 when it did not exist. David Byron added a patch that moved the #include of
614 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
615 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
616 undefined when it is indeed undefined. */
617
618 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
619
620 #ifndef STRICT
621 # define STRICT
622 #endif
623 #ifndef WIN32_LEAN_AND_MEAN
624 # define WIN32_LEAN_AND_MEAN
625 #endif
626
627 #include <windows.h>
628
629 #ifndef INVALID_FILE_ATTRIBUTES
630 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
631 #endif
632
typedef struct directory_type
{
  HANDLE handle;          /* search handle obtained from FindFirstFile() */
  BOOL first;             /* TRUE until readdirectory() has consumed the entry that
                             FindFirstFile() fetched */
  WIN32_FIND_DATA data;   /* the most recently fetched directory entry */
} directory_type;
639
640 #define FILESEP '/'
641
642 int
643 isdirectory(char *filename)
644 {
645 DWORD attr = GetFileAttributes(filename);
646 if (attr == INVALID_FILE_ATTRIBUTES)
647 return 0;
648 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
649 }
650
651 directory_type *
652 opendirectory(char *filename)
653 {
654 size_t len;
655 char *pattern;
656 directory_type *dir;
657 DWORD err;
658 len = strlen(filename);
659 pattern = (char *)malloc(len + 3);
660 dir = (directory_type *)malloc(sizeof(*dir));
661 if ((pattern == NULL) || (dir == NULL))
662 {
663 fprintf(stderr, "pcregrep: malloc failed\n");
664 pcregrep_exit(2);
665 }
666 memcpy(pattern, filename, len);
667 memcpy(&(pattern[len]), "\\*", 3);
668 dir->handle = FindFirstFile(pattern, &(dir->data));
669 if (dir->handle != INVALID_HANDLE_VALUE)
670 {
671 free(pattern);
672 dir->first = TRUE;
673 return dir;
674 }
675 err = GetLastError();
676 free(pattern);
677 free(dir);
678 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
679 return NULL;
680 }
681
682 char *
683 readdirectory(directory_type *dir)
684 {
685 for (;;)
686 {
687 if (!dir->first)
688 {
689 if (!FindNextFile(dir->handle, &(dir->data)))
690 return NULL;
691 }
692 else
693 {
694 dir->first = FALSE;
695 }
696 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
697 return dir->data.cFileName;
698 }
699 #ifndef _MSC_VER
700 return NULL; /* Keep compiler happy; never executed */
701 #endif
702 }
703
704 void
705 closedirectory(directory_type *dir)
706 {
707 FindClose(dir->handle);
708 free(dir);
709 }
710
711
712 /************* Test for regular file in Win32 **********/
713
714 /* I don't know how to do this, or if it can be done; assume all paths are
715 regular if they are not directories. */
716
/* Every path that isdirectory() does not report as a directory is treated as
a regular file. */

int
isregfile(char *filename)
{
  return isdirectory(filename) == 0;
}
721
722
723 /************* Test for a terminal in Win32 **********/
724
725 /* I don't know how to do this; assume never */
726
/* No terminal test is implemented in this branch, so standard output is
always reported as not being a terminal. */

static BOOL
is_stdout_tty(void)
{
  return FALSE;
}
732
733 static BOOL
734 is_file_tty(FILE *f)
735 {
736 return FALSE;
737 }
738
739
740 /************* Directory scanning when we can't do it ***********/
741
742 /* The type is void, and apart from isdirectory(), the functions do nothing. */
743
744 #else
745
#define FILESEP 0
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* A directory can never be opened: always returns a null pointer. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

/* There are never any entries: always returns a null pointer. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

/* Nothing to release. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
753
754
755 /************* Test for regular when we can't do it **********/
756
757 /* Assume all files are regular. */
758
int
isregfile(char *filename)
{
  (void)filename;   /* every path is assumed to be a regular file */
  return 1;
}
760
761
762 /************* Test for a terminal when we can't do it **********/
763
/* No terminal test is available in this branch, so standard output is always
reported as not being a terminal. */

static BOOL
is_stdout_tty(void)
{
  return FALSE;
}
769
770 static BOOL
771 is_file_tty(FILE *f)
772 {
773 return FALSE;
774 }
775
776 #endif
777
778
779
780 #ifndef HAVE_STRERROR
781 /*************************************************
782 * Provide strerror() for non-ANSI libraries *
783 *************************************************/
784
785 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
786 in their libraries, but can provide the same facility by this simple
787 alternative function. */
788
789 extern int sys_nerr;
790 extern char *sys_errlist[];
791
792 char *
793 strerror(int n)
794 {
795 if (n < 0 || n >= sys_nerr) return "unknown error number";
796 return sys_errlist[n];
797 }
798 #endif /* HAVE_STRERROR */
799
800
801
802 /*************************************************
803 * Usage function *
804 *************************************************/
805
806 static int
807 usage(int rc)
808 {
809 option_item *op;
810 fprintf(stderr, "Usage: pcregrep [-");
811 for (op = optionlist; op->one_char != 0; op++)
812 {
813 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
814 }
815 fprintf(stderr, "] [long options] [pattern] [files]\n");
816 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
817 "options.\n");
818 return rc;
819 }
820
821
822
823 /*************************************************
824 * Help function *
825 *************************************************/
826
827 static void
828 help(void)
829 {
830 option_item *op;
831
832 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
833 printf("Search for PATTERN in each FILE or standard input.\n");
834 printf("PATTERN must be present if neither -e nor -f is used.\n");
835 printf("\"-\" can be used as a file name to mean STDIN.\n");
836
837 #ifdef SUPPORT_LIBZ
838 printf("Files whose names end in .gz are read using zlib.\n");
839 #endif
840
841 #ifdef SUPPORT_LIBBZ2
842 printf("Files whose names end in .bz2 are read using bzlib2.\n");
843 #endif
844
845 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
846 printf("Other files and the standard input are read as plain files.\n\n");
847 #else
848 printf("All files are read as plain files, without any interpretation.\n\n");
849 #endif
850
851 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
852 printf("Options:\n");
853
854 for (op = optionlist; op->one_char != 0; op++)
855 {
856 int n;
857 char s[4];
858
859 /* Two options were accidentally implemented and documented with underscores
860 instead of hyphens in their names, something that was not noticed for quite a
861 few releases. When fixing this, I left the underscored versions in the list
862 in case people were using them. However, we don't want to display them in the
863 help data. There are no other options that contain underscores, and we do not
864 expect ever to implement such options. Therefore, just omit any option that
865 contains an underscore. */
866
867 if (strchr(op->long_name, '_') != NULL) continue;
868
869 if (op->one_char > 0 && (op->long_name)[0] == 0)
870 n = 31 - printf(" -%c", op->one_char);
871 else
872 {
873 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
874 else strcpy(s, " ");
875 n = 31 - printf(" %s --%s", s, op->long_name);
876 }
877
878 if (n < 1) n = 1;
879 printf("%.*s%s\n", n, " ", op->help_text);
880 }
881
882 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
883 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
884 printf("When reading patterns or file names from a file, trailing white\n");
885 printf("space is removed and blank lines are ignored.\n");
886 printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
887
888 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
889 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
890 }
891
892
893
894 /*************************************************
895 * Test exclude/includes *
896 *************************************************/
897
898 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
899 there are no includes, the path must match an include pattern.
900
901 Arguments:
902 path the path to be matched
903 ip the chain of include patterns
904 ep the chain of exclude patterns
905
906 Returns: TRUE if the path is not excluded
907 */
908
909 static BOOL
910 test_incexc(char *path, patstr *ip, patstr *ep)
911 {
912 int plen = strlen(path);
913
914 for (; ep != NULL; ep = ep->next)
915 {
916 if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
917 return FALSE;
918 }
919
920 if (ip == NULL) return TRUE;
921
922 for (; ip != NULL; ip = ip->next)
923 {
924 if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
925 return TRUE;
926 }
927
928 return FALSE;
929 }
930
931
932
933 /*************************************************
934 * Decode integer argument value *
935 *************************************************/
936
937 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
938 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
939 just keep it simple.
940
941 Arguments:
942 option_data the option data string
943 op the option item (for error messages)
944 longop TRUE if option given in long form
945
946 Returns: a long integer
947 */
948
949 static long int
950 decode_number(char *option_data, option_item *op, BOOL longop)
951 {
952 unsigned long int n = 0;
953 char *endptr = option_data;
954 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
955 while (isdigit((unsigned char)(*endptr)))
956 n = n * 10 + (int)(*endptr++ - '0');
957 if (toupper(*endptr) == 'K')
958 {
959 n *= 1024;
960 endptr++;
961 }
962 else if (toupper(*endptr) == 'M')
963 {
964 n *= 1024*1024;
965 endptr++;
966 }
967
968 if (*endptr != 0) /* Error */
969 {
970 if (longop)
971 {
972 char *equals = strchr(op->long_name, '=');
973 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
974 (int)(equals - op->long_name);
975 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
976 option_data, nlen, op->long_name);
977 }
978 else
979 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
980 option_data, op->one_char);
981 pcregrep_exit(usage(2));
982 }
983
984 return n;
985 }
986
987
988
989 /*************************************************
990 * Add item to a chain of numbers *
991 *************************************************/
992
993 /* Used to add an item onto a chain, or just return an unconnected item if the
994 "after" argument is NULL.
995
996 Arguments:
997 n the number to add
998 after if not NULL points to item to insert after
999
1000 Returns: new number block
1001 */
1002
1003 static omstr *
1004 add_number(int n, omstr *after)
1005 {
1006 omstr *om = (omstr *)malloc(sizeof(omstr));
1007
1008 if (om == NULL)
1009 {
1010 fprintf(stderr, "pcregrep: malloc failed\n");
1011 pcregrep_exit(2);
1012 }
1013 om->next = NULL;
1014 om->groupnum = n;
1015
1016 if (after != NULL)
1017 {
1018 om->next = after->next;
1019 after->next = om;
1020 }
1021 return om;
1022 }
1023
1024
1025
1026 /*************************************************
1027 * Read one line of input *
1028 *************************************************/
1029
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a newline has been stored, when "length" characters have
been stored, or at end of file. The buffer is not zero-terminated. The space
limit is checked before each character is read, so a zero or negative length
consumes nothing from the file and writes nothing to the buffer.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;
while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = c;
  if (c == '\n') break;
  }
return yield;
}
1057
1058
1059
1060 /*************************************************
1061 * Find end of line *
1062 *************************************************/
1063
1064 /* The length of the endline sequence that is found is set via lenptr. This may
1065 be zero at the very end of the file if there is no line-ending sequence there.
1066
1067 Arguments:
1068 p current position in line
1069 endptr end of available data
1070 lenptr where to put the length of the eol sequence
1071
1072 Returns: pointer after the last byte of the line,
1073 including the newline byte(s)
1074 */
1075
1076 static char *
1077 end_of_line(char *p, char *endptr, int *lenptr)
1078 {
1079 switch(endlinetype)
1080 {
1081 default: /* Just in case */
1082 case EL_LF:
1083 while (p < endptr && *p != '\n') p++;
1084 if (p < endptr)
1085 {
1086 *lenptr = 1;
1087 return p + 1;
1088 }
1089 *lenptr = 0;
1090 return endptr;
1091
1092 case EL_CR:
1093 while (p < endptr && *p != '\r') p++;
1094 if (p < endptr)
1095 {
1096 *lenptr = 1;
1097 return p + 1;
1098 }
1099 *lenptr = 0;
1100 return endptr;
1101
1102 case EL_CRLF:
1103 for (;;)
1104 {
1105 while (p < endptr && *p != '\r') p++;
1106 if (++p >= endptr)
1107 {
1108 *lenptr = 0;
1109 return endptr;
1110 }
1111 if (*p == '\n')
1112 {
1113 *lenptr = 2;
1114 return p + 1;
1115 }
1116 }
1117 break;
1118
1119 case EL_ANYCRLF:
1120 while (p < endptr)
1121 {
1122 int extra = 0;
1123 register int c = *((unsigned char *)p);
1124
1125 if (utf8 && c >= 0xc0)
1126 {
1127 int gcii, gcss;
1128 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1129 gcss = 6*extra;
1130 c = (c & utf8_table3[extra]) << gcss;
1131 for (gcii = 1; gcii <= extra; gcii++)
1132 {
1133 gcss -= 6;
1134 c |= (p[gcii] & 0x3f) << gcss;
1135 }
1136 }
1137
1138 p += 1 + extra;
1139
1140 switch (c)
1141 {
1142 case '\n':
1143 *lenptr = 1;
1144 return p;
1145
1146 case '\r':
1147 if (p < endptr && *p == '\n')
1148 {
1149 *lenptr = 2;
1150 p++;
1151 }
1152 else *lenptr = 1;
1153 return p;
1154
1155 default:
1156 break;
1157 }
1158 } /* End of loop for ANYCRLF case */
1159
1160 *lenptr = 0; /* Must have hit the end */
1161 return endptr;
1162
1163 case EL_ANY:
1164 while (p < endptr)
1165 {
1166 int extra = 0;
1167 register int c = *((unsigned char *)p);
1168
1169 if (utf8 && c >= 0xc0)
1170 {
1171 int gcii, gcss;
1172 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1173 gcss = 6*extra;
1174 c = (c & utf8_table3[extra]) << gcss;
1175 for (gcii = 1; gcii <= extra; gcii++)
1176 {
1177 gcss -= 6;
1178 c |= (p[gcii] & 0x3f) << gcss;
1179 }
1180 }
1181
1182 p += 1 + extra;
1183
1184 switch (c)
1185 {
1186 case '\n': /* LF */
1187 case '\v': /* VT */
1188 case '\f': /* FF */
1189 *lenptr = 1;
1190 return p;
1191
1192 case '\r': /* CR */
1193 if (p < endptr && *p == '\n')
1194 {
1195 *lenptr = 2;
1196 p++;
1197 }
1198 else *lenptr = 1;
1199 return p;
1200
1201 #ifndef EBCDIC
1202 case 0x85: /* Unicode NEL */
1203 *lenptr = utf8? 2 : 1;
1204 return p;
1205
1206 case 0x2028: /* Unicode LS */
1207 case 0x2029: /* Unicode PS */
1208 *lenptr = 3;
1209 return p;
1210 #endif /* Not EBCDIC */
1211
1212 default:
1213 break;
1214 }
1215 } /* End of loop for ANY case */
1216
1217 *lenptr = 0; /* Must have hit the end */
1218 return endptr;
1219 } /* End of overall switch */
1220 }
1221
1222
1223
1224 /*************************************************
1225 * Find start of previous line *
1226 *************************************************/
1227
1228 /* This is called when looking back for before lines to print.
1229
1230 Arguments:
1231 p start of the subsequent line
1232 startptr start of available data
1233
1234 Returns: pointer to the start of the previous line
1235 */
1236
1237 static char *
1238 previous_line(char *p, char *startptr)
1239 {
1240 switch(endlinetype)
1241 {
1242 default: /* Just in case */
1243 case EL_LF:
1244 p--;
1245 while (p > startptr && p[-1] != '\n') p--;
1246 return p;
1247
1248 case EL_CR:
1249 p--;
1250 while (p > startptr && p[-1] != '\n') p--;
1251 return p;
1252
1253 case EL_CRLF:
1254 for (;;)
1255 {
1256 p -= 2;
1257 while (p > startptr && p[-1] != '\n') p--;
1258 if (p <= startptr + 1 || p[-2] == '\r') return p;
1259 }
1260 return p; /* But control should never get here */
1261
1262 case EL_ANY:
1263 case EL_ANYCRLF:
1264 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1265 if (utf8) while ((*p & 0xc0) == 0x80) p--;
1266
1267 while (p > startptr)
1268 {
1269 register unsigned int c;
1270 char *pp = p - 1;
1271
1272 if (utf8)
1273 {
1274 int extra = 0;
1275 while ((*pp & 0xc0) == 0x80) pp--;
1276 c = *((unsigned char *)pp);
1277 if (c >= 0xc0)
1278 {
1279 int gcii, gcss;
1280 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1281 gcss = 6*extra;
1282 c = (c & utf8_table3[extra]) << gcss;
1283 for (gcii = 1; gcii <= extra; gcii++)
1284 {
1285 gcss -= 6;
1286 c |= (pp[gcii] & 0x3f) << gcss;
1287 }
1288 }
1289 }
1290 else c = *((unsigned char *)pp);
1291
1292 if (endlinetype == EL_ANYCRLF) switch (c)
1293 {
1294 case '\n': /* LF */
1295 case '\r': /* CR */
1296 return p;
1297
1298 default:
1299 break;
1300 }
1301
1302 else switch (c)
1303 {
1304 case '\n': /* LF */
1305 case '\v': /* VT */
1306 case '\f': /* FF */
1307 case '\r': /* CR */
1308 #ifndef EBCDIE
1309 case 0x85: /* Unicode NEL */
1310 case 0x2028: /* Unicode LS */
1311 case 0x2029: /* Unicode PS */
1312 #endif /* Not EBCDIC */
1313 return p;
1314
1315 default:
1316 break;
1317 }
1318
1319 p = pp; /* Back one character */
1320 } /* End of loop for ANY case */
1321
1322 return startptr; /* Hit start of data */
1323 } /* End of overall switch */
1324 }
1325
1326
1327
1328
1329
1330 /*************************************************
1331 * Print the previous "after" lines *
1332 *************************************************/
1333
1334 /* This is called if we are about to lose said lines because of buffer filling,
1335 and at the end of the file. The data in the line is written using fwrite() so
1336 that a binary zero does not terminate it.
1337
1338 Arguments:
1339 lastmatchnumber the number of the last matching line, plus one
1340 lastmatchrestart where we restarted after the last match
1341 endptr end of available data
1342 printname filename for printing
1343
1344 Returns: nothing
1345 */
1346
1347 static void
1348 do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1349 char *printname)
1350 {
1351 if (after_context > 0 && lastmatchnumber > 0)
1352 {
1353 int count = 0;
1354 while (lastmatchrestart < endptr && count++ < after_context)
1355 {
1356 int ellength;
1357 char *pp = lastmatchrestart;
1358 if (printname != NULL) fprintf(stdout, "%s-", printname);
1359 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1360 pp = end_of_line(pp, endptr, &ellength);
1361 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1362 lastmatchrestart = pp;
1363 }
1364 hyphenpending = TRUE;
1365 }
1366 }
1367
1368
1369
1370 /*************************************************
1371 * Apply patterns to subject till one matches *
1372 *************************************************/
1373
1374 /* This function is called to run through all patterns, looking for a match. It
1375 is used multiple times for the same subject when colouring is enabled, in order
1376 to find all possible matches.
1377
1378 Arguments:
1379 matchptr the start of the subject
1380 length the length of the subject to match
1381 options options for pcre_exec
1382 startoffset where to start matching
1383 offsets the offets vector to fill in
1384 mrc address of where to put the result of pcre_exec()
1385
1386 Returns: TRUE if there was a match
1387 FALSE if there was no match
1388 invert if there was a non-fatal error
1389 */
1390
1391 static BOOL
1392 match_patterns(char *matchptr, size_t length, unsigned int options,
1393 int startoffset, int *offsets, int *mrc)
1394 {
1395 int i;
1396 size_t slen = length;
1397 patstr *p = patterns;
1398 const char *msg = "this text:\n\n";
1399
1400 if (slen > 200)
1401 {
1402 slen = 200;
1403 msg = "text that starts:\n\n";
1404 }
1405 for (i = 1; p != NULL; p = p->next, i++)
1406 {
1407 *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1408 startoffset, options, offsets, OFFSET_SIZE);
1409 if (*mrc >= 0) return TRUE;
1410 if (*mrc == PCRE_ERROR_NOMATCH) continue;
1411 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1412 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1413 fprintf(stderr, "%s", msg);
1414 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
1415 fprintf(stderr, "\n\n");
1416 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1417 *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1418 resource_error = TRUE;
1419 if (error_count++ > 20)
1420 {
1421 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1422 pcregrep_exit(2);
1423 }
1424 return invert; /* No more matching; don't show the line again */
1425 }
1426
1427 return FALSE; /* No match, no errors */
1428 }
1429
1430
1431
1432 /*************************************************
1433 * Grep an individual file *
1434 *************************************************/
1435
1436 /* This is called from grep_or_recurse() below. It uses a buffer that is three
1437 times the value of bufthird. The matching point is never allowed to stray into
1438 the top third of the buffer, thus keeping more of the file available for
1439 context printing or for multiline scanning. For large files, the pointer will
1440 be in the middle third most of the time, so the bottom third is available for
1441 "before" context printing.
1442
1443 Arguments:
1444 handle the fopened FILE stream for a normal file
1445 the gzFile pointer when reading is via libz
1446 the BZFILE pointer when reading is via libbz2
1447 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1448 filename the file name or NULL (for errors)
1449 printname the file name if it is to be printed for each match
1450 or NULL if the file name is not to be printed
1451 it cannot be NULL if filenames[_nomatch]_only is set
1452
1453 Returns: 0 if there was at least one match
1454 1 otherwise (no matches)
1455 2 if an overlong line is encountered
1456 3 if there is a read error on a .bz2 file
1457 */
1458
1459 static int
1460 pcregrep(void *handle, int frtype, char *filename, char *printname)
1461 {
1462 int rc = 1;
1463 int linenumber = 1;
1464 int lastmatchnumber = 0;
1465 int count = 0;
1466 int filepos = 0;
1467 int offsets[OFFSET_SIZE];
1468 char *lastmatchrestart = NULL;
1469 char *ptr = main_buffer;
1470 char *endptr;
1471 size_t bufflength;
1472 BOOL binary = FALSE;
1473 BOOL endhyphenpending = FALSE;
1474 BOOL input_line_buffered = line_buffered;
1475 FILE *in = NULL; /* Ensure initialized */
1476
1477 #ifdef SUPPORT_LIBZ
1478 gzFile ingz = NULL;
1479 #endif
1480
1481 #ifdef SUPPORT_LIBBZ2
1482 BZFILE *inbz2 = NULL;
1483 #endif
1484
1485
1486 /* Do the first read into the start of the buffer and set up the pointer to end
1487 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1488 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1489 fail. */
1490
1491 (void)frtype;
1492
1493 #ifdef SUPPORT_LIBZ
1494 if (frtype == FR_LIBZ)
1495 {
1496 ingz = (gzFile)handle;
1497 bufflength = gzread (ingz, main_buffer, bufsize);
1498 }
1499 else
1500 #endif
1501
1502 #ifdef SUPPORT_LIBBZ2
1503 if (frtype == FR_LIBBZ2)
1504 {
1505 inbz2 = (BZFILE *)handle;
1506 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1507 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1508 } /* without the cast it is unsigned. */
1509 else
1510 #endif
1511
1512 {
1513 in = (FILE *)handle;
1514 if (is_file_tty(in)) input_line_buffered = TRUE;
1515 bufflength = input_line_buffered?
1516 read_one_line(main_buffer, bufsize, in) :
1517 fread(main_buffer, 1, bufsize, in);
1518 }
1519
1520 endptr = main_buffer + bufflength;
1521
1522 /* Unless binary-files=text, see if we have a binary file. This uses the same
1523 rule as GNU grep, namely, a search for a binary zero byte near the start of the
1524 file. */
1525
1526 if (binary_files != BIN_TEXT)
1527 {
1528 binary =
1529 memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1530 if (binary && binary_files == BIN_NOMATCH) return 1;
1531 }
1532
1533 /* Loop while the current pointer is not at the end of the file. For large
1534 files, endptr will be at the end of the buffer when we are in the middle of the
1535 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1536 way, the buffer is shifted left and re-filled. */
1537
1538 while (ptr < endptr)
1539 {
1540 int endlinelength;
1541 int mrc = 0;
1542 int startoffset = 0;
1543 unsigned int options = 0;
1544 BOOL match;
1545 char *matchptr = ptr;
1546 char *t = ptr;
1547 size_t length, linelength;
1548
1549 /* At this point, ptr is at the start of a line. We need to find the length
1550 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1551 length remainder of the data in the buffer. Otherwise, it is the length of
1552 the next line, excluding the terminating newline. After matching, we always
1553 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1554 option is used for compiling, so that any match is constrained to be in the
1555 first line. */
1556
1557 t = end_of_line(t, endptr, &endlinelength);
1558 linelength = t - ptr - endlinelength;
1559 length = multiline? (size_t)(endptr - ptr) : linelength;
1560
1561 /* Check to see if the line we are looking at extends right to the very end
1562 of the buffer without a line terminator. This means the line is too long to
1563 handle. */
1564
1565 if (endlinelength == 0 && t == main_buffer + bufsize)
1566 {
1567 fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1568 "pcregrep: check the --buffer-size option\n",
1569 linenumber,
1570 (filename == NULL)? "" : " of file ",
1571 (filename == NULL)? "" : filename);
1572 return 2;
1573 }
1574
1575 /* Extra processing for Jeffrey Friedl's debugging. */
1576
1577 #ifdef JFRIEDL_DEBUG
1578 if (jfriedl_XT || jfriedl_XR)
1579 {
1580 # include <sys/time.h>
1581 # include <time.h>
1582 struct timeval start_time, end_time;
1583 struct timezone dummy;
1584 int i;
1585
1586 if (jfriedl_XT)
1587 {
1588 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1589 const char *orig = ptr;
1590 ptr = malloc(newlen + 1);
1591 if (!ptr) {
1592 printf("out of memory");
1593 pcregrep_exit(2);
1594 }
1595 endptr = ptr;
1596 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1597 for (i = 0; i < jfriedl_XT; i++) {
1598 strncpy(endptr, orig, length);
1599 endptr += length;
1600 }
1601 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1602 length = newlen;
1603 }
1604
1605 if (gettimeofday(&start_time, &dummy) != 0)
1606 perror("bad gettimeofday");
1607
1608
1609 for (i = 0; i < jfriedl_XR; i++)
1610 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1611 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1612
1613 if (gettimeofday(&end_time, &dummy) != 0)
1614 perror("bad gettimeofday");
1615
1616 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1617 -
1618 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1619
1620 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1621 return 0;
1622 }
1623 #endif
1624
1625 /* We come back here after a match when show_only_matching is set, in order
1626 to find any further matches in the same line. This applies to
1627 --only-matching, --file-offsets, and --line-offsets. */
1628
1629 ONLY_MATCHING_RESTART:
1630
1631 /* Run through all the patterns until one matches or there is an error other
1632 than NOMATCH. This code is in a subroutine so that it can be re-used for
1633 finding subsequent matches when colouring matched lines. After finding one
1634 match, set PCRE_NOTEMPTY to disable any further matches of null strings in
1635 this line. */
1636
1637 match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
1638 options = PCRE_NOTEMPTY;
1639
1640 /* If it's a match or a not-match (as required), do what's wanted. */
1641
1642 if (match != invert)
1643 {
1644 BOOL hyphenprinted = FALSE;
1645
1646 /* We've failed if we want a file that doesn't have any matches. */
1647
1648 if (filenames == FN_NOMATCH_ONLY) return 1;
1649
1650 /* Just count if just counting is wanted. */
1651
1652 if (count_only) count++;
1653
1654 /* When handling a binary file and binary-files==binary, the "binary"
1655 variable will be set true (it's false in all other cases). In this
1656 situation we just want to output the file name. No need to scan further. */
1657
1658 else if (binary)
1659 {
1660 fprintf(stdout, "Binary file %s matches\n", filename);
1661 return 0;
1662 }
1663
1664 /* If all we want is a file name, there is no need to scan any more lines
1665 in the file. */
1666
1667 else if (filenames == FN_MATCH_ONLY)
1668 {
1669 fprintf(stdout, "%s\n", printname);
1670 return 0;
1671 }
1672
1673 /* Likewise, if all we want is a yes/no answer. */
1674
1675 else if (quiet) return 0;
1676
1677 /* The --only-matching option prints just the substring that matched,
1678 and/or one or more captured portions of it, as long as these strings are
1679 not empty. The --file-offsets and --line-offsets options output offsets for
1680 the matching substring (all three set show_only_matching). None of these
1681 mutually exclusive options prints any context. Afterwards, adjust the start
1682 and then jump back to look for further matches in the same line. If we are
1683 in invert mode, however, nothing is printed and we do not restart - this
1684 could still be useful because the return code is set. */
1685
1686 else if (show_only_matching)
1687 {
1688 if (!invert)
1689 {
1690 if (printname != NULL) fprintf(stdout, "%s:", printname);
1691 if (number) fprintf(stdout, "%d:", linenumber);
1692
1693 /* Handle --line-offsets */
1694
1695 if (line_offsets)
1696 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1697 offsets[1] - offsets[0]);
1698
1699 /* Handle --file-offsets */
1700
1701 else if (file_offsets)
1702 fprintf(stdout, "%d,%d\n",
1703 (int)(filepos + matchptr + offsets[0] - ptr),
1704 offsets[1] - offsets[0]);
1705
1706 /* Handle --only-matching, which may occur many times */
1707
1708 else
1709 {
1710 BOOL printed = FALSE;
1711 omstr *om;
1712
1713 for (om = only_matching; om != NULL; om = om->next)
1714 {
1715 int n = om->groupnum;
1716 if (n < mrc)
1717 {
1718 int plen = offsets[2*n + 1] - offsets[2*n];
1719 if (plen > 0)
1720 {
1721 if (printed) fprintf(stdout, "%s", om_separator);
1722 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1723 FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1724 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1725 printed = TRUE;
1726 }
1727 }
1728 }
1729
1730 if (printed || printname != NULL || number) fprintf(stdout, "\n");
1731 }
1732
1733 /* Prepare to repeat to find the next match */
1734
1735 match = FALSE;
1736 if (line_buffered) fflush(stdout);
1737 rc = 0; /* Had some success */
1738 startoffset = offsets[1]; /* Restart after the match */
1739 goto ONLY_MATCHING_RESTART;
1740 }
1741 }
1742
1743 /* This is the default case when none of the above options is set. We print
1744 the matching lines(s), possibly preceded and/or followed by other lines of
1745 context. */
1746
1747 else
1748 {
1749 /* See if there is a requirement to print some "after" lines from a
1750 previous match. We never print any overlaps. */
1751
1752 if (after_context > 0 && lastmatchnumber > 0)
1753 {
1754 int ellength;
1755 int linecount = 0;
1756 char *p = lastmatchrestart;
1757
1758 while (p < ptr && linecount < after_context)
1759 {
1760 p = end_of_line(p, ptr, &ellength);
1761 linecount++;
1762 }
1763
1764 /* It is important to advance lastmatchrestart during this printing so
1765 that it interacts correctly with any "before" printing below. Print
1766 each line's data using fwrite() in case there are binary zeroes. */
1767
1768 while (lastmatchrestart < p)
1769 {
1770 char *pp = lastmatchrestart;
1771 if (printname != NULL) fprintf(stdout, "%s-", printname);
1772 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1773 pp = end_of_line(pp, endptr, &ellength);
1774 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1775 lastmatchrestart = pp;
1776 }
1777 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1778 }
1779
1780 /* If there were non-contiguous lines printed above, insert hyphens. */
1781
1782 if (hyphenpending)
1783 {
1784 fprintf(stdout, "--\n");
1785 hyphenpending = FALSE;
1786 hyphenprinted = TRUE;
1787 }
1788
1789 /* See if there is a requirement to print some "before" lines for this
1790 match. Again, don't print overlaps. */
1791
1792 if (before_context > 0)
1793 {
1794 int linecount = 0;
1795 char *p = ptr;
1796
1797 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1798 linecount < before_context)
1799 {
1800 linecount++;
1801 p = previous_line(p, main_buffer);
1802 }
1803
1804 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1805 fprintf(stdout, "--\n");
1806
1807 while (p < ptr)
1808 {
1809 int ellength;
1810 char *pp = p;
1811 if (printname != NULL) fprintf(stdout, "%s-", printname);
1812 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1813 pp = end_of_line(pp, endptr, &ellength);
1814 FWRITE(p, 1, pp - p, stdout);
1815 p = pp;
1816 }
1817 }
1818
1819 /* Now print the matching line(s); ensure we set hyphenpending at the end
1820 of the file if any context lines are being output. */
1821
1822 if (after_context > 0 || before_context > 0)
1823 endhyphenpending = TRUE;
1824
1825 if (printname != NULL) fprintf(stdout, "%s:", printname);
1826 if (number) fprintf(stdout, "%d:", linenumber);
1827
1828 /* In multiline mode, we want to print to the end of the line in which
1829 the end of the matched string is found, so we adjust linelength and the
1830 line number appropriately, but only when there actually was a match
1831 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1832 the match will always be before the first newline sequence. */
1833
1834 if (multiline & !invert)
1835 {
1836 char *endmatch = ptr + offsets[1];
1837 t = ptr;
1838 while (t <= endmatch)
1839 {
1840 t = end_of_line(t, endptr, &endlinelength);
1841 if (t < endmatch) linenumber++; else break;
1842 }
1843 linelength = t - ptr - endlinelength;
1844 }
1845
1846 /*** NOTE: Use only fwrite() to output the data line, so that binary
1847 zeroes are treated as just another data character. */
1848
1849 /* This extra option, for Jeffrey Friedl's debugging requirements,
1850 replaces the matched string, or a specific captured string if it exists,
1851 with X. When this happens, colouring is ignored. */
1852
1853 #ifdef JFRIEDL_DEBUG
1854 if (S_arg >= 0 && S_arg < mrc)
1855 {
1856 int first = S_arg * 2;
1857 int last = first + 1;
1858 FWRITE(ptr, 1, offsets[first], stdout);
1859 fprintf(stdout, "X");
1860 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1861 }
1862 else
1863 #endif
1864
1865 /* We have to split the line(s) up if colouring, and search for further
1866 matches, but not of course if the line is a non-match. */
1867
1868 if (do_colour && !invert)
1869 {
1870 int plength;
1871 FWRITE(ptr, 1, offsets[0], stdout);
1872 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1873 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1874 fprintf(stdout, "%c[00m", 0x1b);
1875 for (;;)
1876 {
1877 startoffset = offsets[1];
1878 if (startoffset >= (int)linelength + endlinelength ||
1879 !match_patterns(matchptr, length, options, startoffset, offsets,
1880 &mrc))
1881 break;
1882 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1883 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1884 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1885 fprintf(stdout, "%c[00m", 0x1b);
1886 }
1887
1888 /* In multiline mode, we may have already printed the complete line
1889 and its line-ending characters (if they matched the pattern), so there
1890 may be no more to print. */
1891
1892 plength = (int)((linelength + endlinelength) - startoffset);
1893 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1894 }
1895
1896 /* Not colouring; no need to search for further matches */
1897
1898 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1899 }
1900
1901 /* End of doing what has to be done for a match. If --line-buffered was
1902 given, flush the output. */
1903
1904 if (line_buffered) fflush(stdout);
1905 rc = 0; /* Had some success */
1906
1907 /* Remember where the last match happened for after_context. We remember
1908 where we are about to restart, and that line's number. */
1909
1910 lastmatchrestart = ptr + linelength + endlinelength;
1911 lastmatchnumber = linenumber + 1;
1912 }
1913
1914 /* For a match in multiline inverted mode (which of course did not cause
1915 anything to be printed), we have to move on to the end of the match before
1916 proceeding. */
1917
1918 if (multiline && invert && match)
1919 {
1920 int ellength;
1921 char *endmatch = ptr + offsets[1];
1922 t = ptr;
1923 while (t < endmatch)
1924 {
1925 t = end_of_line(t, endptr, &ellength);
1926 if (t <= endmatch) linenumber++; else break;
1927 }
1928 endmatch = end_of_line(endmatch, endptr, &ellength);
1929 linelength = endmatch - ptr - ellength;
1930 }
1931
1932 /* Advance to after the newline and increment the line number. The file
1933 offset to the current line is maintained in filepos. */
1934
1935 ptr += linelength + endlinelength;
1936 filepos += (int)(linelength + endlinelength);
1937 linenumber++;
1938
1939 /* If input is line buffered, and the buffer is not yet full, read another
1940 line and add it into the buffer. */
1941
1942 if (input_line_buffered && bufflength < (size_t)bufsize)
1943 {
1944 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1945 bufflength += add;
1946 endptr += add;
1947 }
1948
1949 /* If we haven't yet reached the end of the file (the buffer is full), and
1950 the current point is in the top 1/3 of the buffer, slide the buffer down by
1951 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1952 about to be lost, print them. */
1953
1954 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1955 {
1956 if (after_context > 0 &&
1957 lastmatchnumber > 0 &&
1958 lastmatchrestart < main_buffer + bufthird)
1959 {
1960 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1961 lastmatchnumber = 0;
1962 }
1963
1964 /* Now do the shuffle */
1965
1966 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1967 ptr -= bufthird;
1968
1969 #ifdef SUPPORT_LIBZ
1970 if (frtype == FR_LIBZ)
1971 bufflength = 2*bufthird +
1972 gzread (ingz, main_buffer + 2*bufthird, bufthird);
1973 else
1974 #endif
1975
1976 #ifdef SUPPORT_LIBBZ2
1977 if (frtype == FR_LIBBZ2)
1978 bufflength = 2*bufthird +
1979 BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1980 else
1981 #endif
1982
1983 bufflength = 2*bufthird +
1984 (input_line_buffered?
1985 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1986 fread(main_buffer + 2*bufthird, 1, bufthird, in));
1987 endptr = main_buffer + bufflength;
1988
1989 /* Adjust any last match point */
1990
1991 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1992 }
1993 } /* Loop through the whole file */
1994
1995 /* End of file; print final "after" lines if wanted; do_after_lines sets
1996 hyphenpending if it prints something. */
1997
1998 if (!show_only_matching && !count_only)
1999 {
2000 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2001 hyphenpending |= endhyphenpending;
2002 }
2003
2004 /* Print the file name if we are looking for those without matches and there
2005 were none. If we found a match, we won't have got this far. */
2006
2007 if (filenames == FN_NOMATCH_ONLY)
2008 {
2009 fprintf(stdout, "%s\n", printname);
2010 return 0;
2011 }
2012
2013 /* Print the match count if wanted */
2014
2015 if (count_only)
2016 {
2017 if (count > 0 || !omit_zero_count)
2018 {
2019 if (printname != NULL && filenames != FN_NONE)
2020 fprintf(stdout, "%s:", printname);
2021 fprintf(stdout, "%d\n", count);
2022 }
2023 }
2024
2025 return rc;
2026 }
2027
2028
2029
2030 /*************************************************
2031 * Grep a file or recurse into a directory *
2032 *************************************************/
2033
2034 /* Given a path name, if it's a directory, scan all the files if we are
2035 recursing; if it's a file, grep it.
2036
2037 Arguments:
2038 pathname the path to investigate
2039 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
2040 only_one_at_top TRUE if the path is the only one at toplevel
2041
2042 Returns: -1 the file/directory was skipped
2043 0 if there was at least one match
2044 1 if there were no matches
2045 2 there was some kind of error
2046
2047 However, file opening failures are suppressed if "silent" is set.
2048 */
2049
2050 static int
2051 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2052 {
2053 int rc = 1;
2054 int frtype;
2055 void *handle;
2056 char *lastcomp;
2057 FILE *in = NULL; /* Ensure initialized */
2058
2059 #ifdef SUPPORT_LIBZ
2060 gzFile ingz = NULL;
2061 #endif
2062
2063 #ifdef SUPPORT_LIBBZ2
2064 BZFILE *inbz2 = NULL;
2065 #endif
2066
2067 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2068 int pathlen;
2069 #endif
2070
2071 /* If the file name is "-" we scan stdin */
2072
2073 if (strcmp(pathname, "-") == 0)
2074 {
2075 return pcregrep(stdin, FR_PLAIN, stdin_name,
2076 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2077 stdin_name : NULL);
2078 }
2079
2080 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2081 directories, whereas --include and --exclude apply to everything else. The test
2082 is against the final component of the path. */
2083
2084 lastcomp = strrchr(pathname, FILESEP);
2085 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2086
2087 /* If the file is a directory, skip if not recursing or if explicitly excluded.
2088 Otherwise, scan the directory and recurse for each path within it. The scanning
2089 code is localized so it can be made system-specific. */
2090
2091 if (isdirectory(pathname))
2092 {
2093 if (dee_action == dee_SKIP ||
2094 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2095 return -1;
2096
2097 if (dee_action == dee_RECURSE)
2098 {
2099 char buffer[1024];
2100 char *nextfile;
2101 directory_type *dir = opendirectory(pathname);
2102
2103 if (dir == NULL)
2104 {
2105 if (!silent)
2106 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
2107 strerror(errno));
2108 return 2;
2109 }
2110
2111 while ((nextfile = readdirectory(dir)) != NULL)
2112 {
2113 int frc;
2114 sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
2115 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2116 if (frc > 1) rc = frc;
2117 else if (frc == 0 && rc == 1) rc = 0;
2118 }
2119
2120 closedirectory(dir);
2121 return rc;
2122 }
2123 }
2124
2125 /* If the file is not a directory and not a regular file, skip it if that's
2126 been requested. Otherwise, check for explicit include/exclude. */
2127
2128 else if ((!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2129 !test_incexc(lastcomp, include_patterns, exclude_patterns))
2130 return -1;
2131
2132 /* Control reaches here if we have a regular file, or if we have a directory
2133 and recursion or skipping was not requested, or if we have anything else and
2134 skipping was not requested. The scan proceeds. If this is the first and only
2135 argument at top level, we don't show the file name, unless we are only showing
2136 the file name, or the filename was forced (-H). */
2137
2138 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2139 pathlen = (int)(strlen(pathname));
2140 #endif
2141
2142 /* Open using zlib if it is supported and the file name ends with .gz. */
2143
2144 #ifdef SUPPORT_LIBZ
2145 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2146 {
2147 ingz = gzopen(pathname, "rb");
2148 if (ingz == NULL)
2149 {
2150 if (!silent)
2151 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2152 strerror(errno));
2153 return 2;
2154 }
2155 handle = (void *)ingz;
2156 frtype = FR_LIBZ;
2157 }
2158 else
2159 #endif
2160
2161 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
2162
2163 #ifdef SUPPORT_LIBBZ2
2164 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2165 {
2166 inbz2 = BZ2_bzopen(pathname, "rb");
2167 handle = (void *)inbz2;
2168 frtype = FR_LIBBZ2;
2169 }
2170 else
2171 #endif
2172
2173 /* Otherwise use plain fopen(). The label is so that we can come back here if
2174 an attempt to read a .bz2 file indicates that it really is a plain file. */
2175
2176 #ifdef SUPPORT_LIBBZ2
2177 PLAIN_FILE:
2178 #endif
2179 {
2180 in = fopen(pathname, "rb");
2181 handle = (void *)in;
2182 frtype = FR_PLAIN;
2183 }
2184
2185 /* All the opening methods return errno when they fail. */
2186
2187 if (handle == NULL)
2188 {
2189 if (!silent)
2190 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2191 strerror(errno));
2192 return 2;
2193 }
2194
2195 /* Now grep the file */
2196
2197 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2198 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2199
2200 /* Close in an appropriate manner. */
2201
2202 #ifdef SUPPORT_LIBZ
2203 if (frtype == FR_LIBZ)
2204 gzclose(ingz);
2205 else
2206 #endif
2207
2208 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2209 read failed. If the error indicates that the file isn't in fact bzipped, try
2210 again as a normal file. */
2211
2212 #ifdef SUPPORT_LIBBZ2
2213 if (frtype == FR_LIBBZ2)
2214 {
2215 if (rc == 3)
2216 {
2217 int errnum;
2218 const char *err = BZ2_bzerror(inbz2, &errnum);
2219 if (errnum == BZ_DATA_ERROR_MAGIC)
2220 {
2221 BZ2_bzclose(inbz2);
2222 goto PLAIN_FILE;
2223 }
2224 else if (!silent)
2225 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2226 pathname, err);
2227 rc = 2; /* The normal "something went wrong" code */
2228 }
2229 BZ2_bzclose(inbz2);
2230 }
2231 else
2232 #endif
2233
2234 /* Normal file close */
2235
2236 fclose(in);
2237
2238 /* Pass back the yield from pcregrep(). */
2239
2240 return rc;
2241 }
2242
2243
2244
2245 /*************************************************
2246 * Handle a single-letter, no data option *
2247 *************************************************/
2248
2249 static int
2250 handle_option(int letter, int options)
2251 {
2252 switch(letter)
2253 {
2254 case N_FOFFSETS: file_offsets = TRUE; break;
2255 case N_HELP: help(); pcregrep_exit(0);
2256 case N_LBUFFER: line_buffered = TRUE; break;
2257 case N_LOFFSETS: line_offsets = number = TRUE; break;
2258 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2259 case 'a': binary_files = BIN_TEXT; break;
2260 case 'c': count_only = TRUE; break;
2261 case 'F': process_options |= PO_FIXED_STRINGS; break;
2262 case 'H': filenames = FN_FORCE; break;
2263 case 'I': binary_files = BIN_NOMATCH; break;
2264 case 'h': filenames = FN_NONE; break;
2265 case 'i': options |= PCRE_CASELESS; break;
2266 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2267 case 'L': filenames = FN_NOMATCH_ONLY; break;
2268 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2269 case 'n': number = TRUE; break;
2270
2271 case 'o':
2272 only_matching_last = add_number(0, only_matching_last);
2273 if (only_matching == NULL) only_matching = only_matching_last;
2274 break;
2275
2276 case 'q': quiet = TRUE; break;
2277 case 'r': dee_action = dee_RECURSE; break;
2278 case 's': silent = TRUE; break;
2279 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2280 case 'v': invert = TRUE; break;
2281 case 'w': process_options |= PO_WORD_MATCH; break;
2282 case 'x': process_options |= PO_LINE_MATCH; break;
2283
2284 case 'V':
2285 fprintf(stdout, "pcregrep version %s\n", pcre_version());
2286 pcregrep_exit(0);
2287 break;
2288
2289 default:
2290 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2291 pcregrep_exit(usage(2));
2292 }
2293
2294 return options;
2295 }
2296
2297
2298
2299
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12, or 13 take "th" ("11th", "112th"), not the suffix that their final
digit alone would select.

Argument:   n   the number
Returns:    pointer to a static buffer holding the text; the buffer is
              overwritten by the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte is more than enough for the decimal digits; the
extra four cover a minus sign, the two-letter suffix, and the final zero. */

static char buffer[sizeof(int) * 3 + 4];
char *p = buffer;
const char *suffix = "th";

sprintf(p, "%d", n);
while (*p != 0) p++;

/* The teens are irregular, so look at the last two digits first. For negative
values the remainders are not positive, so "th" is used. */

if (n % 100 < 11 || n % 100 > 13)
  {
  switch (n % 10)
    {
    case 1:  suffix = "st"; break;
    case 2:  suffix = "nd"; break;
    case 3:  suffix = "rd"; break;
    default: break;
    }
  }

strcpy(p, suffix);
return buffer;
}
2322
2323
2324
2325 /*************************************************
2326 * Compile a single pattern *
2327 *************************************************/
2328
2329 /* Do nothing if the pattern has already been compiled. This is the case for
2330 include/exclude patterns read from a file.
2331
2332 When the -F option has been used, each "pattern" may be a list of strings,
2333 separated by line breaks. They will be matched literally. We split such a
2334 string and compile the first substring, inserting an additional block into the
2335 pattern chain.
2336
2337 Arguments:
2338 p points to the pattern block
2339 options the PCRE options
2340 popts the processing options
2341 fromfile TRUE if the pattern was read from a file
2342 fromtext file name or identifying text (e.g. "include")
2343 count 0 if this is the only command line pattern, or
2344 number of the command line pattern, or
2345 linenumber for a pattern from a file
2346
2347 Returns: TRUE on success, FALSE after an error
2348 */
2349
2350 static BOOL
2351 compile_pattern(patstr *p, int options, int popts, int fromfile,
2352 const char *fromtext, int count)
2353 {
2354 char buffer[PATBUFSIZE];
2355 const char *error;
2356 char *ps = p->string;
2357 int patlen = strlen(ps);
2358 int errptr;
2359
2360 if (p->compiled != NULL) return TRUE;
2361
2362 if ((popts & PO_FIXED_STRINGS) != 0)
2363 {
2364 int ellength;
2365 char *eop = ps + patlen;
2366 char *pe = end_of_line(ps, eop, &ellength);
2367
2368 if (ellength != 0)
2369 {
2370 if (add_pattern(pe, p) == NULL) return FALSE;
2371 patlen = (int)(pe - ps - ellength);
2372 }
2373 }
2374
2375 sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2376 p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2377 if (p->compiled != NULL) return TRUE;
2378
2379 /* Handle compile errors */
2380
2381 errptr -= (int)strlen(prefix[popts]);
2382 if (errptr > patlen) errptr = patlen;
2383
2384 if (fromfile)
2385 {
2386 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2387 "at offset %d: %s\n", count, fromtext, errptr, error);
2388 }
2389 else
2390 {
2391 if (count == 0)
2392 fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2393 fromtext, errptr, error);
2394 else
2395 fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2396 ordin(count), fromtext, errptr, error);
2397 }
2398
2399 return FALSE;
2400 }
2401
2402
2403
2404 /*************************************************
2405 * Read and compile a file of patterns *
2406 *************************************************/
2407
2408 /* This is used for --filelist, --include-from, and --exclude-from.
2409
2410 Arguments:
2411 name the name of the file; "-" is stdin
2412 patptr pointer to the pattern chain anchor
2413 patlastptr pointer to the last pattern pointer
2414 popts the process options to pass to pattern_compile()
2415
2416 Returns: TRUE if all went well
2417 */
2418
2419 static BOOL
2420 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2421 {
2422 int linenumber = 0;
2423 FILE *f;
2424 char *filename;
2425 char buffer[PATBUFSIZE];
2426
2427 if (strcmp(name, "-") == 0)
2428 {
2429 f = stdin;
2430 filename = stdin_name;
2431 }
2432 else
2433 {
2434 f = fopen(name, "r");
2435 if (f == NULL)
2436 {
2437 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2438 return FALSE;
2439 }
2440 filename = name;
2441 }
2442
2443 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2444 {
2445 char *s = buffer + (int)strlen(buffer);
2446 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2447 *s = 0;
2448 linenumber++;
2449 if (buffer[0] == 0) continue; /* Skip blank lines */
2450
2451 /* Note: this call to add_pattern() puts a pointer to the local variable
2452 "buffer" into the pattern chain. However, that pointer is used only when
2453 compiling the pattern, which happens immediately below, so we flatten it
2454 afterwards, as a precaution against any later code trying to use it. */
2455
2456 *patlastptr = add_pattern(buffer, *patlastptr);
2457 if (*patlastptr == NULL) return FALSE;
2458 if (*patptr == NULL) *patptr = *patlastptr;
2459
2460 /* This loop is needed because compiling a "pattern" when -F is set may add
2461 on additional literal patterns if the original contains a newline. In the
2462 common case, it never will, because fgets() stops at a newline. However,
2463 the -N option can be used to give pcregrep a different newline setting. */
2464
2465 for(;;)
2466 {
2467 if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2468 linenumber))
2469 return FALSE;
2470 (*patlastptr)->string = NULL; /* Insurance */
2471 if ((*patlastptr)->next == NULL) break;
2472 *patlastptr = (*patlastptr)->next;
2473 }
2474 }
2475
2476 if (f != stdin) fclose(f);
2477 return TRUE;
2478 }
2479
2480
2481
2482 /*************************************************
2483 * Main program *
2484 *************************************************/
2485
2486 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2487
2488 int
2489 main(int argc, char **argv)
2490 {
2491 int i, j;
2492 int rc = 1;
2493 BOOL only_one_at_top;
2494 patstr *cp;
2495 fnstr *fn;
2496 const char *locale_from = "--locale";
2497 const char *error;
2498
2499 #ifdef SUPPORT_PCREGREP_JIT
2500 pcre_jit_stack *jit_stack = NULL;
2501 #endif
2502
2503 /* Set the default line ending value from the default in the PCRE library;
2504 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2505 Note that the return values from pcre_config(), though derived from the ASCII
2506 codes, are the same in EBCDIC environments, so we must use the actual values
2507 rather than escapes such as as '\r'. */
2508
2509 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2510 switch(i)
2511 {
2512 default: newline = (char *)"lf"; break;
2513 case 13: newline = (char *)"cr"; break;
2514 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2515 case -1: newline = (char *)"any"; break;
2516 case -2: newline = (char *)"anycrlf"; break;
2517 }
2518
2519 /* Process the options */
2520
2521 for (i = 1; i < argc; i++)
2522 {
2523 option_item *op = NULL;
2524 char *option_data = (char *)""; /* default to keep compiler happy */
2525 BOOL longop;
2526 BOOL longopwasequals = FALSE;
2527
2528 if (argv[i][0] != '-') break;
2529
2530 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2531 but only if we have previously had -e or -f to define the patterns. */
2532
2533 if (argv[i][1] == 0)
2534 {
2535 if (pattern_files != NULL || patterns != NULL) break;
2536 else pcregrep_exit(usage(2));
2537 }
2538
2539 /* Handle a long name option, or -- to terminate the options */
2540
2541 if (argv[i][1] == '-')
2542 {
2543 char *arg = argv[i] + 2;
2544 char *argequals = strchr(arg, '=');
2545
2546 if (*arg == 0) /* -- terminates options */
2547 {
2548 i++;
2549 break; /* out of the options-handling loop */
2550 }
2551
2552 longop = TRUE;
2553
2554 /* Some long options have data that follows after =, for example file=name.
2555 Some options have variations in the long name spelling: specifically, we
2556 allow "regexp" because GNU grep allows it, though I personally go along
2557 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2558 These options are entered in the table as "regex(p)". Options can be in
2559 both these categories. */
2560
2561 for (op = optionlist; op->one_char != 0; op++)
2562 {
2563 char *opbra = strchr(op->long_name, '(');
2564 char *equals = strchr(op->long_name, '=');
2565
2566 /* Handle options with only one spelling of the name */
2567
2568 if (opbra == NULL) /* Does not contain '(' */
2569 {
2570 if (equals == NULL) /* Not thing=data case */
2571 {
2572 if (strcmp(arg, op->long_name) == 0) break;
2573 }
2574 else /* Special case xxx=data */
2575 {
2576 int oplen = (int)(equals - op->long_name);
2577 int arglen = (argequals == NULL)?
2578 (int)strlen(arg) : (int)(argequals - arg);
2579 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2580 {
2581 option_data = arg + arglen;
2582 if (*option_data == '=')
2583 {
2584 option_data++;
2585 longopwasequals = TRUE;
2586 }
2587 break;
2588 }
2589 }
2590 }
2591
2592 /* Handle options with an alternate spelling of the name */
2593
2594 else
2595 {
2596 char buff1[24];
2597 char buff2[24];
2598
2599 int baselen = (int)(opbra - op->long_name);
2600 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2601 int arglen = (argequals == NULL || equals == NULL)?
2602 (int)strlen(arg) : (int)(argequals - arg);
2603
2604 sprintf(buff1, "%.*s", baselen, op->long_name);
2605 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2606
2607 if (strncmp(arg, buff1, arglen) == 0 ||
2608 strncmp(arg, buff2, arglen) == 0)
2609 {
2610 if (equals != NULL && argequals != NULL)
2611 {
2612 option_data = argequals;
2613 if (*option_data == '=')
2614 {
2615 option_data++;
2616 longopwasequals = TRUE;
2617 }
2618 }
2619 break;
2620 }
2621 }
2622 }
2623
2624 if (op->one_char == 0)
2625 {
2626 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2627 pcregrep_exit(usage(2));
2628 }
2629 }
2630
2631 /* Jeffrey Friedl's debugging harness uses these additional options which
2632 are not in the right form for putting in the option table because they use
2633 only one hyphen, yet are more than one character long. By putting them
2634 separately here, they will not get displayed as part of the help() output,
2635 but I don't think Jeffrey will care about that. */
2636
2637 #ifdef JFRIEDL_DEBUG
2638 else if (strcmp(argv[i], "-pre") == 0) {
2639 jfriedl_prefix = argv[++i];
2640 continue;
2641 } else if (strcmp(argv[i], "-post") == 0) {
2642 jfriedl_postfix = argv[++i];
2643 continue;
2644 } else if (strcmp(argv[i], "-XT") == 0) {
2645 sscanf(argv[++i], "%d", &jfriedl_XT);
2646 continue;
2647 } else if (strcmp(argv[i], "-XR") == 0) {
2648 sscanf(argv[++i], "%d", &jfriedl_XR);
2649 continue;
2650 }
2651 #endif
2652
2653
2654 /* One-char options; many that have no data may be in a single argument; we
2655 continue till we hit the last one or one that needs data. */
2656
2657 else
2658 {
2659 char *s = argv[i] + 1;
2660 longop = FALSE;
2661
2662 while (*s != 0)
2663 {
2664 for (op = optionlist; op->one_char != 0; op++)
2665 {
2666 if (*s == op->one_char) break;
2667 }
2668 if (op->one_char == 0)
2669 {
2670 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2671 *s, argv[i]);
2672 pcregrep_exit(usage(2));
2673 }
2674
2675 option_data = s+1;
2676
2677 /* Break out if this is the last character in the string; it's handled
2678 below like a single multi-char option. */
2679
2680 if (*option_data == 0) break;
2681
2682 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2683 are used for ones that either have a numerical number or defaults, i.e.
2684 the data is optional. If a digit follows, there is data; if not, carry on
2685 with other single-character options in the same string. */
2686
2687 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2688 {
2689 if (isdigit((unsigned char)s[1])) break;
2690 }
2691 else /* Check for an option with data */
2692 {
2693 if (op->type != OP_NODATA) break;
2694 }
2695
2696 /* Handle a single-character option with no data, then loop for the
2697 next character in the string. */
2698
2699 pcre_options = handle_option(*s++, pcre_options);
2700 }
2701 }
2702
2703 /* At this point we should have op pointing to a matched option. If the type
2704 is NO_DATA, it means that there is no data, and the option might set
2705 something in the PCRE options. */
2706
2707 if (op->type == OP_NODATA)
2708 {
2709 pcre_options = handle_option(op->one_char, pcre_options);
2710 continue;
2711 }
2712
2713 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2714 either has a value or defaults to something. It cannot have data in a
2715 separate item. At the moment, the only such options are "colo(u)r",
2716 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2717
2718 if (*option_data == 0 &&
2719 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2720 op->type == OP_OP_NUMBERS))
2721 {
2722 switch (op->one_char)
2723 {
2724 case N_COLOUR:
2725 colour_option = (char *)"auto";
2726 break;
2727
2728 case 'o':
2729 only_matching_last = add_number(0, only_matching_last);
2730 if (only_matching == NULL) only_matching = only_matching_last;
2731 break;
2732
2733 #ifdef JFRIEDL_DEBUG
2734 case 'S':
2735 S_arg = 0;
2736 break;
2737 #endif
2738 }
2739 continue;
2740 }
2741
2742 /* Otherwise, find the data string for the option. */
2743
2744 if (*option_data == 0)
2745 {
2746 if (i >= argc - 1 || longopwasequals)
2747 {
2748 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2749 pcregrep_exit(usage(2));
2750 }
2751 option_data = argv[++i];
2752 }
2753
2754 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2755 added to a chain of numbers. */
2756
2757 if (op->type == OP_OP_NUMBERS)
2758 {
2759 unsigned long int n = decode_number(option_data, op, longop);
2760 omdatastr *omd = (omdatastr *)op->dataptr;
2761 *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2762 if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2763 }
2764
2765 /* If the option type is OP_PATLIST, it's the -e option, or one of the
2766 include/exclude options, which can be called multiple times to create lists
2767 of patterns. */
2768
2769 else if (op->type == OP_PATLIST)
2770 {
2771 patdatastr *pd = (patdatastr *)op->dataptr;
2772 *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2773 if (*(pd->lastptr) == NULL) goto EXIT2;
2774 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2775 }
2776
2777 /* If the option type is OP_FILELIST, it's one of the options that names a
2778 file. */
2779
2780 else if (op->type == OP_FILELIST)
2781 {
2782 fndatastr *fd = (fndatastr *)op->dataptr;
2783 fn = (fnstr *)malloc(sizeof(fnstr));
2784 if (fn == NULL)
2785 {
2786 fprintf(stderr, "pcregrep: malloc failed\n");
2787 goto EXIT2;
2788 }
2789 fn->next = NULL;
2790 fn->name = option_data;
2791 if (*(fd->anchor) == NULL)
2792 *(fd->anchor) = fn;
2793 else
2794 (*(fd->lastptr))->next = fn;
2795 *(fd->lastptr) = fn;
2796 }
2797
2798 /* Handle OP_BINARY_FILES */
2799
2800 else if (op->type == OP_BINFILES)
2801 {
2802 if (strcmp(option_data, "binary") == 0)
2803 binary_files = BIN_BINARY;
2804 else if (strcmp(option_data, "without-match") == 0)
2805 binary_files = BIN_NOMATCH;
2806 else if (strcmp(option_data, "text") == 0)
2807 binary_files = BIN_TEXT;
2808 else
2809 {
2810 fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2811 option_data);
2812 pcregrep_exit(usage(2));
2813 }
2814 }
2815
2816 /* Otherwise, deal with a single string or numeric data value. */
2817
2818 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2819 op->type != OP_OP_NUMBER)
2820 {
2821 *((char **)op->dataptr) = option_data;
2822 }
2823 else
2824 {
2825 unsigned long int n = decode_number(option_data, op, longop);
2826 if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2827 else *((int *)op->dataptr) = n;
2828 }
2829 }
2830
2831 /* Options have been decoded. If -C was used, its value is used as a default
2832 for -A and -B. */
2833
2834 if (both_context > 0)
2835 {
2836 if (after_context == 0) after_context = both_context;
2837 if (before_context == 0) before_context = both_context;
2838 }
2839
2840 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2841 However, all three set show_only_matching because they display, each in their
2842 own way, only the data that has matched. */
2843
2844 if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2845 (file_offsets && line_offsets))
2846 {
2847 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2848 "and/or --line-offsets\n");
2849 pcregrep_exit(usage(2));
2850 }
2851
2852 if (only_matching != NULL || file_offsets || line_offsets)
2853 show_only_matching = TRUE;
2854
2855 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2856 LC_ALL environment variable is set, and if so, use it. */
2857
2858 if (locale == NULL)
2859 {
2860 locale = getenv("LC_ALL");
2861 locale_from = "LCC_ALL";
2862 }
2863
2864 if (locale == NULL)
2865 {
2866 locale = getenv("LC_CTYPE");
2867 locale_from = "LC_CTYPE";
2868 }
2869
2870 /* If a locale has been provided, set it, and generate the tables the PCRE
2871 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2872
2873 if (locale != NULL)
2874 {
2875 if (setlocale(LC_CTYPE, locale) == NULL)
2876 {
2877 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2878 locale, locale_from);
2879 return 2;
2880 }
2881 pcretables = pcre_maketables();
2882 }
2883
2884 /* Sort out colouring */
2885
2886 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2887 {
2888 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2889 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2890 else
2891 {
2892 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2893 colour_option);
2894 return 2;
2895 }
2896 if (do_colour)
2897 {
2898 char *cs = getenv("PCREGREP_COLOUR");
2899 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2900 if (cs != NULL) colour_string = cs;
2901 }
2902 }
2903
2904 /* Interpret the newline type; the default settings are Unix-like. */
2905
2906 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2907 {
2908 pcre_options |= PCRE_NEWLINE_CR;
2909 endlinetype = EL_CR;
2910 }
2911 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2912 {
2913 pcre_options |= PCRE_NEWLINE_LF;
2914 endlinetype = EL_LF;
2915 }
2916 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2917 {
2918 pcre_options |= PCRE_NEWLINE_CRLF;
2919 endlinetype = EL_CRLF;
2920 }
2921 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2922 {
2923 pcre_options |= PCRE_NEWLINE_ANY;
2924 endlinetype = EL_ANY;
2925 }
2926 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2927 {
2928 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2929 endlinetype = EL_ANYCRLF;
2930 }
2931 else
2932 {
2933 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2934 return 2;
2935 }
2936
2937 /* Interpret the text values for -d and -D */
2938
2939 if (dee_option != NULL)
2940 {
2941 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2942 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2943 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2944 else
2945 {
2946 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2947 return 2;
2948 }
2949 }
2950
2951 if (DEE_option != NULL)
2952 {
2953 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2954 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2955 else
2956 {
2957 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2958 return 2;
2959 }
2960 }
2961
2962 /* Check the values for Jeffrey Friedl's debugging options. */
2963
2964 #ifdef JFRIEDL_DEBUG
2965 if (S_arg > 9)
2966 {
2967 fprintf(stderr, "pcregrep: bad value for -S option\n");
2968 return 2;
2969 }
2970 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2971 {
2972 if (jfriedl_XT == 0) jfriedl_XT = 1;
2973 if (jfriedl_XR == 0) jfriedl_XR = 1;
2974 }
2975 #endif
2976
2977 /* Get memory for the main buffer. */
2978
2979 bufsize = 3*bufthird;
2980 main_buffer = (char *)malloc(bufsize);
2981
2982 if (main_buffer == NULL)
2983 {
2984 fprintf(stderr, "pcregrep: malloc failed\n");
2985 goto EXIT2;
2986 }
2987
2988 /* If no patterns were provided by -e, and there are no files provided by -f,
2989 the first argument is the one and only pattern, and it must exist. */
2990
2991 if (patterns == NULL && pattern_files == NULL)
2992 {
2993 if (i >= argc) return usage(2);
2994 patterns = patterns_last = add_pattern(argv[i++], NULL);
2995 if (patterns == NULL) goto EXIT2;
2996 }
2997
2998 /* Compile the patterns that were provided on the command line, either by
2999 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
3000 after all the command-line options are read so that we know which PCRE options
3001 to use. When -F is used, compile_pattern() may add another block into the
3002 chain, so we must not access the next pointer till after the compile. */
3003
3004 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3005 {
3006 if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
3007 (j == 1 && patterns->next == NULL)? 0 : j))
3008 goto EXIT2;
3009 }
3010
3011 /* Read and compile the regular expressions that are provided in files. */
3012
3013 for (fn = pattern_files; fn != NULL; fn = fn->next)
3014 {
3015 if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3016 goto EXIT2;
3017 }
3018
3019 /* Study the regular expressions, as we will be running them many times. If an
3020 extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3021 returned, even if studying produces no data. */
3022
3023 if (match_limit > 0 || match_limit_recursion > 0)
3024 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3025
3026 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
3027
3028 #ifdef SUPPORT_PCREGREP_JIT
3029 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3030 jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3031 #endif
3032
3033 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3034 {
3035 cp->hint = pcre_study(cp->compiled, study_options, &error);
3036 if (error != NULL)
3037 {
3038 char s[16];
3039 if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3040 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3041 goto EXIT2;
3042 }
3043 #ifdef SUPPORT_PCREGREP_JIT
3044 if (jit_stack != NULL && cp->hint != NULL)
3045 pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3046 #endif
3047 }
3048
3049 /* If --match-limit or --recursion-limit was set, put the value(s) into the
3050 pcre_extra block for each pattern. There will always be an extra block because
3051 of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3052
3053 for (cp = patterns; cp != NULL; cp = cp->next)
3054 {
3055 if (match_limit > 0)
3056 {
3057 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3058 cp->hint->match_limit = match_limit;
3059 }
3060
3061 if (match_limit_recursion > 0)
3062 {
3063 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3064 cp->hint->match_limit_recursion = match_limit_recursion;
3065 }
3066 }
3067
3068 /* If there are include or exclude patterns read from the command line, compile
3069 them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3070 0. */
3071
3072 for (j = 0; j < 4; j++)
3073 {
3074 int k;
3075 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
3076 {
3077 if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3078 (k == 1 && cp->next == NULL)? 0 : k))
3079 goto EXIT2;
3080 }
3081 }
3082
3083 /* Read and compile include/exclude patterns from files. */
3084
3085 for (fn = include_from; fn != NULL; fn = fn->next)
3086 {
3087 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3088 goto EXIT2;
3089 }
3090
3091 for (fn = exclude_from; fn != NULL; fn = fn->next)
3092 {
3093 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3094 goto EXIT2;
3095 }
3096
3097 /* If there are no files that contain lists of files to search, and there are
3098 no file arguments, search stdin, and then exit. */
3099
3100 if (file_lists == NULL && i >= argc)
3101 {
3102 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3103 (filenames > FN_DEFAULT)? stdin_name : NULL);
3104 goto EXIT;
3105 }
3106
3107 /* If any files that contains a list of files to search have been specified,
3108 read them line by line and search the given files. */
3109
3110 for (fn = file_lists; fn != NULL; fn = fn->next)
3111 {
3112 char buffer[PATBUFSIZE];
3113 FILE *fl;
3114 if (strcmp(fn->name, "-") == 0) fl = stdin; else
3115 {
3116 fl = fopen(fn->name, "rb");
3117 if (fl == NULL)
3118 {
3119 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3120 strerror(errno));
3121 goto EXIT2;
3122 }
3123 }
3124 while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3125 {
3126 int frc;
3127 char *end = buffer + (int)strlen(buffer);
3128 while (end > buffer && isspace(end[-1])) end--;
3129 *end = 0;
3130 if (*buffer != 0)
3131 {
3132 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3133 if (frc > 1) rc = frc;
3134 else if (frc == 0 && rc == 1) rc = 0;
3135 }
3136 }
3137 if (fl != stdin) fclose(fl);
3138 }
3139
3140 /* After handling file-list, work through remaining arguments. Pass in the fact
3141 that there is only one argument at top level - this suppresses the file name if
3142 the argument is not a directory and filenames are not otherwise forced. */
3143
3144 only_one_at_top = i == argc - 1 && file_lists == NULL;
3145
3146 for (; i < argc; i++)
3147 {
3148 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3149 only_one_at_top);
3150 if (frc > 1) rc = frc;
3151 else if (frc == 0 && rc == 1) rc = 0;
3152 }
3153
3154 EXIT:
3155 #ifdef SUPPORT_PCREGREP_JIT
3156 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3157 #endif
3158
3159 if (main_buffer != NULL) free(main_buffer);
3160
3161 free_pattern_chain(patterns);
3162 free_pattern_chain(include_patterns);
3163 free_pattern_chain(include_dir_patterns);
3164 free_pattern_chain(exclude_patterns);
3165 free_pattern_chain(exclude_dir_patterns);
3166
3167 free_file_chain(exclude_from);
3168 free_file_chain(include_from);
3169 free_file_chain(pattern_files);
3170 free_file_chain(file_lists);
3171
3172 while (only_matching != NULL)
3173 {
3174 omstr *this = only_matching;
3175 only_matching = this->next;
3176 free(this);
3177 }
3178
3179 pcregrep_exit(rc);
3180
3181 EXIT2:
3182 rc = 2;
3183 goto EXIT;
3184 }
3185
3186 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5