/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1096 - (show annotations)
Tue Oct 16 15:56:09 2012 UTC (7 years, 1 month ago) by chpe
File MIME type: text/plain
File size: 92271 byte(s)
Error occurred while calculating annotation data.
pcre32: pcregrep: Use uint32 for characters
1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2012 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
/* Minimal boolean support: an int-based type and its two constants. */

typedef int BOOL;

#define TRUE 1
#define FALSE 0
72
#define OFFSET_SIZE 99

/* The pattern length limit is the larger of BUFSIZ and 8192. */

#if BUFSIZ <= 8192
#define MAXPATLEN 8192
#else
#define MAXPATLEN BUFSIZ
#endif

/* A pattern buffer holds a maximum-length pattern plus up to 10 bytes of
prefix and suffix text. */

#define PATBUFSIZE (MAXPATLEN + 10)
82
/* Settings for the "filenames" variable, which controls file name output.
The numeric order matters: every value above FN_DEFAULT means that a file
name is wanted. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_MATCH_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* Styles of file reading. */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};

/* Actions selected by the -d option (lower case names) and by the -D option
(upper case names). */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Flag bits for the special pattern processing options. */

#define PO_WORD_MATCH    0x0001
#define PO_LINE_MATCH    0x0002
#define PO_FIXED_STRINGS 0x0004

/* Kinds of line ending. */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};

/* Ways of treating binary files. */

enum {
  BIN_BINARY,
  BIN_NOMATCH,
  BIN_TEXT
};
111
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge: the result is tested by an "if"
with an empty body. Oddly, this does not also seem to apply to fprintf().

The "if" is wrapped in do { } while (0) so that the macro expands to a single
statement. Without the wrapper, writing

  if (cond) FWRITE(...); else ...

would not compile, and a bare "if" could silently capture a following "else".
The macro must be used as a statement, followed by a semicolon. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
119
120
121
122 /*************************************************
123 * Global variables *
124 *************************************************/
125
/* Variables for Jeffrey Friedl's debugging requirements. They are not part of
the regular code and exist only when JFRIEDL_DEBUG is defined. */

#ifdef JFRIEDL_DEBUG
static int S_arg = -1;

/* Number of times to repeat each regex attempt */
static unsigned int jfriedl_XR = 0;

/* Number of times to replicate the text */
static unsigned int jfriedl_XT = 0;

static const char *jfriedl_prefix = "";
static const char *jfriedl_postfix = "";
#endif
136
static int endlinetype;

/* String-valued settings. Those that appear in the option table are noted
with the option that sets them. */

static char *colour_string = (char *)"1;31";
static char *colour_option = NULL;                 /* --color / --colour */
static char *dee_option = NULL;                    /* -d, --directories */
static char *DEE_option = NULL;                    /* -D, --devices */
static char *locale = NULL;                        /* --locale */
static char *main_buffer = NULL;
static char *newline = NULL;                       /* -N, --newline */
static char *om_separator = (char *)"";            /* --om-separator */
static char *stdin_name = (char *)"(standard input)";  /* --label */

static const unsigned char *pcretables = NULL;
150
151 static int after_context = 0;
152 static int before_context = 0;
153 static int binary_files = BIN_BINARY;
154 static int both_context = 0;
155 static int bufthird = PCREGREP_BUFSIZE;
156 static int bufsize = 3*PCREGREP_BUFSIZE;
157
158 #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
159 static int dee_action = dee_SKIP;
160 #else
161 static int dee_action = dee_READ;
162 #endif
163
164 static int DEE_action = DEE_READ;
165 static int error_count = 0;
166 static int filenames = FN_DEFAULT;
167 static int pcre_options = 0;
168 static int process_options = 0;
169
170 #ifdef SUPPORT_PCREGREP_JIT
171 static int study_options = PCRE_STUDY_JIT_COMPILE;
172 #else
173 static int study_options = 0;
174 #endif
175
176 static unsigned long int match_limit = 0;
177 static unsigned long int match_limit_recursion = 0;
178
179 static BOOL count_only = FALSE;
180 static BOOL do_colour = FALSE;
181 static BOOL file_offsets = FALSE;
182 static BOOL hyphenpending = FALSE;
183 static BOOL invert = FALSE;
184 static BOOL line_buffered = FALSE;
185 static BOOL line_offsets = FALSE;
186 static BOOL multiline = FALSE;
187 static BOOL number = FALSE;
188 static BOOL omit_zero_count = FALSE;
189 static BOOL resource_error = FALSE;
190 static BOOL quiet = FALSE;
191 static BOOL show_only_matching = FALSE;
192 static BOOL silent = FALSE;
193 static BOOL utf8 = FALSE;
194
/* One item in the list of capturing group numbers given to --only-matching. */

typedef struct omstr {
  struct omstr *next;      /* following item, or NULL */
  int groupnum;            /* the group number */
} omstr;

/* The list itself: its first item and its last item. */

static omstr *only_matching = NULL, *only_matching_last = NULL;

/* A number chain is described by two variables; this structure holds
pointers to both of them. */

typedef struct omdatastr {
  omstr **anchor;          /* points to the variable holding the first item */
  omstr **lastptr;         /* points to the variable holding the last item */
} omdatastr;

static omdatastr only_matching_data = { &only_matching, &only_matching_last };
213
/* One item in a list of file names (used for -f and --{in,ex}clude-from). */

typedef struct fnstr {
  struct fnstr *next;      /* following item, or NULL */
  char *name;              /* the file name */
} fnstr;

/* The file name chains, each given as its first and last item. */

static fnstr *exclude_from = NULL, *exclude_from_last = NULL;
static fnstr *include_from = NULL, *include_from_last = NULL;

static fnstr *file_lists = NULL, *file_lists_last = NULL;
static fnstr *pattern_files = NULL, *pattern_files_last = NULL;

/* A file name chain is described by two variables; this structure holds
pointers to both of them. */

typedef struct fndatastr {
  fnstr **anchor;          /* points to the variable holding the first item */
  fnstr **lastptr;         /* points to the variable holding the last item */
} fndatastr;

static fndatastr exclude_from_data  = { &exclude_from, &exclude_from_last };
static fndatastr include_from_data  = { &include_from, &include_from_last };
static fndatastr file_lists_data    = { &file_lists, &file_lists_last };
static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
242
243 /* Structure for pattern and its compiled form; used for matching patterns and
244 also for include/exclude patterns. */
245
246 typedef struct patstr {
247 struct patstr *next;
248 char *string;
249 pcre *compiled;
250 pcre_extra *hint;
251 } patstr;
252
253 static patstr *patterns = NULL;
254 static patstr *patterns_last = NULL;
255 static patstr *include_patterns = NULL;
256 static patstr *include_patterns_last = NULL;
257 static patstr *exclude_patterns = NULL;
258 static patstr *exclude_patterns_last = NULL;
259 static patstr *include_dir_patterns = NULL;
260 static patstr *include_dir_patterns_last = NULL;
261 static patstr *exclude_dir_patterns = NULL;
262 static patstr *exclude_dir_patterns_last = NULL;
263
264 /* Structure holding the two variables that describe a pattern chain. A pointer
265 to such structures is used for each appropriate option. */
266
267 typedef struct patdatastr {
268 patstr **anchor;
269 patstr **lastptr;
270 } patdatastr;
271
272 static patdatastr match_patdata = { &patterns, &patterns_last };
273 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
274 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
275 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
276 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
277
278 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
279 &include_dir_patterns, &exclude_dir_patterns };
280
281 static const char *incexname[4] = { "--include", "--exclude",
282 "--include-dir", "--exclude-dir" };
283
/* The kinds of option, as stored in the "type" field of an option_item. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_LONGNUMBER,
  OP_OP_NUMBER,
  OP_OP_NUMBERS,
  OP_PATLIST,
  OP_FILELIST,
  OP_BINFILES
};

/* Description of one option; the options are held in a table of these. */

typedef struct option_item {
  int type;                /* one of the OP_xxx values */
  int one_char;            /* single-letter form, or a negative N_xxx code */
  void *dataptr;           /* where the option's data is kept, or NULL */
  const char *long_name;   /* long option name */
  const char *help_text;   /* description shown by --help */
} option_item;
296
/* An option that has no single-letter equivalent is given a negative code in
place of the letter, which also serves to identify it. */

enum {
  N_COLOUR       = -1,
  N_EXCLUDE      = -2,
  N_EXCLUDE_DIR  = -3,
  N_HELP         = -4,
  N_INCLUDE      = -5,
  N_INCLUDE_DIR  = -6,
  N_LABEL        = -7,
  N_LOCALE       = -8,
  N_NULL         = -9,
  N_LOFFSETS     = -10,
  N_FOFFSETS     = -11,
  N_LBUFFER      = -12,
  N_M_LIMIT      = -13,
  N_M_LIMIT_REC  = -14,
  N_BUFSIZE      = -15,
  N_NOJIT        = -16,
  N_FILE_LIST    = -17,
  N_BINARY_FILES = -18,
  N_EXCLUDE_FROM = -19,
  N_INCLUDE_FROM = -20,
  N_OM_SEPARATOR = -21
};
321
322 static option_item optionlist[] = {
323 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
324 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
325 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
326 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
327 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
328 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
329 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
330 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
331 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
332 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
333 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
334 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
335 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
336 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
337 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
338 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
339 { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
340 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
341 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
342 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
343 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
344 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
345 #ifdef SUPPORT_PCREGREP_JIT
346 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
347 #else
348 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
349 #endif
350 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
351 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
352 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
353 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
354 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
355 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
356 { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
357 { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
358 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
359 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
360 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
361 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
362 { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
363 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
364 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
365 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
366 { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
367 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
368 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
369 { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
370 { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
371
372 /* These two were accidentally implemented with underscores instead of
373 hyphens in the option names. As this was not discovered for several releases,
374 the incorrect versions are left in the table for compatibility. However, the
375 --help function misses out any option that has an underscore in its name. */
376
377 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
378 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
379
380 #ifdef JFRIEDL_DEBUG
381 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
382 #endif
383 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
384 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
385 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
386 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
387 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
388 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
389 { OP_NODATA, 0, NULL, NULL, NULL }
390 };
391
/* Text placed before and after a pattern, according to the -w, -x, and -F
options. These options set the 1, 2, and 4 bits in process_options,
respectively, and the resulting value indexes both tables. Combining -w with
-x has the same effect as -x alone, so those entries are duplicates. The
PATBUFSIZE macro allows 10 characters for the longest prefix+suffix; if
anything longer is added, it must be adjusted. */

static const char *prefix[] = {
  "",            /* 0: none */
  "\\b",         /* 1: -w */
  "^(?:",        /* 2: -x */
  "^(?:",        /* 3: -w -x */
  "\\Q",         /* 4: -F */
  "\\b\\Q",      /* 5: -F -w */
  "^(?:\\Q",     /* 6: -F -x */
  "^(?:\\Q"      /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",            /* 0: none */
  "\\b",         /* 1: -w */
  ")$",          /* 2: -x */
  ")$",          /* 3: -w -x */
  "\\E",         /* 4: -F */
  "\\E\\b",      /* 5: -F -w */
  "\\E)$",       /* 6: -F -x */
  "\\E)$"        /* 7: -F -w -x */
};
404
/* Tables for handling UTF-8; they are needed only when the newline setting
is "any". */

const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};

/* 64 entries, shown here in rows of 16 */

const char utf8_table4[] = {
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
};
414
415
416
417 /*************************************************
418 * Exit from the program *
419 *************************************************/
420
421 /* If there has been a resource error, give a suitable message.
422
423 Argument: the return code
424 Returns: does not return
425 */
426
427 static void
428 pcregrep_exit(int rc)
429 {
430 if (resource_error)
431 {
432 fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
433 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
434 PCRE_ERROR_JIT_STACKLIMIT);
435 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
436 }
437 exit(rc);
438 }
439
440
441 /*************************************************
442 * Add item to chain of patterns *
443 *************************************************/
444
445 /* Used to add an item onto a chain, or just return an unconnected item if the
446 "after" argument is NULL.
447
448 Arguments:
449 s pattern string to add
450 after if not NULL points to item to insert after
451
452 Returns: new pattern block
453 */
454
455 static patstr *
456 add_pattern(char *s, patstr *after)
457 {
458 patstr *p = (patstr *)malloc(sizeof(patstr));
459 if (p == NULL)
460 {
461 fprintf(stderr, "pcregrep: malloc failed\n");
462 pcregrep_exit(2);
463 }
464 if (strlen(s) > MAXPATLEN)
465 {
466 fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
467 MAXPATLEN);
468 return NULL;
469 }
470 p->next = NULL;
471 p->string = s;
472 p->compiled = NULL;
473 p->hint = NULL;
474
475 if (after != NULL)
476 {
477 p->next = after->next;
478 after->next = p;
479 }
480 return p;
481 }
482
483
484 /*************************************************
485 * Free chain of patterns *
486 *************************************************/
487
488 /* Used for several chains of patterns.
489
490 Argument: pointer to start of chain
491 Returns: nothing
492 */
493
494 static void
495 free_pattern_chain(patstr *pc)
496 {
497 while (pc != NULL)
498 {
499 patstr *p = pc;
500 pc = p->next;
501 if (p->hint != NULL) pcre_free_study(p->hint);
502 if (p->compiled != NULL) pcre_free(p->compiled);
503 free(p);
504 }
505 }
506
507
508 /*************************************************
509 * Free chain of file names *
510 *************************************************/
511
512 /*
513 Argument: pointer to start of chain
514 Returns: nothing
515 */
516
517 static void
518 free_file_chain(fnstr *fn)
519 {
520 while (fn != NULL)
521 {
522 fnstr *f = fn;
523 fn = f->next;
524 free(f);
525 }
526 }
527
528
529 /*************************************************
530 * OS-specific functions *
531 *************************************************/
532
533 /* These functions are defined so that they can be made system specific,
534 although at present the only ones are for Unix, Win32, and for "no support". */
535
536
537 /************* Directory scanning in Unix ***********/
538
539 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
540 #include <sys/types.h>
541 #include <sys/stat.h>
542 #include <dirent.h>
543
/* In Unix a directory is scanned through the DIR type from <dirent.h>. */

typedef DIR directory_type;

/* The file name separator character */

#define FILESEP '/'
546
/* Tests whether a path names a directory.

Argument: the path to test
Returns: 1 if stat() succeeds and reports a directory, otherwise 0
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  /* When stat() fails, answer "not a directory", in the expectation that
  opening the path as a file will fail. */

  if (stat(filename, &info) < 0) return 0;
  return S_ISDIR(info.st_mode);
}
555
556 static directory_type *
557 opendirectory(char *filename)
558 {
559 return opendir(filename);
560 }
561
562 static char *
563 readdirectory(directory_type *dir)
564 {
565 for (;;)
566 {
567 struct dirent *dent = readdir(dir);
568 if (dent == NULL) return NULL;
569 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
570 return dent->d_name;
571 }
572 /* Control never reaches here */
573 }
574
575 static void
576 closedirectory(directory_type *dir)
577 {
578 closedir(dir);
579 }
580
581
582 /************* Test for regular file in Unix **********/
583
/* Tests whether a path names a regular file.

Argument: the path to test
Returns: 1 if stat() fails, or if it succeeds and reports a regular
               file; otherwise 0
*/

static int
isregfile(char *filename)
{
  struct stat info;

  /* When stat() fails, answer "regular", in the expectation that opening the
  path as a file will fail. */

  if (stat(filename, &info) < 0) return 1;
  return S_ISREG(info.st_mode);
}
592
593
594 /************* Test for a terminal in Unix **********/
595
596 static BOOL
597 is_stdout_tty(void)
598 {
599 return isatty(fileno(stdout));
600 }
601
602 static BOOL
603 is_file_tty(FILE *f)
604 {
605 return isatty(fileno(f));
606 }
607
608
609 /************* Directory scanning in Win32 ***********/
610
611 /* I (Philip Hazel) have no means of testing this code. It was contributed by
612 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
613 when it did not exist. David Byron added a patch that moved the #include of
614 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
615 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
616 undefined when it is indeed undefined. */
617
618 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
619
620 #ifndef STRICT
621 # define STRICT
622 #endif
623 #ifndef WIN32_LEAN_AND_MEAN
624 # define WIN32_LEAN_AND_MEAN
625 #endif
626
627 #include <windows.h>
628
629 #ifndef INVALID_FILE_ATTRIBUTES
630 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
631 #endif
632
633 typedef struct directory_type
634 {
635 HANDLE handle;
636 BOOL first;
637 WIN32_FIND_DATA data;
638 } directory_type;
639
640 #define FILESEP '/'
641
642 int
643 isdirectory(char *filename)
644 {
645 DWORD attr = GetFileAttributes(filename);
646 if (attr == INVALID_FILE_ATTRIBUTES)
647 return 0;
648 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
649 }
650
651 directory_type *
652 opendirectory(char *filename)
653 {
654 size_t len;
655 char *pattern;
656 directory_type *dir;
657 DWORD err;
658 len = strlen(filename);
659 pattern = (char *)malloc(len + 3);
660 dir = (directory_type *)malloc(sizeof(*dir));
661 if ((pattern == NULL) || (dir == NULL))
662 {
663 fprintf(stderr, "pcregrep: malloc failed\n");
664 pcregrep_exit(2);
665 }
666 memcpy(pattern, filename, len);
667 memcpy(&(pattern[len]), "\\*", 3);
668 dir->handle = FindFirstFile(pattern, &(dir->data));
669 if (dir->handle != INVALID_HANDLE_VALUE)
670 {
671 free(pattern);
672 dir->first = TRUE;
673 return dir;
674 }
675 err = GetLastError();
676 free(pattern);
677 free(dir);
678 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
679 return NULL;
680 }
681
682 char *
683 readdirectory(directory_type *dir)
684 {
685 for (;;)
686 {
687 if (!dir->first)
688 {
689 if (!FindNextFile(dir->handle, &(dir->data)))
690 return NULL;
691 }
692 else
693 {
694 dir->first = FALSE;
695 }
696 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
697 return dir->data.cFileName;
698 }
699 #ifndef _MSC_VER
700 return NULL; /* Keep compiler happy; never executed */
701 #endif
702 }
703
704 void
705 closedirectory(directory_type *dir)
706 {
707 FindClose(dir->handle);
708 free(dir);
709 }
710
711
712 /************* Test for regular file in Win32 **********/
713
/* I don't know how to do this, or if it can be done; every path that is not
a directory is assumed to be a regular file.

Argument: the path to test
Returns: non-zero unless isdirectory() says the path is a directory
*/

int
isregfile(char *filename)
{
  if (isdirectory(filename)) return 0;
  return 1;
}
721
722
723 /************* Test for a terminal in Win32 **********/
724
725 /* I don't know how to do this; assume never */
726
727 static BOOL
728 is_stdout_tty(void)
729 {
730 return FALSE;
731 }
732
733 static BOOL
734 is_file_tty(FILE *f)
735 {
736 return FALSE;
737 }
738
739
740 /************* Directory scanning when we can't do it ***********/
741
742 /* The type is void, and apart from isdirectory(), the functions do nothing. */
743
744 #else
745
#define FILESEP 0
typedef void directory_type;

/* No path is ever reported as a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* No directory can be opened. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

/* There is never an entry to read. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

/* There is nothing to close. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
753
754
755 /************* Test for regular when we can't do it **********/
756
/* With no means of testing, every file is assumed to be regular.

Argument: the path (not used)
Returns: always 1
*/

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
760
761
762 /************* Test for a terminal when we can't do it **********/
763
764 static BOOL
765 is_stdout_tty(void)
766 {
767 return FALSE;
768 }
769
770 static BOOL
771 is_file_tty(FILE *f)
772 {
773 return FALSE;
774 }
775
776 #endif
777
778
779
780 #ifndef HAVE_STRERROR
781 /*************************************************
782 * Provide strerror() for non-ANSI libraries *
783 *************************************************/
784
/* Some old-fashioned systems still around (e.g. SunOS4) lack strerror() in
their libraries. This simple substitute looks the message up in the
sys_errlist table instead.

Argument: the error number
Returns: the message from sys_errlist, or a fixed text if the number
               is outside the range 0 to sys_nerr-1
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
798 #endif /* HAVE_STRERROR */
799
800
801
802 /*************************************************
803 * Usage function *
804 *************************************************/
805
806 static int
807 usage(int rc)
808 {
809 option_item *op;
810 fprintf(stderr, "Usage: pcregrep [-");
811 for (op = optionlist; op->one_char != 0; op++)
812 {
813 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
814 }
815 fprintf(stderr, "] [long options] [pattern] [files]\n");
816 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
817 "options.\n");
818 return rc;
819 }
820
821
822
823 /*************************************************
824 * Help function *
825 *************************************************/
826
827 static void
828 help(void)
829 {
830 option_item *op;
831
832 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
833 printf("Search for PATTERN in each FILE or standard input.\n");
834 printf("PATTERN must be present if neither -e nor -f is used.\n");
835 printf("\"-\" can be used as a file name to mean STDIN.\n");
836
837 #ifdef SUPPORT_LIBZ
838 printf("Files whose names end in .gz are read using zlib.\n");
839 #endif
840
841 #ifdef SUPPORT_LIBBZ2
842 printf("Files whose names end in .bz2 are read using bzlib2.\n");
843 #endif
844
845 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
846 printf("Other files and the standard input are read as plain files.\n\n");
847 #else
848 printf("All files are read as plain files, without any interpretation.\n\n");
849 #endif
850
851 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
852 printf("Options:\n");
853
854 for (op = optionlist; op->one_char != 0; op++)
855 {
856 int n;
857 char s[4];
858
859 /* Two options were accidentally implemented and documented with underscores
860 instead of hyphens in their names, something that was not noticed for quite a
861 few releases. When fixing this, I left the underscored versions in the list
862 in case people were using them. However, we don't want to display them in the
863 help data. There are no other options that contain underscores, and we do not
864 expect ever to implement such options. Therefore, just omit any option that
865 contains an underscore. */
866
867 if (strchr(op->long_name, '_') != NULL) continue;
868
869 if (op->one_char > 0 && (op->long_name)[0] == 0)
870 n = 31 - printf(" -%c", op->one_char);
871 else
872 {
873 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
874 else strcpy(s, " ");
875 n = 31 - printf(" %s --%s", s, op->long_name);
876 }
877
878 if (n < 1) n = 1;
879 printf("%.*s%s\n", n, " ", op->help_text);
880 }
881
882 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
883 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
884 printf("When reading patterns or file names from a file, trailing white\n");
885 printf("space is removed and blank lines are ignored.\n");
886 printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
887
888 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
889 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
890 }
891
892
893
894 /*************************************************
895 * Test exclude/includes *
896 *************************************************/
897
898 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
899 there are no includes, the path must match an include pattern.
900
901 Arguments:
902 path the path to be matched
903 ip the chain of include patterns
904 ep the chain of exclude patterns
905
906 Returns: TRUE if the path is not excluded
907 */
908
909 static BOOL
910 test_incexc(char *path, patstr *ip, patstr *ep)
911 {
912 int plen = strlen(path);
913
914 for (; ep != NULL; ep = ep->next)
915 {
916 if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
917 return FALSE;
918 }
919
920 if (ip == NULL) return TRUE;
921
922 for (; ip != NULL; ip = ip->next)
923 {
924 if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
925 return TRUE;
926 }
927
928 return FALSE;
929 }
930
931
932
933 /*************************************************
934 * Decode integer argument value *
935 *************************************************/
936
937 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
938 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
939 just keep it simple.
940
941 Arguments:
942 option_data the option data string
943 op the option item (for error messages)
944 longop TRUE if option given in long form
945
946 Returns: a long integer
947 */
948
949 static long int
950 decode_number(char *option_data, option_item *op, BOOL longop)
951 {
952 unsigned long int n = 0;
953 char *endptr = option_data;
954 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
955 while (isdigit((unsigned char)(*endptr)))
956 n = n * 10 + (int)(*endptr++ - '0');
957 if (toupper(*endptr) == 'K')
958 {
959 n *= 1024;
960 endptr++;
961 }
962 else if (toupper(*endptr) == 'M')
963 {
964 n *= 1024*1024;
965 endptr++;
966 }
967
968 if (*endptr != 0) /* Error */
969 {
970 if (longop)
971 {
972 char *equals = strchr(op->long_name, '=');
973 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
974 (int)(equals - op->long_name);
975 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
976 option_data, nlen, op->long_name);
977 }
978 else
979 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
980 option_data, op->one_char);
981 pcregrep_exit(usage(2));
982 }
983
984 return n;
985 }
986
987
988
989 /*************************************************
990 * Add item to a chain of numbers *
991 *************************************************/
992
993 /* Used to add an item onto a chain, or just return an unconnected item if the
994 "after" argument is NULL.
995
996 Arguments:
997 n the number to add
998 after if not NULL points to item to insert after
999
1000 Returns: new number block
1001 */
1002
1003 static omstr *
1004 add_number(int n, omstr *after)
1005 {
1006 omstr *om = (omstr *)malloc(sizeof(omstr));
1007
1008 if (om == NULL)
1009 {
1010 fprintf(stderr, "pcregrep: malloc failed\n");
1011 pcregrep_exit(2);
1012 }
1013 om->next = NULL;
1014 om->groupnum = n;
1015
1016 if (after != NULL)
1017 {
1018 om->next = after->next;
1019 after->next = om;
1020 }
1021 return om;
1022 }
1023
1024
1025
/*************************************************
*            Read one line of input              *
*************************************************/

/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Characters are taken one at a time with fgetc() until a '\n' has been stored,
"length" characters have been stored, or EOF is returned. No terminating zero
is added. If "length" is zero or negative, nothing is read and nothing is
stored, so the buffer can never be overrun.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
  int c;
  int yield = 0;

  /* Without this guard a character would be stored before the length limit
  was ever tested. */

  if (length <= 0) return 0;

  while ((c = fgetc(f)) != EOF)
  {
    buffer[yield++] = (char)c;
    if (c == '\n' || yield >= length) break;
  }

  return yield;
}
1057
1058
1059
1060 /*************************************************
1061 * Find end of line *
1062 *************************************************/
1063
1064 /* The length of the endline sequence that is found is set via lenptr. This may
1065 be zero at the very end of the file if there is no line-ending sequence there.
1066
1067 Arguments:
1068 p current position in line
1069 endptr end of available data
1070 lenptr where to put the length of the eol sequence
1071
1072 Returns: pointer after the last byte of the line,
1073 including the newline byte(s)
1074 */
1075
1076 static char *
1077 end_of_line(char *p, char *endptr, int *lenptr)
1078 {
1079 switch(endlinetype)
1080 {
1081 default: /* Just in case */
1082 case EL_LF:
1083 while (p < endptr && *p != '\n') p++;
1084 if (p < endptr)
1085 {
1086 *lenptr = 1;
1087 return p + 1;
1088 }
1089 *lenptr = 0;
1090 return endptr;
1091
1092 case EL_CR:
1093 while (p < endptr && *p != '\r') p++;
1094 if (p < endptr)
1095 {
1096 *lenptr = 1;
1097 return p + 1;
1098 }
1099 *lenptr = 0;
1100 return endptr;
1101
1102 case EL_CRLF:
1103 for (;;)
1104 {
1105 while (p < endptr && *p != '\r') p++;
1106 if (++p >= endptr)
1107 {
1108 *lenptr = 0;
1109 return endptr;
1110 }
1111 if (*p == '\n')
1112 {
1113 *lenptr = 2;
1114 return p + 1;
1115 }
1116 }
1117 break;
1118
1119 case EL_ANYCRLF:
1120 while (p < endptr)
1121 {
1122 int extra = 0;
1123 register int c = *((unsigned char *)p);
1124
1125 if (utf8 && c >= 0xc0)
1126 {
1127 int gcii, gcss;
1128 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1129 gcss = 6*extra;
1130 c = (c & utf8_table3[extra]) << gcss;
1131 for (gcii = 1; gcii <= extra; gcii++)
1132 {
1133 gcss -= 6;
1134 c |= (p[gcii] & 0x3f) << gcss;
1135 }
1136 }
1137
1138 p += 1 + extra;
1139
1140 switch (c)
1141 {
1142 case '\n':
1143 *lenptr = 1;
1144 return p;
1145
1146 case '\r':
1147 if (p < endptr && *p == '\n')
1148 {
1149 *lenptr = 2;
1150 p++;
1151 }
1152 else *lenptr = 1;
1153 return p;
1154
1155 default:
1156 break;
1157 }
1158 } /* End of loop for ANYCRLF case */
1159
1160 *lenptr = 0; /* Must have hit the end */
1161 return endptr;
1162
1163 case EL_ANY:
1164 while (p < endptr)
1165 {
1166 int extra = 0;
1167 register int c = *((unsigned char *)p);
1168
1169 if (utf8 && c >= 0xc0)
1170 {
1171 int gcii, gcss;
1172 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1173 gcss = 6*extra;
1174 c = (c & utf8_table3[extra]) << gcss;
1175 for (gcii = 1; gcii <= extra; gcii++)
1176 {
1177 gcss -= 6;
1178 c |= (p[gcii] & 0x3f) << gcss;
1179 }
1180 }
1181
1182 p += 1 + extra;
1183
1184 switch (c)
1185 {
1186 case '\n': /* LF */
1187 case '\v': /* VT */
1188 case '\f': /* FF */
1189 *lenptr = 1;
1190 return p;
1191
1192 case '\r': /* CR */
1193 if (p < endptr && *p == '\n')
1194 {
1195 *lenptr = 2;
1196 p++;
1197 }
1198 else *lenptr = 1;
1199 return p;
1200
1201 #ifndef EBCDIC
1202 case 0x85: /* Unicode NEL */
1203 *lenptr = utf8? 2 : 1;
1204 return p;
1205
1206 case 0x2028: /* Unicode LS */
1207 case 0x2029: /* Unicode PS */
1208 *lenptr = 3;
1209 return p;
1210 #endif /* Not EBCDIC */
1211
1212 default:
1213 break;
1214 }
1215 } /* End of loop for ANY case */
1216
1217 *lenptr = 0; /* Must have hit the end */
1218 return endptr;
1219 } /* End of overall switch */
1220 }
1221
1222
1223
1224 /*************************************************
1225 * Find start of previous line *
1226 *************************************************/
1227
1228 /* This is called when looking back for before lines to print.
1229
1230 Arguments:
1231 p start of the subsequent line
1232 startptr start of available data
1233
1234 Returns: pointer to the start of the previous line
1235 */
1236
1237 static char *
1238 previous_line(char *p, char *startptr)
1239 {
1240 switch(endlinetype)
1241 {
1242 default: /* Just in case */
1243 case EL_LF:
1244 p--;
1245 while (p > startptr && p[-1] != '\n') p--;
1246 return p;
1247
1248 case EL_CR:
1249 p--;
1250 while (p > startptr && p[-1] != '\n') p--;
1251 return p;
1252
1253 case EL_CRLF:
1254 for (;;)
1255 {
1256 p -= 2;
1257 while (p > startptr && p[-1] != '\n') p--;
1258 if (p <= startptr + 1 || p[-2] == '\r') return p;
1259 }
1260 return p; /* But control should never get here */
1261
1262 case EL_ANY:
1263 case EL_ANYCRLF:
1264 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1265 if (utf8) while ((*p & 0xc0) == 0x80) p--;
1266
1267 while (p > startptr)
1268 {
1269 register unsigned int c;
1270 char *pp = p - 1;
1271
1272 if (utf8)
1273 {
1274 int extra = 0;
1275 while ((*pp & 0xc0) == 0x80) pp--;
1276 c = *((unsigned char *)pp);
1277 if (c >= 0xc0)
1278 {
1279 int gcii, gcss;
1280 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1281 gcss = 6*extra;
1282 c = (c & utf8_table3[extra]) << gcss;
1283 for (gcii = 1; gcii <= extra; gcii++)
1284 {
1285 gcss -= 6;
1286 c |= (pp[gcii] & 0x3f) << gcss;
1287 }
1288 }
1289 }
1290 else c = *((unsigned char *)pp);
1291
1292 if (endlinetype == EL_ANYCRLF) switch (c)
1293 {
1294 case '\n': /* LF */
1295 case '\r': /* CR */
1296 return p;
1297
1298 default:
1299 break;
1300 }
1301
1302 else switch (c)
1303 {
1304 case '\n': /* LF */
1305 case '\v': /* VT */
1306 case '\f': /* FF */
1307 case '\r': /* CR */
1308 #ifndef EBCDIE
1309 case 0x85: /* Unicode NEL */
1310 case 0x2028: /* Unicode LS */
1311 case 0x2029: /* Unicode PS */
1312 #endif /* Not EBCDIC */
1313 return p;
1314
1315 default:
1316 break;
1317 }
1318
1319 p = pp; /* Back one character */
1320 } /* End of loop for ANY case */
1321
1322 return startptr; /* Hit start of data */
1323 } /* End of overall switch */
1324 }
1325
1326
1327
1328
1329
1330 /*************************************************
1331 * Print the previous "after" lines *
1332 *************************************************/
1333
1334 /* This is called if we are about to lose said lines because of buffer filling,
1335 and at the end of the file. The data in the line is written using fwrite() so
1336 that a binary zero does not terminate it.
1337
1338 Arguments:
1339 lastmatchnumber the number of the last matching line, plus one
1340 lastmatchrestart where we restarted after the last match
1341 endptr end of available data
1342 printname filename for printing
1343
1344 Returns: nothing
1345 */
1346
1347 static void
1348 do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1349 char *printname)
1350 {
1351 if (after_context > 0 && lastmatchnumber > 0)
1352 {
1353 int count = 0;
1354 while (lastmatchrestart < endptr && count++ < after_context)
1355 {
1356 int ellength;
1357 char *pp = lastmatchrestart;
1358 if (printname != NULL) fprintf(stdout, "%s-", printname);
1359 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1360 pp = end_of_line(pp, endptr, &ellength);
1361 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1362 lastmatchrestart = pp;
1363 }
1364 hyphenpending = TRUE;
1365 }
1366 }
1367
1368
1369
1370 /*************************************************
1371 * Apply patterns to subject till one matches *
1372 *************************************************/
1373
1374 /* This function is called to run through all patterns, looking for a match. It
1375 is used multiple times for the same subject when colouring is enabled, in order
1376 to find all possible matches.
1377
1378 Arguments:
1379 matchptr the start of the subject
1380 length the length of the subject to match
1381 startoffset where to start matching
1382 offsets the offets vector to fill in
1383 mrc address of where to put the result of pcre_exec()
1384
1385 Returns: TRUE if there was a match
1386 FALSE if there was no match
1387 invert if there was a non-fatal error
1388 */
1389
1390 static BOOL
1391 match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
1392 int *mrc)
1393 {
1394 int i;
1395 size_t slen = length;
1396 patstr *p = patterns;
1397 const char *msg = "this text:\n\n";
1398
1399 if (slen > 200)
1400 {
1401 slen = 200;
1402 msg = "text that starts:\n\n";
1403 }
1404 for (i = 1; p != NULL; p = p->next, i++)
1405 {
1406 *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1407 startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
1408 if (*mrc >= 0) return TRUE;
1409 if (*mrc == PCRE_ERROR_NOMATCH) continue;
1410 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1411 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1412 fprintf(stderr, "%s", msg);
1413 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
1414 fprintf(stderr, "\n\n");
1415 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1416 *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1417 resource_error = TRUE;
1418 if (error_count++ > 20)
1419 {
1420 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1421 pcregrep_exit(2);
1422 }
1423 return invert; /* No more matching; don't show the line again */
1424 }
1425
1426 return FALSE; /* No match, no errors */
1427 }
1428
1429
1430
1431 /*************************************************
1432 * Grep an individual file *
1433 *************************************************/
1434
1435 /* This is called from grep_or_recurse() below. It uses a buffer that is three
1436 times the value of bufthird. The matching point is never allowed to stray into
1437 the top third of the buffer, thus keeping more of the file available for
1438 context printing or for multiline scanning. For large files, the pointer will
1439 be in the middle third most of the time, so the bottom third is available for
1440 "before" context printing.
1441
1442 Arguments:
1443 handle the fopened FILE stream for a normal file
1444 the gzFile pointer when reading is via libz
1445 the BZFILE pointer when reading is via libbz2
1446 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1447 filename the file name or NULL (for errors)
1448 printname the file name if it is to be printed for each match
1449 or NULL if the file name is not to be printed
1450 it cannot be NULL if filenames[_nomatch]_only is set
1451
1452 Returns: 0 if there was at least one match
1453 1 otherwise (no matches)
1454 2 if an overlong line is encountered
1455 3 if there is a read error on a .bz2 file
1456 */
1457
1458 static int
1459 pcregrep(void *handle, int frtype, char *filename, char *printname)
1460 {
1461 int rc = 1;
1462 int linenumber = 1;
1463 int lastmatchnumber = 0;
1464 int count = 0;
1465 int filepos = 0;
1466 int offsets[OFFSET_SIZE];
1467 char *lastmatchrestart = NULL;
1468 char *ptr = main_buffer;
1469 char *endptr;
1470 size_t bufflength;
1471 BOOL binary = FALSE;
1472 BOOL endhyphenpending = FALSE;
1473 BOOL input_line_buffered = line_buffered;
1474 FILE *in = NULL; /* Ensure initialized */
1475
1476 #ifdef SUPPORT_LIBZ
1477 gzFile ingz = NULL;
1478 #endif
1479
1480 #ifdef SUPPORT_LIBBZ2
1481 BZFILE *inbz2 = NULL;
1482 #endif
1483
1484
1485 /* Do the first read into the start of the buffer and set up the pointer to end
1486 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1487 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1488 fail. */
1489
1490 #ifdef SUPPORT_LIBZ
1491 if (frtype == FR_LIBZ)
1492 {
1493 ingz = (gzFile)handle;
1494 bufflength = gzread (ingz, main_buffer, bufsize);
1495 }
1496 else
1497 #endif
1498
1499 #ifdef SUPPORT_LIBBZ2
1500 if (frtype == FR_LIBBZ2)
1501 {
1502 inbz2 = (BZFILE *)handle;
1503 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1504 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1505 } /* without the cast it is unsigned. */
1506 else
1507 #endif
1508
1509 {
1510 in = (FILE *)handle;
1511 if (is_file_tty(in)) input_line_buffered = TRUE;
1512 bufflength = input_line_buffered?
1513 read_one_line(main_buffer, bufsize, in) :
1514 fread(main_buffer, 1, bufsize, in);
1515 }
1516
1517 endptr = main_buffer + bufflength;
1518
1519 /* Unless binary-files=text, see if we have a binary file. This uses the same
1520 rule as GNU grep, namely, a search for a binary zero byte near the start of the
1521 file. */
1522
1523 if (binary_files != BIN_TEXT)
1524 {
1525 binary =
1526 memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1527 if (binary && binary_files == BIN_NOMATCH) return 1;
1528 }
1529
1530 /* Loop while the current pointer is not at the end of the file. For large
1531 files, endptr will be at the end of the buffer when we are in the middle of the
1532 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1533 way, the buffer is shifted left and re-filled. */
1534
1535 while (ptr < endptr)
1536 {
1537 int endlinelength;
1538 int mrc = 0;
1539 int startoffset = 0;
1540 BOOL match;
1541 char *matchptr = ptr;
1542 char *t = ptr;
1543 size_t length, linelength;
1544
1545 /* At this point, ptr is at the start of a line. We need to find the length
1546 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1547 length remainder of the data in the buffer. Otherwise, it is the length of
1548 the next line, excluding the terminating newline. After matching, we always
1549 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1550 option is used for compiling, so that any match is constrained to be in the
1551 first line. */
1552
1553 t = end_of_line(t, endptr, &endlinelength);
1554 linelength = t - ptr - endlinelength;
1555 length = multiline? (size_t)(endptr - ptr) : linelength;
1556
1557 /* Check to see if the line we are looking at extends right to the very end
1558 of the buffer without a line terminator. This means the line is too long to
1559 handle. */
1560
1561 if (endlinelength == 0 && t == main_buffer + bufsize)
1562 {
1563 fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1564 "pcregrep: check the --buffer-size option\n",
1565 linenumber,
1566 (filename == NULL)? "" : " of file ",
1567 (filename == NULL)? "" : filename);
1568 return 2;
1569 }
1570
1571 /* Extra processing for Jeffrey Friedl's debugging. */
1572
1573 #ifdef JFRIEDL_DEBUG
1574 if (jfriedl_XT || jfriedl_XR)
1575 {
1576 #include <sys/time.h>
1577 #include <time.h>
1578 struct timeval start_time, end_time;
1579 struct timezone dummy;
1580 int i;
1581
1582 if (jfriedl_XT)
1583 {
1584 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1585 const char *orig = ptr;
1586 ptr = malloc(newlen + 1);
1587 if (!ptr) {
1588 printf("out of memory");
1589 pcregrep_exit(2);
1590 }
1591 endptr = ptr;
1592 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1593 for (i = 0; i < jfriedl_XT; i++) {
1594 strncpy(endptr, orig, length);
1595 endptr += length;
1596 }
1597 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1598 length = newlen;
1599 }
1600
1601 if (gettimeofday(&start_time, &dummy) != 0)
1602 perror("bad gettimeofday");
1603
1604
1605 for (i = 0; i < jfriedl_XR; i++)
1606 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1607 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1608
1609 if (gettimeofday(&end_time, &dummy) != 0)
1610 perror("bad gettimeofday");
1611
1612 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1613 -
1614 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1615
1616 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1617 return 0;
1618 }
1619 #endif
1620
1621 /* We come back here after a match when show_only_matching is set, in order
1622 to find any further matches in the same line. This applies to
1623 --only-matching, --file-offsets, and --line-offsets. */
1624
1625 ONLY_MATCHING_RESTART:
1626
1627 /* Run through all the patterns until one matches or there is an error other
1628 than NOMATCH. This code is in a subroutine so that it can be re-used for
1629 finding subsequent matches when colouring matched lines. */
1630
1631 match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1632
1633 /* If it's a match or a not-match (as required), do what's wanted. */
1634
1635 if (match != invert)
1636 {
1637 BOOL hyphenprinted = FALSE;
1638
1639 /* We've failed if we want a file that doesn't have any matches. */
1640
1641 if (filenames == FN_NOMATCH_ONLY) return 1;
1642
1643 /* Just count if just counting is wanted. */
1644
1645 if (count_only) count++;
1646
1647 /* When handling a binary file and binary-files==binary, the "binary"
1648 variable will be set true (it's false in all other cases). In this
1649 situation we just want to output the file name. No need to scan further. */
1650
1651 else if (binary)
1652 {
1653 fprintf(stdout, "Binary file %s matches\n", filename);
1654 return 0;
1655 }
1656
1657 /* If all we want is a file name, there is no need to scan any more lines
1658 in the file. */
1659
1660 else if (filenames == FN_MATCH_ONLY)
1661 {
1662 fprintf(stdout, "%s\n", printname);
1663 return 0;
1664 }
1665
1666 /* Likewise, if all we want is a yes/no answer. */
1667
1668 else if (quiet) return 0;
1669
1670 /* The --only-matching option prints just the substring that matched,
1671 and/or one or more captured portions of it, as long as these strings are
1672 not empty. The --file-offsets and --line-offsets options output offsets for
1673 the matching substring (all three set show_only_matching). None of these
1674 mutually exclusive options prints any context. Afterwards, adjust the start
1675 and then jump back to look for further matches in the same line. If we are
1676 in invert mode, however, nothing is printed and we do not restart - this
1677 could still be useful because the return code is set. */
1678
1679 else if (show_only_matching)
1680 {
1681 if (!invert)
1682 {
1683 if (printname != NULL) fprintf(stdout, "%s:", printname);
1684 if (number) fprintf(stdout, "%d:", linenumber);
1685
1686 /* Handle --line-offsets */
1687
1688 if (line_offsets)
1689 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1690 offsets[1] - offsets[0]);
1691
1692 /* Handle --file-offsets */
1693
1694 else if (file_offsets)
1695 fprintf(stdout, "%d,%d\n",
1696 (int)(filepos + matchptr + offsets[0] - ptr),
1697 offsets[1] - offsets[0]);
1698
1699 /* Handle --only-matching, which may occur many times */
1700
1701 else
1702 {
1703 BOOL printed = FALSE;
1704 omstr *om;
1705
1706 for (om = only_matching; om != NULL; om = om->next)
1707 {
1708 int n = om->groupnum;
1709 if (n < mrc)
1710 {
1711 int plen = offsets[2*n + 1] - offsets[2*n];
1712 if (plen > 0)
1713 {
1714 if (printed) fprintf(stdout, "%s", om_separator);
1715 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1716 FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1717 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1718 printed = TRUE;
1719 }
1720 }
1721 }
1722
1723 if (printed || printname != NULL || number) fprintf(stdout, "\n");
1724 }
1725
1726 /* Prepare to repeat to find the next match */
1727
1728 match = FALSE;
1729 if (line_buffered) fflush(stdout);
1730 rc = 0; /* Had some success */
1731 startoffset = offsets[1]; /* Restart after the match */
1732 goto ONLY_MATCHING_RESTART;
1733 }
1734 }
1735
1736 /* This is the default case when none of the above options is set. We print
1737 the matching lines(s), possibly preceded and/or followed by other lines of
1738 context. */
1739
1740 else
1741 {
1742 /* See if there is a requirement to print some "after" lines from a
1743 previous match. We never print any overlaps. */
1744
1745 if (after_context > 0 && lastmatchnumber > 0)
1746 {
1747 int ellength;
1748 int linecount = 0;
1749 char *p = lastmatchrestart;
1750
1751 while (p < ptr && linecount < after_context)
1752 {
1753 p = end_of_line(p, ptr, &ellength);
1754 linecount++;
1755 }
1756
1757 /* It is important to advance lastmatchrestart during this printing so
1758 that it interacts correctly with any "before" printing below. Print
1759 each line's data using fwrite() in case there are binary zeroes. */
1760
1761 while (lastmatchrestart < p)
1762 {
1763 char *pp = lastmatchrestart;
1764 if (printname != NULL) fprintf(stdout, "%s-", printname);
1765 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1766 pp = end_of_line(pp, endptr, &ellength);
1767 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1768 lastmatchrestart = pp;
1769 }
1770 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1771 }
1772
1773 /* If there were non-contiguous lines printed above, insert hyphens. */
1774
1775 if (hyphenpending)
1776 {
1777 fprintf(stdout, "--\n");
1778 hyphenpending = FALSE;
1779 hyphenprinted = TRUE;
1780 }
1781
1782 /* See if there is a requirement to print some "before" lines for this
1783 match. Again, don't print overlaps. */
1784
1785 if (before_context > 0)
1786 {
1787 int linecount = 0;
1788 char *p = ptr;
1789
1790 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1791 linecount < before_context)
1792 {
1793 linecount++;
1794 p = previous_line(p, main_buffer);
1795 }
1796
1797 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1798 fprintf(stdout, "--\n");
1799
1800 while (p < ptr)
1801 {
1802 int ellength;
1803 char *pp = p;
1804 if (printname != NULL) fprintf(stdout, "%s-", printname);
1805 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1806 pp = end_of_line(pp, endptr, &ellength);
1807 FWRITE(p, 1, pp - p, stdout);
1808 p = pp;
1809 }
1810 }
1811
1812 /* Now print the matching line(s); ensure we set hyphenpending at the end
1813 of the file if any context lines are being output. */
1814
1815 if (after_context > 0 || before_context > 0)
1816 endhyphenpending = TRUE;
1817
1818 if (printname != NULL) fprintf(stdout, "%s:", printname);
1819 if (number) fprintf(stdout, "%d:", linenumber);
1820
1821 /* In multiline mode, we want to print to the end of the line in which
1822 the end of the matched string is found, so we adjust linelength and the
1823 line number appropriately, but only when there actually was a match
1824 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1825 the match will always be before the first newline sequence. */
1826
1827 if (multiline & !invert)
1828 {
1829 char *endmatch = ptr + offsets[1];
1830 t = ptr;
1831 while (t < endmatch)
1832 {
1833 t = end_of_line(t, endptr, &endlinelength);
1834 if (t < endmatch) linenumber++; else break;
1835 }
1836 linelength = t - ptr - endlinelength;
1837 }
1838
1839 /*** NOTE: Use only fwrite() to output the data line, so that binary
1840 zeroes are treated as just another data character. */
1841
1842 /* This extra option, for Jeffrey Friedl's debugging requirements,
1843 replaces the matched string, or a specific captured string if it exists,
1844 with X. When this happens, colouring is ignored. */
1845
1846 #ifdef JFRIEDL_DEBUG
1847 if (S_arg >= 0 && S_arg < mrc)
1848 {
1849 int first = S_arg * 2;
1850 int last = first + 1;
1851 FWRITE(ptr, 1, offsets[first], stdout);
1852 fprintf(stdout, "X");
1853 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1854 }
1855 else
1856 #endif
1857
1858 /* We have to split the line(s) up if colouring, and search for further
1859 matches, but not of course if the line is a non-match. */
1860
1861 if (do_colour && !invert)
1862 {
1863 int plength;
1864 FWRITE(ptr, 1, offsets[0], stdout);
1865 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1866 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1867 fprintf(stdout, "%c[00m", 0x1b);
1868 for (;;)
1869 {
1870 startoffset = offsets[1];
1871 if (startoffset >= (int)linelength + endlinelength ||
1872 !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1873 break;
1874 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1875 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1876 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1877 fprintf(stdout, "%c[00m", 0x1b);
1878 }
1879
1880 /* In multiline mode, we may have already printed the complete line
1881 and its line-ending characters (if they matched the pattern), so there
1882 may be no more to print. */
1883
1884 plength = (int)((linelength + endlinelength) - startoffset);
1885 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1886 }
1887
1888 /* Not colouring; no need to search for further matches */
1889
1890 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1891 }
1892
1893 /* End of doing what has to be done for a match. If --line-buffered was
1894 given, flush the output. */
1895
1896 if (line_buffered) fflush(stdout);
1897 rc = 0; /* Had some success */
1898
1899 /* Remember where the last match happened for after_context. We remember
1900 where we are about to restart, and that line's number. */
1901
1902 lastmatchrestart = ptr + linelength + endlinelength;
1903 lastmatchnumber = linenumber + 1;
1904 }
1905
1906 /* For a match in multiline inverted mode (which of course did not cause
1907 anything to be printed), we have to move on to the end of the match before
1908 proceeding. */
1909
1910 if (multiline && invert && match)
1911 {
1912 int ellength;
1913 char *endmatch = ptr + offsets[1];
1914 t = ptr;
1915 while (t < endmatch)
1916 {
1917 t = end_of_line(t, endptr, &ellength);
1918 if (t <= endmatch) linenumber++; else break;
1919 }
1920 endmatch = end_of_line(endmatch, endptr, &ellength);
1921 linelength = endmatch - ptr - ellength;
1922 }
1923
1924 /* Advance to after the newline and increment the line number. The file
1925 offset to the current line is maintained in filepos. */
1926
1927 ptr += linelength + endlinelength;
1928 filepos += (int)(linelength + endlinelength);
1929 linenumber++;
1930
1931 /* If input is line buffered, and the buffer is not yet full, read another
1932 line and add it into the buffer. */
1933
1934 if (input_line_buffered && bufflength < (size_t)bufsize)
1935 {
1936 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1937 bufflength += add;
1938 endptr += add;
1939 }
1940
1941 /* If we haven't yet reached the end of the file (the buffer is full), and
1942 the current point is in the top 1/3 of the buffer, slide the buffer down by
1943 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1944 about to be lost, print them. */
1945
1946 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1947 {
1948 if (after_context > 0 &&
1949 lastmatchnumber > 0 &&
1950 lastmatchrestart < main_buffer + bufthird)
1951 {
1952 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1953 lastmatchnumber = 0;
1954 }
1955
1956 /* Now do the shuffle */
1957
1958 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1959 ptr -= bufthird;
1960
1961 #ifdef SUPPORT_LIBZ
1962 if (frtype == FR_LIBZ)
1963 bufflength = 2*bufthird +
1964 gzread (ingz, main_buffer + 2*bufthird, bufthird);
1965 else
1966 #endif
1967
1968 #ifdef SUPPORT_LIBBZ2
1969 if (frtype == FR_LIBBZ2)
1970 bufflength = 2*bufthird +
1971 BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1972 else
1973 #endif
1974
1975 bufflength = 2*bufthird +
1976 (input_line_buffered?
1977 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1978 fread(main_buffer + 2*bufthird, 1, bufthird, in));
1979 endptr = main_buffer + bufflength;
1980
1981 /* Adjust any last match point */
1982
1983 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1984 }
1985 } /* Loop through the whole file */
1986
1987 /* End of file; print final "after" lines if wanted; do_after_lines sets
1988 hyphenpending if it prints something. */
1989
1990 if (!show_only_matching && !count_only)
1991 {
1992 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1993 hyphenpending |= endhyphenpending;
1994 }
1995
1996 /* Print the file name if we are looking for those without matches and there
1997 were none. If we found a match, we won't have got this far. */
1998
1999 if (filenames == FN_NOMATCH_ONLY)
2000 {
2001 fprintf(stdout, "%s\n", printname);
2002 return 0;
2003 }
2004
2005 /* Print the match count if wanted */
2006
2007 if (count_only)
2008 {
2009 if (count > 0 || !omit_zero_count)
2010 {
2011 if (printname != NULL && filenames != FN_NONE)
2012 fprintf(stdout, "%s:", printname);
2013 fprintf(stdout, "%d\n", count);
2014 }
2015 }
2016
2017 return rc;
2018 }
2019
2020
2021
2022 /*************************************************
2023 * Grep a file or recurse into a directory *
2024 *************************************************/
2025
2026 /* Given a path name, if it's a directory, scan all the files if we are
2027 recursing; if it's a file, grep it.
2028
2029 Arguments:
2030 pathname the path to investigate
2031 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
2032 only_one_at_top TRUE if the path is the only one at toplevel
2033
2034 Returns: -1 the file/directory was skipped
2035 0 if there was at least one match
2036 1 if there were no matches
2037 2 there was some kind of error
2038
2039 However, file opening failures are suppressed if "silent" is set.
2040 */
2041
2042 static int
2043 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2044 {
2045 int rc = 1;
2046 int frtype;
2047 void *handle;
2048 char *lastcomp;
2049 FILE *in = NULL; /* Ensure initialized */
2050
2051 #ifdef SUPPORT_LIBZ
2052 gzFile ingz = NULL;
2053 #endif
2054
2055 #ifdef SUPPORT_LIBBZ2
2056 BZFILE *inbz2 = NULL;
2057 #endif
2058
2059 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2060 int pathlen;
2061 #endif
2062
2063 /* If the file name is "-" we scan stdin */
2064
2065 if (strcmp(pathname, "-") == 0)
2066 {
2067 return pcregrep(stdin, FR_PLAIN, stdin_name,
2068 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2069 stdin_name : NULL);
2070 }
2071
2072 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2073 directories, whereas --include and --exclude apply to everything else. The test
2074 is against the final component of the path. */
2075
2076 lastcomp = strrchr(pathname, FILESEP);
2077 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2078
2079 /* If the file is a directory, skip if not recursing or if explicitly excluded.
2080 Otherwise, scan the directory and recurse for each path within it. The scanning
2081 code is localized so it can be made system-specific. */
2082
2083 if (isdirectory(pathname))
2084 {
2085 if (dee_action == dee_SKIP ||
2086 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2087 return -1;
2088
2089 if (dee_action == dee_RECURSE)
2090 {
2091 char buffer[1024];
2092 char *nextfile;
2093 directory_type *dir = opendirectory(pathname);
2094
2095 if (dir == NULL)
2096 {
2097 if (!silent)
2098 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
2099 strerror(errno));
2100 return 2;
2101 }
2102
2103 while ((nextfile = readdirectory(dir)) != NULL)
2104 {
2105 int frc;
2106 sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
2107 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2108 if (frc > 1) rc = frc;
2109 else if (frc == 0 && rc == 1) rc = 0;
2110 }
2111
2112 closedirectory(dir);
2113 return rc;
2114 }
2115 }
2116
2117 /* If the file is not a directory and not a regular file, skip it if that's
2118 been requested. Otherwise, check for explicit include/exclude. */
2119
2120 else if ((!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2121 !test_incexc(lastcomp, include_patterns, exclude_patterns))
2122 return -1;
2123
2124 /* Control reaches here if we have a regular file, or if we have a directory
2125 and recursion or skipping was not requested, or if we have anything else and
2126 skipping was not requested. The scan proceeds. If this is the first and only
2127 argument at top level, we don't show the file name, unless we are only showing
2128 the file name, or the filename was forced (-H). */
2129
2130 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2131 pathlen = (int)(strlen(pathname));
2132 #endif
2133
2134 /* Open using zlib if it is supported and the file name ends with .gz. */
2135
2136 #ifdef SUPPORT_LIBZ
2137 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2138 {
2139 ingz = gzopen(pathname, "rb");
2140 if (ingz == NULL)
2141 {
2142 if (!silent)
2143 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2144 strerror(errno));
2145 return 2;
2146 }
2147 handle = (void *)ingz;
2148 frtype = FR_LIBZ;
2149 }
2150 else
2151 #endif
2152
2153 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
2154
2155 #ifdef SUPPORT_LIBBZ2
2156 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2157 {
2158 inbz2 = BZ2_bzopen(pathname, "rb");
2159 handle = (void *)inbz2;
2160 frtype = FR_LIBBZ2;
2161 }
2162 else
2163 #endif
2164
2165 /* Otherwise use plain fopen(). The label is so that we can come back here if
2166 an attempt to read a .bz2 file indicates that it really is a plain file. */
2167
2168 #ifdef SUPPORT_LIBBZ2
2169 PLAIN_FILE:
2170 #endif
2171 {
2172 in = fopen(pathname, "rb");
2173 handle = (void *)in;
2174 frtype = FR_PLAIN;
2175 }
2176
2177 /* All the opening methods return errno when they fail. */
2178
2179 if (handle == NULL)
2180 {
2181 if (!silent)
2182 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2183 strerror(errno));
2184 return 2;
2185 }
2186
2187 /* Now grep the file */
2188
2189 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2190 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2191
2192 /* Close in an appropriate manner. */
2193
2194 #ifdef SUPPORT_LIBZ
2195 if (frtype == FR_LIBZ)
2196 gzclose(ingz);
2197 else
2198 #endif
2199
2200 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2201 read failed. If the error indicates that the file isn't in fact bzipped, try
2202 again as a normal file. */
2203
2204 #ifdef SUPPORT_LIBBZ2
2205 if (frtype == FR_LIBBZ2)
2206 {
2207 if (rc == 3)
2208 {
2209 int errnum;
2210 const char *err = BZ2_bzerror(inbz2, &errnum);
2211 if (errnum == BZ_DATA_ERROR_MAGIC)
2212 {
2213 BZ2_bzclose(inbz2);
2214 goto PLAIN_FILE;
2215 }
2216 else if (!silent)
2217 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2218 pathname, err);
2219 rc = 2; /* The normal "something went wrong" code */
2220 }
2221 BZ2_bzclose(inbz2);
2222 }
2223 else
2224 #endif
2225
2226 /* Normal file close */
2227
2228 fclose(in);
2229
2230 /* Pass back the yield from pcregrep(). */
2231
2232 return rc;
2233 }
2234
2235
2236
2237 /*************************************************
2238 * Handle a single-letter, no data option *
2239 *************************************************/
2240
2241 static int
2242 handle_option(int letter, int options)
2243 {
2244 switch(letter)
2245 {
2246 case N_FOFFSETS: file_offsets = TRUE; break;
2247 case N_HELP: help(); pcregrep_exit(0);
2248 case N_LBUFFER: line_buffered = TRUE; break;
2249 case N_LOFFSETS: line_offsets = number = TRUE; break;
2250 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2251 case 'a': binary_files = BIN_TEXT; break;
2252 case 'c': count_only = TRUE; break;
2253 case 'F': process_options |= PO_FIXED_STRINGS; break;
2254 case 'H': filenames = FN_FORCE; break;
2255 case 'I': binary_files = BIN_NOMATCH; break;
2256 case 'h': filenames = FN_NONE; break;
2257 case 'i': options |= PCRE_CASELESS; break;
2258 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2259 case 'L': filenames = FN_NOMATCH_ONLY; break;
2260 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2261 case 'n': number = TRUE; break;
2262
2263 case 'o':
2264 only_matching_last = add_number(0, only_matching_last);
2265 if (only_matching == NULL) only_matching = only_matching_last;
2266 break;
2267
2268 case 'q': quiet = TRUE; break;
2269 case 'r': dee_action = dee_RECURSE; break;
2270 case 's': silent = TRUE; break;
2271 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2272 case 'v': invert = TRUE; break;
2273 case 'w': process_options |= PO_WORD_MATCH; break;
2274 case 'x': process_options |= PO_LINE_MATCH; break;
2275
2276 case 'V':
2277 fprintf(stdout, "pcregrep version %s\n", pcre_version());
2278 pcregrep_exit(0);
2279 break;
2280
2281 default:
2282 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2283 pcregrep_exit(usage(2));
2284 }
2285
2286 return options;
2287 }
2288
2289
2290
2291
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", "11th", "22nd", etc. Numbers whose
last two digits are 11, 12, or 13 take "th", not "st", "nd", or "rd". Negative
numbers always get "th".

Argument:   n   the number
Returns:    pointer to a static buffer containing the text; the contents are
              overwritten by the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte is more than enough for the decimal digits of any
int; the extra four are for a minus sign, the two-letter ending, and the
terminating zero. */

static char buffer[3*sizeof(int) + 4];
const char *ending = "th";
int lasttwo = n % 100;

if (lasttwo < 11 || lasttwo > 13) switch (n%10)
  {
  case 1: ending = "st"; break;
  case 2: ending = "nd"; break;
  case 3: ending = "rd"; break;
  default: break;
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
2314
2315
2316
2317 /*************************************************
2318 * Compile a single pattern *
2319 *************************************************/
2320
2321 /* Do nothing if the pattern has already been compiled. This is the case for
2322 include/exclude patterns read from a file.
2323
2324 When the -F option has been used, each "pattern" may be a list of strings,
2325 separated by line breaks. They will be matched literally. We split such a
2326 string and compile the first substring, inserting an additional block into the
2327 pattern chain.
2328
2329 Arguments:
2330 p points to the pattern block
2331 options the PCRE options
2332 popts the processing options
2333 fromfile TRUE if the pattern was read from a file
2334 fromtext file name or identifying text (e.g. "include")
2335 count 0 if this is the only command line pattern, or
2336 number of the command line pattern, or
2337 linenumber for a pattern from a file
2338
2339 Returns: TRUE on success, FALSE after an error
2340 */
2341
2342 static BOOL
2343 compile_pattern(patstr *p, int options, int popts, int fromfile,
2344 const char *fromtext, int count)
2345 {
2346 char buffer[PATBUFSIZE];
2347 const char *error;
2348 char *ps = p->string;
2349 int patlen = strlen(ps);
2350 int errptr;
2351
2352 if (p->compiled != NULL) return TRUE;
2353
2354 if ((popts & PO_FIXED_STRINGS) != 0)
2355 {
2356 int ellength;
2357 char *eop = ps + patlen;
2358 char *pe = end_of_line(ps, eop, &ellength);
2359
2360 if (ellength != 0)
2361 {
2362 if (add_pattern(pe, p) == NULL) return FALSE;
2363 patlen = (int)(pe - ps - ellength);
2364 }
2365 }
2366
2367 sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2368 p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2369 if (p->compiled != NULL) return TRUE;
2370
2371 /* Handle compile errors */
2372
2373 errptr -= (int)strlen(prefix[popts]);
2374 if (errptr > patlen) errptr = patlen;
2375
2376 if (fromfile)
2377 {
2378 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2379 "at offset %d: %s\n", count, fromtext, errptr, error);
2380 }
2381 else
2382 {
2383 if (count == 0)
2384 fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2385 fromtext, errptr, error);
2386 else
2387 fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2388 ordin(count), fromtext, errptr, error);
2389 }
2390
2391 return FALSE;
2392 }
2393
2394
2395
2396 /*************************************************
2397 * Read and compile a file of patterns *
2398 *************************************************/
2399
2400 /* This is used for --filelist, --include-from, and --exclude-from.
2401
2402 Arguments:
2403 name the name of the file; "-" is stdin
2404 patptr pointer to the pattern chain anchor
2405 patlastptr pointer to the last pattern pointer
2406 popts the process options to pass to pattern_compile()
2407
2408 Returns: TRUE if all went well
2409 */
2410
2411 static BOOL
2412 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2413 {
2414 int linenumber = 0;
2415 FILE *f;
2416 char *filename;
2417 char buffer[PATBUFSIZE];
2418
2419 if (strcmp(name, "-") == 0)
2420 {
2421 f = stdin;
2422 filename = stdin_name;
2423 }
2424 else
2425 {
2426 f = fopen(name, "r");
2427 if (f == NULL)
2428 {
2429 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2430 return FALSE;
2431 }
2432 filename = name;
2433 }
2434
2435 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2436 {
2437 char *s = buffer + (int)strlen(buffer);
2438 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2439 *s = 0;
2440 linenumber++;
2441 if (buffer[0] == 0) continue; /* Skip blank lines */
2442
2443 /* Note: this call to add_pattern() puts a pointer to the local variable
2444 "buffer" into the pattern chain. However, that pointer is used only when
2445 compiling the pattern, which happens immediately below, so we flatten it
2446 afterwards, as a precaution against any later code trying to use it. */
2447
2448 *patlastptr = add_pattern(buffer, *patlastptr);
2449 if (*patlastptr == NULL) return FALSE;
2450 if (*patptr == NULL) *patptr = *patlastptr;
2451
2452 /* This loop is needed because compiling a "pattern" when -F is set may add
2453 on additional literal patterns if the original contains a newline. In the
2454 common case, it never will, because fgets() stops at a newline. However,
2455 the -N option can be used to give pcregrep a different newline setting. */
2456
2457 for(;;)
2458 {
2459 if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2460 linenumber))
2461 return FALSE;
2462 (*patlastptr)->string = NULL; /* Insurance */
2463 if ((*patlastptr)->next == NULL) break;
2464 *patlastptr = (*patlastptr)->next;
2465 }
2466 }
2467
2468 if (f != stdin) fclose(f);
2469 return TRUE;
2470 }
2471
2472
2473
2474 /*************************************************
2475 * Main program *
2476 *************************************************/
2477
2478 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2479
2480 int
2481 main(int argc, char **argv)
2482 {
2483 int i, j;
2484 int rc = 1;
2485 BOOL only_one_at_top;
2486 patstr *cp;
2487 fnstr *fn;
2488 const char *locale_from = "--locale";
2489 const char *error;
2490
2491 #ifdef SUPPORT_PCREGREP_JIT
2492 pcre_jit_stack *jit_stack = NULL;
2493 #endif
2494
2495 /* Set the default line ending value from the default in the PCRE library;
2496 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2497 Note that the return values from pcre_config(), though derived from the ASCII
2498 codes, are the same in EBCDIC environments, so we must use the actual values
2499 rather than escapes such as as '\r'. */
2500
2501 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2502 switch(i)
2503 {
2504 default: newline = (char *)"lf"; break;
2505 case 13: newline = (char *)"cr"; break;
2506 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2507 case -1: newline = (char *)"any"; break;
2508 case -2: newline = (char *)"anycrlf"; break;
2509 }
2510
2511 /* Process the options */
2512
2513 for (i = 1; i < argc; i++)
2514 {
2515 option_item *op = NULL;
2516 char *option_data = (char *)""; /* default to keep compiler happy */
2517 BOOL longop;
2518 BOOL longopwasequals = FALSE;
2519
2520 if (argv[i][0] != '-') break;
2521
2522 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2523 but only if we have previously had -e or -f to define the patterns. */
2524
2525 if (argv[i][1] == 0)
2526 {
2527 if (pattern_files != NULL || patterns != NULL) break;
2528 else pcregrep_exit(usage(2));
2529 }
2530
2531 /* Handle a long name option, or -- to terminate the options */
2532
2533 if (argv[i][1] == '-')
2534 {
2535 char *arg = argv[i] + 2;
2536 char *argequals = strchr(arg, '=');
2537
2538 if (*arg == 0) /* -- terminates options */
2539 {
2540 i++;
2541 break; /* out of the options-handling loop */
2542 }
2543
2544 longop = TRUE;
2545
2546 /* Some long options have data that follows after =, for example file=name.
2547 Some options have variations in the long name spelling: specifically, we
2548 allow "regexp" because GNU grep allows it, though I personally go along
2549 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2550 These options are entered in the table as "regex(p)". Options can be in
2551 both these categories. */
2552
2553 for (op = optionlist; op->one_char != 0; op++)
2554 {
2555 char *opbra = strchr(op->long_name, '(');
2556 char *equals = strchr(op->long_name, '=');
2557
2558 /* Handle options with only one spelling of the name */
2559
2560 if (opbra == NULL) /* Does not contain '(' */
2561 {
2562 if (equals == NULL) /* Not thing=data case */
2563 {
2564 if (strcmp(arg, op->long_name) == 0) break;
2565 }
2566 else /* Special case xxx=data */
2567 {
2568 int oplen = (int)(equals - op->long_name);
2569 int arglen = (argequals == NULL)?
2570 (int)strlen(arg) : (int)(argequals - arg);
2571 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2572 {
2573 option_data = arg + arglen;
2574 if (*option_data == '=')
2575 {
2576 option_data++;
2577 longopwasequals = TRUE;
2578 }
2579 break;
2580 }
2581 }
2582 }
2583
2584 /* Handle options with an alternate spelling of the name */
2585
2586 else
2587 {
2588 char buff1[24];
2589 char buff2[24];
2590
2591 int baselen = (int)(opbra - op->long_name);
2592 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2593 int arglen = (argequals == NULL || equals == NULL)?
2594 (int)strlen(arg) : (int)(argequals - arg);
2595
2596 sprintf(buff1, "%.*s", baselen, op->long_name);
2597 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2598
2599 if (strncmp(arg, buff1, arglen) == 0 ||
2600 strncmp(arg, buff2, arglen) == 0)
2601 {
2602 if (equals != NULL && argequals != NULL)
2603 {
2604 option_data = argequals;
2605 if (*option_data == '=')
2606 {
2607 option_data++;
2608 longopwasequals = TRUE;
2609 }
2610 }
2611 break;
2612 }
2613 }
2614 }
2615
2616 if (op->one_char == 0)
2617 {
2618 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2619 pcregrep_exit(usage(2));
2620 }
2621 }
2622
2623 /* Jeffrey Friedl's debugging harness uses these additional options which
2624 are not in the right form for putting in the option table because they use
2625 only one hyphen, yet are more than one character long. By putting them
2626 separately here, they will not get displayed as part of the help() output,
2627 but I don't think Jeffrey will care about that. */
2628
2629 #ifdef JFRIEDL_DEBUG
2630 else if (strcmp(argv[i], "-pre") == 0) {
2631 jfriedl_prefix = argv[++i];
2632 continue;
2633 } else if (strcmp(argv[i], "-post") == 0) {
2634 jfriedl_postfix = argv[++i];
2635 continue;
2636 } else if (strcmp(argv[i], "-XT") == 0) {
2637 sscanf(argv[++i], "%d", &jfriedl_XT);
2638 continue;
2639 } else if (strcmp(argv[i], "-XR") == 0) {
2640 sscanf(argv[++i], "%d", &jfriedl_XR);
2641 continue;
2642 }
2643 #endif
2644
2645
2646 /* One-char options; many that have no data may be in a single argument; we
2647 continue till we hit the last one or one that needs data. */
2648
2649 else
2650 {
2651 char *s = argv[i] + 1;
2652 longop = FALSE;
2653
2654 while (*s != 0)
2655 {
2656 for (op = optionlist; op->one_char != 0; op++)
2657 {
2658 if (*s == op->one_char) break;
2659 }
2660 if (op->one_char == 0)
2661 {
2662 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2663 *s, argv[i]);
2664 pcregrep_exit(usage(2));
2665 }
2666
2667 option_data = s+1;
2668
2669 /* Break out if this is the last character in the string; it's handled
2670 below like a single multi-char option. */
2671
2672 if (*option_data == 0) break;
2673
2674 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2675 are used for ones that either have a numerical number or defaults, i.e.
2676 the data is optional. If a digit follows, there is data; if not, carry on
2677 with other single-character options in the same string. */
2678
2679 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2680 {
2681 if (isdigit((unsigned char)s[1])) break;
2682 }
2683 else /* Check for an option with data */
2684 {
2685 if (op->type != OP_NODATA) break;
2686 }
2687
2688 /* Handle a single-character option with no data, then loop for the
2689 next character in the string. */
2690
2691 pcre_options = handle_option(*s++, pcre_options);
2692 }
2693 }
2694
2695 /* At this point we should have op pointing to a matched option. If the type
2696 is NO_DATA, it means that there is no data, and the option might set
2697 something in the PCRE options. */
2698
2699 if (op->type == OP_NODATA)
2700 {
2701 pcre_options = handle_option(op->one_char, pcre_options);
2702 continue;
2703 }
2704
2705 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2706 either has a value or defaults to something. It cannot have data in a
2707 separate item. At the moment, the only such options are "colo(u)r",
2708 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2709
2710 if (*option_data == 0 &&
2711 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2712 op->type == OP_OP_NUMBERS))
2713 {
2714 switch (op->one_char)
2715 {
2716 case N_COLOUR:
2717 colour_option = (char *)"auto";
2718 break;
2719
2720 case 'o':
2721 only_matching_last = add_number(0, only_matching_last);
2722 if (only_matching == NULL) only_matching = only_matching_last;
2723 break;
2724
2725 #ifdef JFRIEDL_DEBUG
2726 case 'S':
2727 S_arg = 0;
2728 break;
2729 #endif
2730 }
2731 continue;
2732 }
2733
2734 /* Otherwise, find the data string for the option. */
2735
2736 if (*option_data == 0)
2737 {
2738 if (i >= argc - 1 || longopwasequals)
2739 {
2740 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2741 pcregrep_exit(usage(2));
2742 }
2743 option_data = argv[++i];
2744 }
2745
2746 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2747 added to a chain of numbers. */
2748
2749 if (op->type == OP_OP_NUMBERS)
2750 {
2751 unsigned long int n = decode_number(option_data, op, longop);
2752 omdatastr *omd = (omdatastr *)op->dataptr;
2753 *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2754 if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2755 }
2756
2757 /* If the option type is OP_PATLIST, it's the -e option, or one of the
2758 include/exclude options, which can be called multiple times to create lists
2759 of patterns. */
2760
2761 else if (op->type == OP_PATLIST)
2762 {
2763 patdatastr *pd = (patdatastr *)op->dataptr;
2764 *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2765 if (*(pd->lastptr) == NULL) goto EXIT2;
2766 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2767 }
2768
2769 /* If the option type is OP_FILELIST, it's one of the options that names a
2770 file. */
2771
2772 else if (op->type == OP_FILELIST)
2773 {
2774 fndatastr *fd = (fndatastr *)op->dataptr;
2775 fn = (fnstr *)malloc(sizeof(fnstr));
2776 if (fn == NULL)
2777 {
2778 fprintf(stderr, "pcregrep: malloc failed\n");
2779 goto EXIT2;
2780 }
2781 fn->next = NULL;
2782 fn->name = option_data;
2783 if (*(fd->anchor) == NULL)
2784 *(fd->anchor) = fn;
2785 else
2786 (*(fd->lastptr))->next = fn;
2787 *(fd->lastptr) = fn;
2788 }
2789
2790 /* Handle OP_BINARY_FILES */
2791
2792 else if (op->type == OP_BINFILES)
2793 {
2794 if (strcmp(option_data, "binary") == 0)
2795 binary_files = BIN_BINARY;
2796 else if (strcmp(option_data, "without-match") == 0)
2797 binary_files = BIN_NOMATCH;
2798 else if (strcmp(option_data, "text") == 0)
2799 binary_files = BIN_TEXT;
2800 else
2801 {
2802 fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2803 option_data);
2804 pcregrep_exit(usage(2));
2805 }
2806 }
2807
2808 /* Otherwise, deal with a single string or numeric data value. */
2809
2810 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2811 op->type != OP_OP_NUMBER)
2812 {
2813 *((char **)op->dataptr) = option_data;
2814 }
2815 else
2816 {
2817 unsigned long int n = decode_number(option_data, op, longop);
2818 if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2819 else *((int *)op->dataptr) = n;
2820 }
2821 }
2822
2823 /* Options have been decoded. If -C was used, its value is used as a default
2824 for -A and -B. */
2825
2826 if (both_context > 0)
2827 {
2828 if (after_context == 0) after_context = both_context;
2829 if (before_context == 0) before_context = both_context;
2830 }
2831
2832 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2833 However, all three set show_only_matching because they display, each in their
2834 own way, only the data that has matched. */
2835
2836 if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2837 (file_offsets && line_offsets))
2838 {
2839 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2840 "and/or --line-offsets\n");
2841 pcregrep_exit(usage(2));
2842 }
2843
2844 if (only_matching != NULL || file_offsets || line_offsets)
2845 show_only_matching = TRUE;
2846
2847 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2848 LC_ALL environment variable is set, and if so, use it. */
2849
2850 if (locale == NULL)
2851 {
2852 locale = getenv("LC_ALL");
2853 locale_from = "LCC_ALL";
2854 }
2855
2856 if (locale == NULL)
2857 {
2858 locale = getenv("LC_CTYPE");
2859 locale_from = "LC_CTYPE";
2860 }
2861
2862 /* If a locale has been provided, set it, and generate the tables the PCRE
2863 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2864
2865 if (locale != NULL)
2866 {
2867 if (setlocale(LC_CTYPE, locale) == NULL)
2868 {
2869 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2870 locale, locale_from);
2871 return 2;
2872 }
2873 pcretables = pcre_maketables();
2874 }
2875
2876 /* Sort out colouring */
2877
2878 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2879 {
2880 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2881 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2882 else
2883 {
2884 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2885 colour_option);
2886 return 2;
2887 }
2888 if (do_colour)
2889 {
2890 char *cs = getenv("PCREGREP_COLOUR");
2891 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2892 if (cs != NULL) colour_string = cs;
2893 }
2894 }
2895
2896 /* Interpret the newline type; the default settings are Unix-like. */
2897
2898 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2899 {
2900 pcre_options |= PCRE_NEWLINE_CR;
2901 endlinetype = EL_CR;
2902 }
2903 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2904 {
2905 pcre_options |= PCRE_NEWLINE_LF;
2906 endlinetype = EL_LF;
2907 }
2908 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2909 {
2910 pcre_options |= PCRE_NEWLINE_CRLF;
2911 endlinetype = EL_CRLF;
2912 }
2913 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2914 {
2915 pcre_options |= PCRE_NEWLINE_ANY;
2916 endlinetype = EL_ANY;
2917 }
2918 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2919 {
2920 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2921 endlinetype = EL_ANYCRLF;
2922 }
2923 else
2924 {
2925 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2926 return 2;
2927 }
2928
2929 /* Interpret the text values for -d and -D */
2930
2931 if (dee_option != NULL)
2932 {
2933 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2934 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2935 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2936 else
2937 {
2938 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2939 return 2;
2940 }
2941 }
2942
2943 if (DEE_option != NULL)
2944 {
2945 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2946 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2947 else
2948 {
2949 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2950 return 2;
2951 }
2952 }
2953
2954 /* Check the values for Jeffrey Friedl's debugging options. */
2955
2956 #ifdef JFRIEDL_DEBUG
2957 if (S_arg > 9)
2958 {
2959 fprintf(stderr, "pcregrep: bad value for -S option\n");
2960 return 2;
2961 }
2962 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2963 {
2964 if (jfriedl_XT == 0) jfriedl_XT = 1;
2965 if (jfriedl_XR == 0) jfriedl_XR = 1;
2966 }
2967 #endif
2968
2969 /* Get memory for the main buffer. */
2970
2971 bufsize = 3*bufthird;
2972 main_buffer = (char *)malloc(bufsize);
2973
2974 if (main_buffer == NULL)
2975 {
2976 fprintf(stderr, "pcregrep: malloc failed\n");
2977 goto EXIT2;
2978 }
2979
2980 /* If no patterns were provided by -e, and there are no files provided by -f,
2981 the first argument is the one and only pattern, and it must exist. */
2982
2983 if (patterns == NULL && pattern_files == NULL)
2984 {
2985 if (i >= argc) return usage(2);
2986 patterns = patterns_last = add_pattern(argv[i++], NULL);
2987 if (patterns == NULL) goto EXIT2;
2988 }
2989
2990 /* Compile the patterns that were provided on the command line, either by
2991 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
2992 after all the command-line options are read so that we know which PCRE options
2993 to use. When -F is used, compile_pattern() may add another block into the
2994 chain, so we must not access the next pointer till after the compile. */
2995
2996 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2997 {
2998 if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
2999 (j == 1 && patterns->next == NULL)? 0 : j))
3000 goto EXIT2;
3001 }
3002
3003 /* Read and compile the regular expressions that are provided in files. */
3004
3005 for (fn = pattern_files; fn != NULL; fn = fn->next)
3006 {
3007 if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3008 goto EXIT2;
3009 }
3010
3011 /* Study the regular expressions, as we will be running them many times. If an
3012 extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3013 returned, even if studying produces no data. */
3014
3015 if (match_limit > 0 || match_limit_recursion > 0)
3016 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3017
3018 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
3019
3020 #ifdef SUPPORT_PCREGREP_JIT
3021 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3022 jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3023 #endif
3024
3025 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3026 {
3027 cp->hint = pcre_study(cp->compiled, study_options, &error);
3028 if (error != NULL)
3029 {
3030 char s[16];
3031 if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3032 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3033 goto EXIT2;
3034 }
3035 #ifdef SUPPORT_PCREGREP_JIT
3036 if (jit_stack != NULL && cp->hint != NULL)
3037 pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3038 #endif
3039 }
3040
3041 /* If --match-limit or --recursion-limit was set, put the value(s) into the
3042 pcre_extra block for each pattern. There will always be an extra block because
3043 of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3044
3045 for (cp = patterns; cp != NULL; cp = cp->next)
3046 {
3047 if (match_limit > 0)
3048 {
3049 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3050 cp->hint->match_limit = match_limit;
3051 }
3052
3053 if (match_limit_recursion > 0)
3054 {
3055 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3056 cp->hint->match_limit_recursion = match_limit_recursion;
3057 }
3058 }
3059
3060 /* If there are include or exclude patterns read from the command line, compile
3061 them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3062 0. */
3063
3064 for (j = 0; j < 4; j++)
3065 {
3066 int k;
3067 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
3068 {
3069 if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3070 (k == 1 && cp->next == NULL)? 0 : k))
3071 goto EXIT2;
3072 }
3073 }
3074
3075 /* Read and compile include/exclude patterns from files. */
3076
3077 for (fn = include_from; fn != NULL; fn = fn->next)
3078 {
3079 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3080 goto EXIT2;
3081 }
3082
3083 for (fn = exclude_from; fn != NULL; fn = fn->next)
3084 {
3085 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3086 goto EXIT2;
3087 }
3088
3089 /* If there are no files that contain lists of files to search, and there are
3090 no file arguments, search stdin, and then exit. */
3091
3092 if (file_lists == NULL && i >= argc)
3093 {
3094 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3095 (filenames > FN_DEFAULT)? stdin_name : NULL);
3096 goto EXIT;
3097 }
3098
3099 /* If any files that contains a list of files to search have been specified,
3100 read them line by line and search the given files. */
3101
3102 for (fn = file_lists; fn != NULL; fn = fn->next)
3103 {
3104 char buffer[PATBUFSIZE];
3105 FILE *fl;
3106 if (strcmp(fn->name, "-") == 0) fl = stdin; else
3107 {
3108 fl = fopen(fn->name, "rb");
3109 if (fl == NULL)
3110 {
3111 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3112 strerror(errno));
3113 goto EXIT2;
3114 }
3115 }
3116 while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3117 {
3118 int frc;
3119 char *end = buffer + (int)strlen(buffer);
3120 while (end > buffer && isspace(end[-1])) end--;
3121 *end = 0;
3122 if (*buffer != 0)
3123 {
3124 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3125 if (frc > 1) rc = frc;
3126 else if (frc == 0 && rc == 1) rc = 0;
3127 }
3128 }
3129 if (fl != stdin) fclose(fl);
3130 }
3131
3132 /* After handling file-list, work through remaining arguments. Pass in the fact
3133 that there is only one argument at top level - this suppresses the file name if
3134 the argument is not a directory and filenames are not otherwise forced. */
3135
3136 only_one_at_top = i == argc - 1 && file_lists == NULL;
3137
3138 for (; i < argc; i++)
3139 {
3140 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3141 only_one_at_top);
3142 if (frc > 1) rc = frc;
3143 else if (frc == 0 && rc == 1) rc = 0;
3144 }
3145
3146 EXIT:
3147 #ifdef SUPPORT_PCREGREP_JIT
3148 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3149 #endif
3150
3151 if (main_buffer != NULL) free(main_buffer);
3152
3153 free_pattern_chain(patterns);
3154 free_pattern_chain(include_patterns);
3155 free_pattern_chain(include_dir_patterns);
3156 free_pattern_chain(exclude_patterns);
3157 free_pattern_chain(exclude_dir_patterns);
3158
3159 free_file_chain(exclude_from);
3160 free_file_chain(include_from);
3161 free_file_chain(pattern_files);
3162 free_file_chain(file_lists);
3163
3164 while (only_matching != NULL)
3165 {
3166 omstr *this = only_matching;
3167 only_matching = this->next;
3168 free(this);
3169 }
3170
3171 pcregrep_exit(rc);
3172
3173 EXIT2:
3174 rc = 2;
3175 goto EXIT;
3176 }
3177
3178 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5