/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory | Revision Log


Revision 1035 - (show annotations)
Mon Sep 10 16:23:12 2012 UTC (7 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 88838 byte(s)
Make pcregrep use PCRE_STUDY_EXTRA_NEEDED.
1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2012 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
68 #define FALSE 0
69 #define TRUE 1
70
71 typedef int BOOL;
72
73 #define OFFSET_SIZE 99
74
75 #if BUFSIZ > 8192
76 #define MAXPATLEN BUFSIZ
77 #else
78 #define MAXPATLEN 8192
79 #endif
80
81 #define PATBUFSIZE (MAXPATLEN + 10) /* Allows for prefix+suffix */
82
83 /* Values for the "filenames" variable, which specifies options for file name
84 output. The order is important; it is assumed that a file name is wanted for
85 all values greater than FN_DEFAULT. */
86
/* File name output modes, numbered consecutively from zero. Code elsewhere
relies on the ordering, so new values must not be inserted casually. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_MATCH_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};
88
89 /* File reading styles */
90
/* How a file is to be read, numbered consecutively from zero. */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};
92
93 /* Actions for the -d and -D options */
94
/* Possible values of dee_action (the -d option). */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};
/* Possible values of DEE_action (the -D option). */

enum {
  DEE_READ,
  DEE_SKIP
};
97
98 /* Actions for special processing options (flag bits) */
99
100 #define PO_WORD_MATCH 0x0001
101 #define PO_LINE_MATCH 0x0002
102 #define PO_FIXED_STRINGS 0x0004
103
104 /* Line ending types */
105
/* Supported line-ending conventions; the selected one is held in
endlinetype. */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};
107
108 /* Binary file options */
109
/* Ways of treating binary files; the selected one is held in binary_files. */

enum {
  BIN_BINARY,
  BIN_NOMATCH,
  BIN_TEXT
};
111
112 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
113 environments), a warning is issued if the value of fwrite() is ignored.
114 Unfortunately, casting to (void) does not suppress the warning. To get round
115 this, we use a macro that compiles a fudge. Oddly, this does not also seem to
116 apply to fprintf(). */
117
/* Call fwrite() and deliberately discard its result in a way that does not
provoke gcc's unused-result warning. The do/while(0) wrapper makes the
expansion a single statement, so "if (x) FWRITE(...); else ..." parses as
intended instead of the else being orphaned or captured by the macro's own
if. Callers must terminate the invocation with a semicolon. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
119
120
121
122 /*************************************************
123 * Global variables *
124 *************************************************/
125
126 /* Jeffrey Friedl has some debugging requirements that are not part of the
127 regular code. */
128
129 #ifdef JFRIEDL_DEBUG
130 static int S_arg = -1;
131 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
132 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
133 static const char *jfriedl_prefix = "";
134 static const char *jfriedl_postfix = "";
135 #endif
136
137 static int endlinetype;
138
139 static char *colour_string = (char *)"1;31";
140 static char *colour_option = NULL;
141 static char *dee_option = NULL;
142 static char *DEE_option = NULL;
143 static char *locale = NULL;
144 static char *main_buffer = NULL;
145 static char *newline = NULL;
146 static char *stdin_name = (char *)"(standard input)";
147
148 static const unsigned char *pcretables = NULL;
149
150 static int after_context = 0;
151 static int before_context = 0;
152 static int binary_files = BIN_BINARY;
153 static int both_context = 0;
154 static int bufthird = PCREGREP_BUFSIZE;
155 static int bufsize = 3*PCREGREP_BUFSIZE;
156
157 #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
158 static int dee_action = dee_SKIP;
159 #else
160 static int dee_action = dee_READ;
161 #endif
162
163 static int DEE_action = DEE_READ;
164 static int error_count = 0;
165 static int filenames = FN_DEFAULT;
166 static int only_matching = -1;
167 static int pcre_options = 0;
168 static int process_options = 0;
169
170 #ifdef SUPPORT_PCREGREP_JIT
171 static int study_options = PCRE_STUDY_JIT_COMPILE;
172 #else
173 static int study_options = 0;
174 #endif
175
176 static unsigned long int match_limit = 0;
177 static unsigned long int match_limit_recursion = 0;
178
179 static BOOL count_only = FALSE;
180 static BOOL do_colour = FALSE;
181 static BOOL file_offsets = FALSE;
182 static BOOL hyphenpending = FALSE;
183 static BOOL invert = FALSE;
184 static BOOL line_buffered = FALSE;
185 static BOOL line_offsets = FALSE;
186 static BOOL multiline = FALSE;
187 static BOOL number = FALSE;
188 static BOOL omit_zero_count = FALSE;
189 static BOOL resource_error = FALSE;
190 static BOOL quiet = FALSE;
191 static BOOL silent = FALSE;
192 static BOOL utf8 = FALSE;
193
194 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
195
/* One node of a singly linked chain of file names. */

typedef struct fnstr fnstr;

struct fnstr {
  fnstr *next;     /* following node; NULL terminates the chain */
  char *name;      /* the file name carried by this node */
};
200
201 static fnstr *exclude_from = NULL;
202 static fnstr *exclude_from_last = NULL;
203 static fnstr *include_from = NULL;
204 static fnstr *include_from_last = NULL;
205
206 static fnstr *file_lists = NULL;
207 static fnstr *file_lists_last = NULL;
208 static fnstr *pattern_files = NULL;
209 static fnstr *pattern_files_last = NULL;
210
211 /* Structure for holding the two variables that describe a file name chain. */
212
213 typedef struct fndatastr {
214 fnstr **anchor;
215 fnstr **lastptr;
216 } fndatastr;
217
218 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
219 static fndatastr include_from_data = { &include_from, &include_from_last };
220 static fndatastr file_lists_data = { &file_lists, &file_lists_last };
221 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
222
223 /* Structure for pattern and its compiled form; used for matching patterns and
224 also for include/exclude patterns. */
225
226 typedef struct patstr {
227 struct patstr *next;
228 char *string;
229 pcre *compiled;
230 pcre_extra *hint;
231 } patstr;
232
233 static patstr *patterns = NULL;
234 static patstr *patterns_last = NULL;
235 static patstr *include_patterns = NULL;
236 static patstr *include_patterns_last = NULL;
237 static patstr *exclude_patterns = NULL;
238 static patstr *exclude_patterns_last = NULL;
239 static patstr *include_dir_patterns = NULL;
240 static patstr *include_dir_patterns_last = NULL;
241 static patstr *exclude_dir_patterns = NULL;
242 static patstr *exclude_dir_patterns_last = NULL;
243
244 /* Structure holding the two variables that describe a pattern chain. A pointer
245 to such structures is used for each appropriate option. */
246
247 typedef struct patdatastr {
248 patstr **anchor;
249 patstr **lastptr;
250 } patdatastr;
251
252 static patdatastr match_patdata = { &patterns, &patterns_last };
253 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
254 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
255 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
256 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
257
258 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
259 &include_dir_patterns, &exclude_dir_patterns };
260
261 static const char *incexname[4] = { "--include", "--exclude",
262 "--include-dir", "--exclude-dir" };
263
264 /* Structure for options and list of them */
265
266 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
267 OP_OP_NUMBER, OP_PATLIST, OP_FILELIST, OP_BINFILES };
268
/* Description of one command line option, as held in the option table. */

typedef struct option_item option_item;

struct option_item {
  int type;                /* one of the OP_xxx values */
  int one_char;            /* single-letter form, or a negative N_xxx code */
  void *dataptr;           /* where the option's data goes; NULL if none */
  const char *long_name;   /* long option name, possibly with "=value" text */
  const char *help_text;   /* description of the option */
};
276
277 /* Options without a single-letter equivalent get a negative value. This can be
278 used to identify them. */
279
280 #define N_COLOUR (-1)
281 #define N_EXCLUDE (-2)
282 #define N_EXCLUDE_DIR (-3)
283 #define N_HELP (-4)
284 #define N_INCLUDE (-5)
285 #define N_INCLUDE_DIR (-6)
286 #define N_LABEL (-7)
287 #define N_LOCALE (-8)
288 #define N_NULL (-9)
289 #define N_LOFFSETS (-10)
290 #define N_FOFFSETS (-11)
291 #define N_LBUFFER (-12)
292 #define N_M_LIMIT (-13)
293 #define N_M_LIMIT_REC (-14)
294 #define N_BUFSIZE (-15)
295 #define N_NOJIT (-16)
296 #define N_FILE_LIST (-17)
297 #define N_BINARY_FILES (-18)
298 #define N_EXCLUDE_FROM (-19)
299 #define N_INCLUDE_FROM (-20)
300
301 static option_item optionlist[] = {
302 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
303 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
304 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
305 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
306 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
307 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
308 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
309 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
310 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
311 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
312 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
313 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
314 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
315 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
316 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
317 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
318 { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
319 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
320 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
321 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
322 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
323 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
324 #ifdef SUPPORT_PCREGREP_JIT
325 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
326 #else
327 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
328 #endif
329 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
330 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
331 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
332 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
333 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
334 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
335 { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
336 { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
337 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
338 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
339 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
340 { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
341 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
342 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
343 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
344 { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
345 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
346 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
347 { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
348 { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
349
350 /* These two were accidentally implemented with underscores instead of
351 hyphens in the option names. As this was not discovered for several releases,
352 the incorrect versions are left in the table for compatibility. However, the
353 --help function misses out any option that has an underscore in its name. */
354
355 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
356 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
357
358 #ifdef JFRIEDL_DEBUG
359 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
360 #endif
361 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
362 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
363 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
364 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
365 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
366 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
367 { OP_NODATA, 0, NULL, NULL, NULL }
368 };
369
370 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
371 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
372 that the combination of -w and -x has the same effect as -x on its own, so we
373 can treat them as the same. Note that the PATBUFSIZE macro assumes the longest
374 prefix+suffix is 10 characters; if anything longer is added, PATBUFSIZE must be
375 adjusted. */
376
377 static const char *prefix[] = {
378 "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
379
380 static const char *suffix[] = {
381 "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
382
383 /* UTF-8 tables - used only when the newline setting is "any". */
384
385 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
386
387 const char utf8_table4[] = {
388 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
389 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
390 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
391 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
392
393
394
395 /*************************************************
396 * Add item to chain of patterns *
397 *************************************************/
398
399 /* Used to add an item onto a chain, or just return an unconnected item if the
400 "after" argument is NULL.
401
402 Arguments:
403 s pattern string to add
404 after if not NULL points to item to insert after
405
406 Returns: new pattern block, or NULL after malloc failure
407 */
408
409 static patstr *
410 add_pattern(char *s, patstr *after)
411 {
412 patstr *p = (patstr *)malloc(sizeof(patstr));
413 if (p == NULL)
414 {
415 fprintf(stderr, "pcregrep: malloc failed\n");
416 return NULL;
417 }
418 if (strlen(s) > MAXPATLEN)
419 {
420 fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
421 MAXPATLEN);
422 return NULL;
423 }
424 p->next = NULL;
425 p->string = s;
426 p->compiled = NULL;
427 p->hint = NULL;
428
429 if (after != NULL)
430 {
431 p->next = after->next;
432 after->next = p;
433 }
434 return p;
435 }
436
437
438 /*************************************************
439 * Free chain of patterns *
440 *************************************************/
441
442 /* Used for several chains of patterns.
443
444 Argument: pointer to start of chain
445 Returns: nothing
446 */
447
448 static void
449 free_pattern_chain(patstr *pc)
450 {
451 while (pc != NULL)
452 {
453 patstr *p = pc;
454 pc = p->next;
455 if (p->hint != NULL) pcre_free_study(p->hint);
456 if (p->compiled != NULL) pcre_free(p->compiled);
457 free(p);
458 }
459 }
460
461
462 /*************************************************
463 * Free chain of file names *
464 *************************************************/
465
466 /*
467 Argument: pointer to start of chain
468 Returns: nothing
469 */
470
471 static void
472 free_file_chain(fnstr *fn)
473 {
474 while (fn != NULL)
475 {
476 fnstr *f = fn;
477 fn = f->next;
478 free(f);
479 }
480 }
481
482
483 /*************************************************
484 * Exit from the program *
485 *************************************************/
486
487 /* If there has been a resource error, give a suitable message.
488
489 Argument: the return code
490 Returns: does not return
491 */
492
493 static void
494 pcregrep_exit(int rc)
495 {
496 if (resource_error)
497 {
498 fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
499 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
500 PCRE_ERROR_JIT_STACKLIMIT);
501 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
502 }
503
504 exit(rc);
505 }
506
507
508 /*************************************************
509 * OS-specific functions *
510 *************************************************/
511
512 /* These functions are defined so that they can be made system specific,
513 although at present the only ones are for Unix, Win32, and for "no support". */
514
515
516 /************* Directory scanning in Unix ***********/
517
518 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
519 #include <sys/types.h>
520 #include <sys/stat.h>
521 #include <dirent.h>
522
523 typedef DIR directory_type;
524 #define FILESEP '/'
525
/* Test whether a path names a directory (Unix version).

Argument:  filename   the path to test
Returns:   1 if stat() succeeds and reports a directory; 0 otherwise. A
           stat() failure yields 0, in the expectation that opening the
           path as a file will then fail.
*/

static int
isdirectory(char *filename)
{
struct stat sb;
return stat(filename, &sb) >= 0 && S_ISDIR(sb.st_mode);
}
534
535 static directory_type *
536 opendirectory(char *filename)
537 {
538 return opendir(filename);
539 }
540
541 static char *
542 readdirectory(directory_type *dir)
543 {
544 for (;;)
545 {
546 struct dirent *dent = readdir(dir);
547 if (dent == NULL) return NULL;
548 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
549 return dent->d_name;
550 }
551 /* Control never reaches here */
552 }
553
554 static void
555 closedirectory(directory_type *dir)
556 {
557 closedir(dir);
558 }
559
560
561 /************* Test for regular file in Unix **********/
562
/* Test whether a path names a regular file (Unix version).

Argument:  filename   the path to test
Returns:   1 if stat() fails (in the expectation that opening the path as a
           file will then fail) or if it reports a regular file; 0 otherwise
*/

static int
isregfile(char *filename)
{
struct stat sb;
return stat(filename, &sb) < 0 || S_ISREG(sb.st_mode);
}
571
572
573 /************* Test for a terminal in Unix **********/
574
575 static BOOL
576 is_stdout_tty(void)
577 {
578 return isatty(fileno(stdout));
579 }
580
581 static BOOL
582 is_file_tty(FILE *f)
583 {
584 return isatty(fileno(f));
585 }
586
587
588 /************* Directory scanning in Win32 ***********/
589
590 /* I (Philip Hazel) have no means of testing this code. It was contributed by
591 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
592 when it did not exist. David Byron added a patch that moved the #include of
593 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
594 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
595 undefined when it is indeed undefined. */
596
597 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
598
599 #ifndef STRICT
600 # define STRICT
601 #endif
602 #ifndef WIN32_LEAN_AND_MEAN
603 # define WIN32_LEAN_AND_MEAN
604 #endif
605
606 #include <windows.h>
607
608 #ifndef INVALID_FILE_ATTRIBUTES
609 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
610 #endif
611
612 typedef struct directory_type
613 {
614 HANDLE handle;
615 BOOL first;
616 WIN32_FIND_DATA data;
617 } directory_type;
618
619 #define FILESEP '/'
620
621 int
622 isdirectory(char *filename)
623 {
624 DWORD attr = GetFileAttributes(filename);
625 if (attr == INVALID_FILE_ATTRIBUTES)
626 return 0;
627 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
628 }
629
630 directory_type *
631 opendirectory(char *filename)
632 {
633 size_t len;
634 char *pattern;
635 directory_type *dir;
636 DWORD err;
637 len = strlen(filename);
638 pattern = (char *)malloc(len + 3);
639 dir = (directory_type *)malloc(sizeof(*dir));
640 if ((pattern == NULL) || (dir == NULL))
641 {
642 fprintf(stderr, "pcregrep: malloc failed\n");
643 pcregrep_exit(2);
644 }
645 memcpy(pattern, filename, len);
646 memcpy(&(pattern[len]), "\\*", 3);
647 dir->handle = FindFirstFile(pattern, &(dir->data));
648 if (dir->handle != INVALID_HANDLE_VALUE)
649 {
650 free(pattern);
651 dir->first = TRUE;
652 return dir;
653 }
654 err = GetLastError();
655 free(pattern);
656 free(dir);
657 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
658 return NULL;
659 }
660
661 char *
662 readdirectory(directory_type *dir)
663 {
664 for (;;)
665 {
666 if (!dir->first)
667 {
668 if (!FindNextFile(dir->handle, &(dir->data)))
669 return NULL;
670 }
671 else
672 {
673 dir->first = FALSE;
674 }
675 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
676 return dir->data.cFileName;
677 }
678 #ifndef _MSC_VER
679 return NULL; /* Keep compiler happy; never executed */
680 #endif
681 }
682
683 void
684 closedirectory(directory_type *dir)
685 {
686 FindClose(dir->handle);
687 free(dir);
688 }
689
690
691 /************* Test for regular file in Win32 **********/
692
693 /* I don't know how to do this, or if it can be done; assume all paths are
694 regular if they are not directories. */
695
/* Test for a regular file (Win32 version): any path that is not a directory
is taken to be regular.

Argument:  filename   the path to test
Returns:   1 if isdirectory() returns zero for the path; 0 otherwise
*/

int
isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
700
701
702 /************* Test for a terminal in Win32 **********/
703
704 /* I don't know how to do this; assume never */
705
706 static BOOL
707 is_stdout_tty(void)
708 {
709 return FALSE;
710 }
711
712 static BOOL
713 is_file_tty(FILE *f)
714 {
715 return FALSE;
716 }
717
718
719 /************* Directory scanning when we can't do it ***********/
720
721 /* The type is void, and apart from isdirectory(), the functions do nothing. */
722
723 #else
724
725 #define FILESEP 0
726 typedef void directory_type;
727
/* Without directory support, no path is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}
729 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
730 char *readdirectory(directory_type *dir) { return (char*)0;}
731 void closedirectory(directory_type *dir) {}
732
733
734 /************* Test for regular when we can't do it **********/
735
736 /* Assume all files are regular. */
737
/* Without any means of testing, every path is reported as a regular file. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
739
740
741 /************* Test for a terminal when we can't do it **********/
742
743 static BOOL
744 is_stdout_tty(void)
745 {
746 return FALSE;
747 }
748
749 static BOOL
750 is_file_tty(FILE *f)
751 {
752 return FALSE;
753 }
754
755 #endif
756
757
758
759 #ifndef HAVE_STRERROR
760 /*************************************************
761 * Provide strerror() for non-ANSI libraries *
762 *************************************************/
763
764 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
765 in their libraries, but can provide the same facility by this simple
766 alternative function. */
767
768 extern int sys_nerr;
769 extern char *sys_errlist[];
770
771 char *
772 strerror(int n)
773 {
774 if (n < 0 || n >= sys_nerr) return "unknown error number";
775 return sys_errlist[n];
776 }
777 #endif /* HAVE_STRERROR */
778
779
780
781 /*************************************************
782 * Test exclude/includes *
783 *************************************************/
784
785 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
786 there are no includes, the path must match an include pattern.
787
788 Arguments:
789 path the path to be matched
790 ip the chain of include patterns
791 ep the chain of exclude patterns
792
793 Returns: TRUE if the path is not excluded
794 */
795
796 static BOOL
797 test_incexc(char *path, patstr *ip, patstr *ep)
798 {
799 int plen = strlen(path);
800
801 for (; ep != NULL; ep = ep->next)
802 {
803 if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
804 return FALSE;
805 }
806
807 if (ip == NULL) return TRUE;
808
809 for (; ip != NULL; ip = ip->next)
810 {
811 if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
812 return TRUE;
813 }
814
815 return FALSE;
816 }
817
818
819
820 /*************************************************
821 * Read one line of input *
822 *************************************************/
823
824 /* Normally, input is read using fread() into a large buffer, so many lines may
825 be read at once. However, doing this for tty input means that no output appears
826 until a lot of input has been typed. Instead, tty input is handled line by
827 line. We cannot use fgets() for this, because it does not stop at a binary
828 zero, and therefore there is no way of telling how many characters it has read,
829 because there may be binary zeros embedded in the data.
830
831 Arguments:
832 buffer the buffer to read into
833 length the maximum number of characters to read
834 f the file
835
836 Returns: the number of characters read, zero at end of file
837 */
838
/* Read one line from a file, one byte at a time with fgetc(), so that binary
zeros in the data are counted like any other byte. Reading stops after a
newline has been stored, when the buffer is full, or at end of file. No
terminating zero is added.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of bytes to store; if this is zero or
             negative, nothing is read and nothing is stored
  f          the file

Returns:     the number of bytes stored; zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

/* The space check comes before the read, so that a byte is never taken from
the file when there is nowhere to put it. */

while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = c;
  if (c == '\n') break;
  }
return yield;
}
851
852
853
854 /*************************************************
855 * Find end of line *
856 *************************************************/
857
858 /* The length of the endline sequence that is found is set via lenptr. This may
859 be zero at the very end of the file if there is no line-ending sequence there.
860
861 Arguments:
862 p current position in line
863 endptr end of available data
864 lenptr where to put the length of the eol sequence
865
866 Returns: pointer after the last byte of the line,
867 including the newline byte(s)
868 */
869
870 static char *
871 end_of_line(char *p, char *endptr, int *lenptr)
872 {
873 switch(endlinetype)
874 {
875 default: /* Just in case */
876 case EL_LF:
877 while (p < endptr && *p != '\n') p++;
878 if (p < endptr)
879 {
880 *lenptr = 1;
881 return p + 1;
882 }
883 *lenptr = 0;
884 return endptr;
885
886 case EL_CR:
887 while (p < endptr && *p != '\r') p++;
888 if (p < endptr)
889 {
890 *lenptr = 1;
891 return p + 1;
892 }
893 *lenptr = 0;
894 return endptr;
895
896 case EL_CRLF:
897 for (;;)
898 {
899 while (p < endptr && *p != '\r') p++;
900 if (++p >= endptr)
901 {
902 *lenptr = 0;
903 return endptr;
904 }
905 if (*p == '\n')
906 {
907 *lenptr = 2;
908 return p + 1;
909 }
910 }
911 break;
912
913 case EL_ANYCRLF:
914 while (p < endptr)
915 {
916 int extra = 0;
917 register int c = *((unsigned char *)p);
918
919 if (utf8 && c >= 0xc0)
920 {
921 int gcii, gcss;
922 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
923 gcss = 6*extra;
924 c = (c & utf8_table3[extra]) << gcss;
925 for (gcii = 1; gcii <= extra; gcii++)
926 {
927 gcss -= 6;
928 c |= (p[gcii] & 0x3f) << gcss;
929 }
930 }
931
932 p += 1 + extra;
933
934 switch (c)
935 {
936 case '\n':
937 *lenptr = 1;
938 return p;
939
940 case '\r':
941 if (p < endptr && *p == '\n')
942 {
943 *lenptr = 2;
944 p++;
945 }
946 else *lenptr = 1;
947 return p;
948
949 default:
950 break;
951 }
952 } /* End of loop for ANYCRLF case */
953
954 *lenptr = 0; /* Must have hit the end */
955 return endptr;
956
957 case EL_ANY:
958 while (p < endptr)
959 {
960 int extra = 0;
961 register int c = *((unsigned char *)p);
962
963 if (utf8 && c >= 0xc0)
964 {
965 int gcii, gcss;
966 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
967 gcss = 6*extra;
968 c = (c & utf8_table3[extra]) << gcss;
969 for (gcii = 1; gcii <= extra; gcii++)
970 {
971 gcss -= 6;
972 c |= (p[gcii] & 0x3f) << gcss;
973 }
974 }
975
976 p += 1 + extra;
977
978 switch (c)
979 {
980 case '\n': /* LF */
981 case '\v': /* VT */
982 case '\f': /* FF */
983 *lenptr = 1;
984 return p;
985
986 case '\r': /* CR */
987 if (p < endptr && *p == '\n')
988 {
989 *lenptr = 2;
990 p++;
991 }
992 else *lenptr = 1;
993 return p;
994
995 #ifndef EBCDIC
996 case 0x85: /* Unicode NEL */
997 *lenptr = utf8? 2 : 1;
998 return p;
999
1000 case 0x2028: /* Unicode LS */
1001 case 0x2029: /* Unicode PS */
1002 *lenptr = 3;
1003 return p;
1004 #endif /* Not EBCDIC */
1005
1006 default:
1007 break;
1008 }
1009 } /* End of loop for ANY case */
1010
1011 *lenptr = 0; /* Must have hit the end */
1012 return endptr;
1013 } /* End of overall switch */
1014 }
1015
1016
1017
1018 /*************************************************
1019 * Find start of previous line *
1020 *************************************************/
1021
1022 /* This is called when looking back for before lines to print.
1023
1024 Arguments:
1025 p start of the subsequent line
1026 startptr start of available data
1027
1028 Returns: pointer to the start of the previous line
1029 */
1030
1031 static char *
1032 previous_line(char *p, char *startptr)
1033 {
1034 switch(endlinetype)
1035 {
1036 default: /* Just in case */
1037 case EL_LF:
1038 p--;
1039 while (p > startptr && p[-1] != '\n') p--;
1040 return p;
1041
1042 case EL_CR:
1043 p--;
1044 while (p > startptr && p[-1] != '\n') p--;
1045 return p;
1046
1047 case EL_CRLF:
1048 for (;;)
1049 {
1050 p -= 2;
1051 while (p > startptr && p[-1] != '\n') p--;
1052 if (p <= startptr + 1 || p[-2] == '\r') return p;
1053 }
1054 return p; /* But control should never get here */
1055
1056 case EL_ANY:
1057 case EL_ANYCRLF:
1058 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1059 if (utf8) while ((*p & 0xc0) == 0x80) p--;
1060
1061 while (p > startptr)
1062 {
1063 register int c;
1064 char *pp = p - 1;
1065
1066 if (utf8)
1067 {
1068 int extra = 0;
1069 while ((*pp & 0xc0) == 0x80) pp--;
1070 c = *((unsigned char *)pp);
1071 if (c >= 0xc0)
1072 {
1073 int gcii, gcss;
1074 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1075 gcss = 6*extra;
1076 c = (c & utf8_table3[extra]) << gcss;
1077 for (gcii = 1; gcii <= extra; gcii++)
1078 {
1079 gcss -= 6;
1080 c |= (pp[gcii] & 0x3f) << gcss;
1081 }
1082 }
1083 }
1084 else c = *((unsigned char *)pp);
1085
1086 if (endlinetype == EL_ANYCRLF) switch (c)
1087 {
1088 case '\n': /* LF */
1089 case '\r': /* CR */
1090 return p;
1091
1092 default:
1093 break;
1094 }
1095
1096 else switch (c)
1097 {
1098 case '\n': /* LF */
1099 case '\v': /* VT */
1100 case '\f': /* FF */
1101 case '\r': /* CR */
1102 #ifndef EBCDIE
1103 case 0x85: /* Unicode NEL */
1104 case 0x2028: /* Unicode LS */
1105 case 0x2029: /* Unicode PS */
1106 #endif /* Not EBCDIC */
1107 return p;
1108
1109 default:
1110 break;
1111 }
1112
1113 p = pp; /* Back one character */
1114 } /* End of loop for ANY case */
1115
1116 return startptr; /* Hit start of data */
1117 } /* End of overall switch */
1118 }
1119
1120
1121
1122
1123
1124 /*************************************************
1125 * Print the previous "after" lines *
1126 *************************************************/
1127
1128 /* This is called if we are about to lose said lines because of buffer filling,
1129 and at the end of the file. The data in the line is written using fwrite() so
1130 that a binary zero does not terminate it.
1131
1132 Arguments:
1133 lastmatchnumber the number of the last matching line, plus one
1134 lastmatchrestart where we restarted after the last match
1135 endptr end of available data
1136 printname filename for printing
1137
1138 Returns: nothing
1139 */
1140
1141 static void
1142 do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1143 char *printname)
1144 {
1145 if (after_context > 0 && lastmatchnumber > 0)
1146 {
1147 int count = 0;
1148 while (lastmatchrestart < endptr && count++ < after_context)
1149 {
1150 int ellength;
1151 char *pp = lastmatchrestart;
1152 if (printname != NULL) fprintf(stdout, "%s-", printname);
1153 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1154 pp = end_of_line(pp, endptr, &ellength);
1155 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1156 lastmatchrestart = pp;
1157 }
1158 hyphenpending = TRUE;
1159 }
1160 }
1161
1162
1163
1164 /*************************************************
1165 * Apply patterns to subject till one matches *
1166 *************************************************/
1167
1168 /* This function is called to run through all patterns, looking for a match. It
1169 is used multiple times for the same subject when colouring is enabled, in order
1170 to find all possible matches.
1171
1172 Arguments:
1173 matchptr the start of the subject
1174 length the length of the subject to match
1175 startoffset where to start matching
1176 offsets the offets vector to fill in
1177 mrc address of where to put the result of pcre_exec()
1178
1179 Returns: TRUE if there was a match
1180 FALSE if there was no match
1181 invert if there was a non-fatal error
1182 */
1183
1184 static BOOL
1185 match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
1186 int *mrc)
1187 {
1188 int i;
1189 size_t slen = length;
1190 patstr *p = patterns;
1191 const char *msg = "this text:\n\n";
1192
1193 if (slen > 200)
1194 {
1195 slen = 200;
1196 msg = "text that starts:\n\n";
1197 }
1198 for (i = 1; p != NULL; p = p->next, i++)
1199 {
1200 *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1201 startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
1202 if (*mrc >= 0) return TRUE;
1203 if (*mrc == PCRE_ERROR_NOMATCH) continue;
1204 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1205 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1206 fprintf(stderr, "%s", msg);
1207 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
1208 fprintf(stderr, "\n\n");
1209 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1210 *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1211 resource_error = TRUE;
1212 if (error_count++ > 20)
1213 {
1214 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1215 pcregrep_exit(2);
1216 }
1217 return invert; /* No more matching; don't show the line again */
1218 }
1219
1220 return FALSE; /* No match, no errors */
1221 }
1222
1223
1224
1225 /*************************************************
1226 * Grep an individual file *
1227 *************************************************/
1228
1229 /* This is called from grep_or_recurse() below. It uses a buffer that is three
1230 times the value of bufthird. The matching point is never allowed to stray into
1231 the top third of the buffer, thus keeping more of the file available for
1232 context printing or for multiline scanning. For large files, the pointer will
1233 be in the middle third most of the time, so the bottom third is available for
1234 "before" context printing.
1235
1236 Arguments:
1237 handle the fopened FILE stream for a normal file
1238 the gzFile pointer when reading is via libz
1239 the BZFILE pointer when reading is via libbz2
1240 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1241 filename the file name or NULL (for errors)
1242 printname the file name if it is to be printed for each match
1243 or NULL if the file name is not to be printed
1244 it cannot be NULL if filenames[_nomatch]_only is set
1245
1246 Returns: 0 if there was at least one match
1247 1 otherwise (no matches)
1248 2 if an overlong line is encountered
1249 3 if there is a read error on a .bz2 file
1250 */
1251
1252 static int
1253 pcregrep(void *handle, int frtype, char *filename, char *printname)
1254 {
1255 int rc = 1;
1256 int linenumber = 1;
1257 int lastmatchnumber = 0;
1258 int count = 0;
1259 int filepos = 0;
1260 int offsets[OFFSET_SIZE];
1261 char *lastmatchrestart = NULL;
1262 char *ptr = main_buffer;
1263 char *endptr;
1264 size_t bufflength;
1265 BOOL binary = FALSE;
1266 BOOL endhyphenpending = FALSE;
1267 BOOL input_line_buffered = line_buffered;
1268 FILE *in = NULL; /* Ensure initialized */
1269
1270 #ifdef SUPPORT_LIBZ
1271 gzFile ingz = NULL;
1272 #endif
1273
1274 #ifdef SUPPORT_LIBBZ2
1275 BZFILE *inbz2 = NULL;
1276 #endif
1277
1278
1279 /* Do the first read into the start of the buffer and set up the pointer to end
1280 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1281 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1282 fail. */
1283
1284 #ifdef SUPPORT_LIBZ
1285 if (frtype == FR_LIBZ)
1286 {
1287 ingz = (gzFile)handle;
1288 bufflength = gzread (ingz, main_buffer, bufsize);
1289 }
1290 else
1291 #endif
1292
1293 #ifdef SUPPORT_LIBBZ2
1294 if (frtype == FR_LIBBZ2)
1295 {
1296 inbz2 = (BZFILE *)handle;
1297 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1298 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1299 } /* without the cast it is unsigned. */
1300 else
1301 #endif
1302
1303 {
1304 in = (FILE *)handle;
1305 if (is_file_tty(in)) input_line_buffered = TRUE;
1306 bufflength = input_line_buffered?
1307 read_one_line(main_buffer, bufsize, in) :
1308 fread(main_buffer, 1, bufsize, in);
1309 }
1310
1311 endptr = main_buffer + bufflength;
1312
1313 /* Unless binary-files=text, see if we have a binary file. This uses the same
1314 rule as GNU grep, namely, a search for a binary zero byte near the start of the
1315 file. */
1316
1317 if (binary_files != BIN_TEXT)
1318 {
1319 binary =
1320 memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1321 if (binary && binary_files == BIN_NOMATCH) return 1;
1322 }
1323
1324 /* Loop while the current pointer is not at the end of the file. For large
1325 files, endptr will be at the end of the buffer when we are in the middle of the
1326 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1327 way, the buffer is shifted left and re-filled. */
1328
1329 while (ptr < endptr)
1330 {
1331 int endlinelength;
1332 int mrc = 0;
1333 int startoffset = 0;
1334 BOOL match;
1335 char *matchptr = ptr;
1336 char *t = ptr;
1337 size_t length, linelength;
1338
1339 /* At this point, ptr is at the start of a line. We need to find the length
1340 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1341 length remainder of the data in the buffer. Otherwise, it is the length of
1342 the next line, excluding the terminating newline. After matching, we always
1343 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1344 option is used for compiling, so that any match is constrained to be in the
1345 first line. */
1346
1347 t = end_of_line(t, endptr, &endlinelength);
1348 linelength = t - ptr - endlinelength;
1349 length = multiline? (size_t)(endptr - ptr) : linelength;
1350
1351 /* Check to see if the line we are looking at extends right to the very end
1352 of the buffer without a line terminator. This means the line is too long to
1353 handle. */
1354
1355 if (endlinelength == 0 && t == main_buffer + bufsize)
1356 {
1357 fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1358 "pcregrep: check the --buffer-size option\n",
1359 linenumber,
1360 (filename == NULL)? "" : " of file ",
1361 (filename == NULL)? "" : filename);
1362 return 2;
1363 }
1364
1365 /* Extra processing for Jeffrey Friedl's debugging. */
1366
1367 #ifdef JFRIEDL_DEBUG
1368 if (jfriedl_XT || jfriedl_XR)
1369 {
1370 #include <sys/time.h>
1371 #include <time.h>
1372 struct timeval start_time, end_time;
1373 struct timezone dummy;
1374 int i;
1375
1376 if (jfriedl_XT)
1377 {
1378 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1379 const char *orig = ptr;
1380 ptr = malloc(newlen + 1);
1381 if (!ptr) {
1382 printf("out of memory");
1383 pcregrep_exit(2);
1384 }
1385 endptr = ptr;
1386 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1387 for (i = 0; i < jfriedl_XT; i++) {
1388 strncpy(endptr, orig, length);
1389 endptr += length;
1390 }
1391 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1392 length = newlen;
1393 }
1394
1395 if (gettimeofday(&start_time, &dummy) != 0)
1396 perror("bad gettimeofday");
1397
1398
1399 for (i = 0; i < jfriedl_XR; i++)
1400 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1401 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1402
1403 if (gettimeofday(&end_time, &dummy) != 0)
1404 perror("bad gettimeofday");
1405
1406 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1407 -
1408 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1409
1410 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1411 return 0;
1412 }
1413 #endif
1414
1415 /* We come back here after a match when the -o option (only_matching) is set,
1416 in order to find any further matches in the same line. */
1417
1418 ONLY_MATCHING_RESTART:
1419
1420 /* Run through all the patterns until one matches or there is an error other
1421 than NOMATCH. This code is in a subroutine so that it can be re-used for
1422 finding subsequent matches when colouring matched lines. */
1423
1424 match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1425
1426 /* If it's a match or a not-match (as required), do what's wanted. */
1427
1428 if (match != invert)
1429 {
1430 BOOL hyphenprinted = FALSE;
1431
1432 /* We've failed if we want a file that doesn't have any matches. */
1433
1434 if (filenames == FN_NOMATCH_ONLY) return 1;
1435
1436 /* Just count if just counting is wanted. */
1437
1438 if (count_only) count++;
1439
1440 /* When handling a binary file and binary-files==binary, the "binary"
1441 variable will be set true (it's false in all other cases). In this
1442 situation we just want to output the file name. No need to scan further. */
1443
1444 else if (binary)
1445 {
1446 fprintf(stdout, "Binary file %s matches\n", filename);
1447 return 0;
1448 }
1449
1450 /* If all we want is a file name, there is no need to scan any more lines
1451 in the file. */
1452
1453 else if (filenames == FN_MATCH_ONLY)
1454 {
1455 fprintf(stdout, "%s\n", printname);
1456 return 0;
1457 }
1458
1459 /* Likewise, if all we want is a yes/no answer. */
1460
1461 else if (quiet) return 0;
1462
1463 /* The --only-matching option prints just the substring that matched, or a
1464 captured portion of it, as long as this string is not empty, and the
1465 --file-offsets and --line-offsets options output offsets for the matching
1466 substring (they both force --only-matching = 0). None of these options
1467 prints any context. Afterwards, adjust the start and then jump back to look
1468 for further matches in the same line. If we are in invert mode, however,
1469 nothing is printed and we do not restart - this could still be useful
1470 because the return code is set. */
1471
1472 else if (only_matching >= 0)
1473 {
1474 if (!invert)
1475 {
1476 if (printname != NULL) fprintf(stdout, "%s:", printname);
1477 if (number) fprintf(stdout, "%d:", linenumber);
1478 if (line_offsets)
1479 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1480 offsets[1] - offsets[0]);
1481 else if (file_offsets)
1482 fprintf(stdout, "%d,%d\n",
1483 (int)(filepos + matchptr + offsets[0] - ptr),
1484 offsets[1] - offsets[0]);
1485 else if (only_matching < mrc)
1486 {
1487 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1488 if (plen > 0)
1489 {
1490 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1491 FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1492 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1493 fprintf(stdout, "\n");
1494 }
1495 }
1496 else if (printname != NULL || number) fprintf(stdout, "\n");
1497 match = FALSE;
1498 if (line_buffered) fflush(stdout);
1499 rc = 0; /* Had some success */
1500 startoffset = offsets[1]; /* Restart after the match */
1501 goto ONLY_MATCHING_RESTART;
1502 }
1503 }
1504
1505 /* This is the default case when none of the above options is set. We print
1506 the matching lines(s), possibly preceded and/or followed by other lines of
1507 context. */
1508
1509 else
1510 {
1511 /* See if there is a requirement to print some "after" lines from a
1512 previous match. We never print any overlaps. */
1513
1514 if (after_context > 0 && lastmatchnumber > 0)
1515 {
1516 int ellength;
1517 int linecount = 0;
1518 char *p = lastmatchrestart;
1519
1520 while (p < ptr && linecount < after_context)
1521 {
1522 p = end_of_line(p, ptr, &ellength);
1523 linecount++;
1524 }
1525
1526 /* It is important to advance lastmatchrestart during this printing so
1527 that it interacts correctly with any "before" printing below. Print
1528 each line's data using fwrite() in case there are binary zeroes. */
1529
1530 while (lastmatchrestart < p)
1531 {
1532 char *pp = lastmatchrestart;
1533 if (printname != NULL) fprintf(stdout, "%s-", printname);
1534 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1535 pp = end_of_line(pp, endptr, &ellength);
1536 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1537 lastmatchrestart = pp;
1538 }
1539 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1540 }
1541
1542 /* If there were non-contiguous lines printed above, insert hyphens. */
1543
1544 if (hyphenpending)
1545 {
1546 fprintf(stdout, "--\n");
1547 hyphenpending = FALSE;
1548 hyphenprinted = TRUE;
1549 }
1550
1551 /* See if there is a requirement to print some "before" lines for this
1552 match. Again, don't print overlaps. */
1553
1554 if (before_context > 0)
1555 {
1556 int linecount = 0;
1557 char *p = ptr;
1558
1559 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1560 linecount < before_context)
1561 {
1562 linecount++;
1563 p = previous_line(p, main_buffer);
1564 }
1565
1566 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1567 fprintf(stdout, "--\n");
1568
1569 while (p < ptr)
1570 {
1571 int ellength;
1572 char *pp = p;
1573 if (printname != NULL) fprintf(stdout, "%s-", printname);
1574 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1575 pp = end_of_line(pp, endptr, &ellength);
1576 FWRITE(p, 1, pp - p, stdout);
1577 p = pp;
1578 }
1579 }
1580
1581 /* Now print the matching line(s); ensure we set hyphenpending at the end
1582 of the file if any context lines are being output. */
1583
1584 if (after_context > 0 || before_context > 0)
1585 endhyphenpending = TRUE;
1586
1587 if (printname != NULL) fprintf(stdout, "%s:", printname);
1588 if (number) fprintf(stdout, "%d:", linenumber);
1589
1590 /* In multiline mode, we want to print to the end of the line in which
1591 the end of the matched string is found, so we adjust linelength and the
1592 line number appropriately, but only when there actually was a match
1593 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1594 the match will always be before the first newline sequence. */
1595
1596 if (multiline & !invert)
1597 {
1598 char *endmatch = ptr + offsets[1];
1599 t = ptr;
1600 while (t < endmatch)
1601 {
1602 t = end_of_line(t, endptr, &endlinelength);
1603 if (t < endmatch) linenumber++; else break;
1604 }
1605 linelength = t - ptr - endlinelength;
1606 }
1607
1608 /*** NOTE: Use only fwrite() to output the data line, so that binary
1609 zeroes are treated as just another data character. */
1610
1611 /* This extra option, for Jeffrey Friedl's debugging requirements,
1612 replaces the matched string, or a specific captured string if it exists,
1613 with X. When this happens, colouring is ignored. */
1614
1615 #ifdef JFRIEDL_DEBUG
1616 if (S_arg >= 0 && S_arg < mrc)
1617 {
1618 int first = S_arg * 2;
1619 int last = first + 1;
1620 FWRITE(ptr, 1, offsets[first], stdout);
1621 fprintf(stdout, "X");
1622 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1623 }
1624 else
1625 #endif
1626
1627 /* We have to split the line(s) up if colouring, and search for further
1628 matches, but not of course if the line is a non-match. */
1629
1630 if (do_colour && !invert)
1631 {
1632 int plength;
1633 FWRITE(ptr, 1, offsets[0], stdout);
1634 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1635 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1636 fprintf(stdout, "%c[00m", 0x1b);
1637 for (;;)
1638 {
1639 startoffset = offsets[1];
1640 if (startoffset >= (int)linelength + endlinelength ||
1641 !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1642 break;
1643 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1644 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1645 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1646 fprintf(stdout, "%c[00m", 0x1b);
1647 }
1648
1649 /* In multiline mode, we may have already printed the complete line
1650 and its line-ending characters (if they matched the pattern), so there
1651 may be no more to print. */
1652
1653 plength = (int)((linelength + endlinelength) - startoffset);
1654 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1655 }
1656
1657 /* Not colouring; no need to search for further matches */
1658
1659 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1660 }
1661
1662 /* End of doing what has to be done for a match. If --line-buffered was
1663 given, flush the output. */
1664
1665 if (line_buffered) fflush(stdout);
1666 rc = 0; /* Had some success */
1667
1668 /* Remember where the last match happened for after_context. We remember
1669 where we are about to restart, and that line's number. */
1670
1671 lastmatchrestart = ptr + linelength + endlinelength;
1672 lastmatchnumber = linenumber + 1;
1673 }
1674
1675 /* For a match in multiline inverted mode (which of course did not cause
1676 anything to be printed), we have to move on to the end of the match before
1677 proceeding. */
1678
1679 if (multiline && invert && match)
1680 {
1681 int ellength;
1682 char *endmatch = ptr + offsets[1];
1683 t = ptr;
1684 while (t < endmatch)
1685 {
1686 t = end_of_line(t, endptr, &ellength);
1687 if (t <= endmatch) linenumber++; else break;
1688 }
1689 endmatch = end_of_line(endmatch, endptr, &ellength);
1690 linelength = endmatch - ptr - ellength;
1691 }
1692
1693 /* Advance to after the newline and increment the line number. The file
1694 offset to the current line is maintained in filepos. */
1695
1696 ptr += linelength + endlinelength;
1697 filepos += (int)(linelength + endlinelength);
1698 linenumber++;
1699
1700 /* If input is line buffered, and the buffer is not yet full, read another
1701 line and add it into the buffer. */
1702
1703 if (input_line_buffered && bufflength < (size_t)bufsize)
1704 {
1705 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1706 bufflength += add;
1707 endptr += add;
1708 }
1709
1710 /* If we haven't yet reached the end of the file (the buffer is full), and
1711 the current point is in the top 1/3 of the buffer, slide the buffer down by
1712 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1713 about to be lost, print them. */
1714
1715 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1716 {
1717 if (after_context > 0 &&
1718 lastmatchnumber > 0 &&
1719 lastmatchrestart < main_buffer + bufthird)
1720 {
1721 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1722 lastmatchnumber = 0;
1723 }
1724
1725 /* Now do the shuffle */
1726
1727 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1728 ptr -= bufthird;
1729
1730 #ifdef SUPPORT_LIBZ
1731 if (frtype == FR_LIBZ)
1732 bufflength = 2*bufthird +
1733 gzread (ingz, main_buffer + 2*bufthird, bufthird);
1734 else
1735 #endif
1736
1737 #ifdef SUPPORT_LIBBZ2
1738 if (frtype == FR_LIBBZ2)
1739 bufflength = 2*bufthird +
1740 BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1741 else
1742 #endif
1743
1744 bufflength = 2*bufthird +
1745 (input_line_buffered?
1746 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1747 fread(main_buffer + 2*bufthird, 1, bufthird, in));
1748 endptr = main_buffer + bufflength;
1749
1750 /* Adjust any last match point */
1751
1752 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1753 }
1754 } /* Loop through the whole file */
1755
1756 /* End of file; print final "after" lines if wanted; do_after_lines sets
1757 hyphenpending if it prints something. */
1758
1759 if (only_matching < 0 && !count_only)
1760 {
1761 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1762 hyphenpending |= endhyphenpending;
1763 }
1764
1765 /* Print the file name if we are looking for those without matches and there
1766 were none. If we found a match, we won't have got this far. */
1767
1768 if (filenames == FN_NOMATCH_ONLY)
1769 {
1770 fprintf(stdout, "%s\n", printname);
1771 return 0;
1772 }
1773
1774 /* Print the match count if wanted */
1775
1776 if (count_only)
1777 {
1778 if (count > 0 || !omit_zero_count)
1779 {
1780 if (printname != NULL && filenames != FN_NONE)
1781 fprintf(stdout, "%s:", printname);
1782 fprintf(stdout, "%d\n", count);
1783 }
1784 }
1785
1786 return rc;
1787 }
1788
1789
1790
1791 /*************************************************
1792 * Grep a file or recurse into a directory *
1793 *************************************************/
1794
1795 /* Given a path name, if it's a directory, scan all the files if we are
1796 recursing; if it's a file, grep it.
1797
1798 Arguments:
1799 pathname the path to investigate
1800 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1801 only_one_at_top TRUE if the path is the only one at toplevel
1802
1803 Returns: -1 the file/directory was skipped
1804 0 if there was at least one match
1805 1 if there were no matches
1806 2 there was some kind of error
1807
1808 However, file opening failures are suppressed if "silent" is set.
1809 */
1810
1811 static int
1812 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1813 {
1814 int rc = 1;
1815 int frtype;
1816 void *handle;
1817 char *lastcomp;
1818 FILE *in = NULL; /* Ensure initialized */
1819
1820 #ifdef SUPPORT_LIBZ
1821 gzFile ingz = NULL;
1822 #endif
1823
1824 #ifdef SUPPORT_LIBBZ2
1825 BZFILE *inbz2 = NULL;
1826 #endif
1827
1828 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1829 int pathlen;
1830 #endif
1831
1832 /* If the file name is "-" we scan stdin */
1833
1834 if (strcmp(pathname, "-") == 0)
1835 {
1836 return pcregrep(stdin, FR_PLAIN, stdin_name,
1837 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1838 stdin_name : NULL);
1839 }
1840
1841 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
1842 directories, whereas --include and --exclude apply to everything else. The test
1843 is against the final component of the path. */
1844
1845 lastcomp = strrchr(pathname, FILESEP);
1846 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
1847
1848 /* If the file is a directory, skip if not recursing or if explicitly excluded.
1849 Otherwise, scan the directory and recurse for each path within it. The scanning
1850 code is localized so it can be made system-specific. */
1851
1852 if (isdirectory(pathname))
1853 {
1854 if (dee_action == dee_SKIP ||
1855 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
1856 return -1;
1857
1858 if (dee_action == dee_RECURSE)
1859 {
1860 char buffer[1024];
1861 char *nextfile;
1862 directory_type *dir = opendirectory(pathname);
1863
1864 if (dir == NULL)
1865 {
1866 if (!silent)
1867 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1868 strerror(errno));
1869 return 2;
1870 }
1871
1872 while ((nextfile = readdirectory(dir)) != NULL)
1873 {
1874 int frc;
1875 sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
1876 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1877 if (frc > 1) rc = frc;
1878 else if (frc == 0 && rc == 1) rc = 0;
1879 }
1880
1881 closedirectory(dir);
1882 return rc;
1883 }
1884 }
1885
1886 /* If the file is not a directory and not a regular file, skip it if that's
1887 been requested. Otherwise, check for explicit include/exclude. */
1888
1889 else if ((!isregfile(pathname) && DEE_action == DEE_SKIP) ||
1890 !test_incexc(lastcomp, include_patterns, exclude_patterns))
1891 return -1;
1892
1893 /* Control reaches here if we have a regular file, or if we have a directory
1894 and recursion or skipping was not requested, or if we have anything else and
1895 skipping was not requested. The scan proceeds. If this is the first and only
1896 argument at top level, we don't show the file name, unless we are only showing
1897 the file name, or the filename was forced (-H). */
1898
1899 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1900 pathlen = (int)(strlen(pathname));
1901 #endif
1902
1903 /* Open using zlib if it is supported and the file name ends with .gz. */
1904
1905 #ifdef SUPPORT_LIBZ
1906 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1907 {
1908 ingz = gzopen(pathname, "rb");
1909 if (ingz == NULL)
1910 {
1911 if (!silent)
1912 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1913 strerror(errno));
1914 return 2;
1915 }
1916 handle = (void *)ingz;
1917 frtype = FR_LIBZ;
1918 }
1919 else
1920 #endif
1921
1922 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1923
1924 #ifdef SUPPORT_LIBBZ2
1925 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1926 {
1927 inbz2 = BZ2_bzopen(pathname, "rb");
1928 handle = (void *)inbz2;
1929 frtype = FR_LIBBZ2;
1930 }
1931 else
1932 #endif
1933
1934 /* Otherwise use plain fopen(). The label is so that we can come back here if
1935 an attempt to read a .bz2 file indicates that it really is a plain file. */
1936
1937 #ifdef SUPPORT_LIBBZ2
1938 PLAIN_FILE:
1939 #endif
1940 {
1941 in = fopen(pathname, "rb");
1942 handle = (void *)in;
1943 frtype = FR_PLAIN;
1944 }
1945
1946 /* All the opening methods return errno when they fail. */
1947
1948 if (handle == NULL)
1949 {
1950 if (!silent)
1951 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1952 strerror(errno));
1953 return 2;
1954 }
1955
1956 /* Now grep the file */
1957
1958 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1959 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1960
1961 /* Close in an appropriate manner. */
1962
1963 #ifdef SUPPORT_LIBZ
1964 if (frtype == FR_LIBZ)
1965 gzclose(ingz);
1966 else
1967 #endif
1968
1969 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1970 read failed. If the error indicates that the file isn't in fact bzipped, try
1971 again as a normal file. */
1972
1973 #ifdef SUPPORT_LIBBZ2
1974 if (frtype == FR_LIBBZ2)
1975 {
1976 if (rc == 3)
1977 {
1978 int errnum;
1979 const char *err = BZ2_bzerror(inbz2, &errnum);
1980 if (errnum == BZ_DATA_ERROR_MAGIC)
1981 {
1982 BZ2_bzclose(inbz2);
1983 goto PLAIN_FILE;
1984 }
1985 else if (!silent)
1986 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1987 pathname, err);
1988 rc = 2; /* The normal "something went wrong" code */
1989 }
1990 BZ2_bzclose(inbz2);
1991 }
1992 else
1993 #endif
1994
1995 /* Normal file close */
1996
1997 fclose(in);
1998
1999 /* Pass back the yield from pcregrep(). */
2000
2001 return rc;
2002 }
2003
2004
2005
2006
2007 /*************************************************
2008 * Usage function *
2009 *************************************************/
2010
2011 static int
2012 usage(int rc)
2013 {
2014 option_item *op;
2015 fprintf(stderr, "Usage: pcregrep [-");
2016 for (op = optionlist; op->one_char != 0; op++)
2017 {
2018 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
2019 }
2020 fprintf(stderr, "] [long options] [pattern] [files]\n");
2021 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
2022 "options.\n");
2023 return rc;
2024 }
2025
2026
2027
2028
2029 /*************************************************
2030 * Help function *
2031 *************************************************/
2032
2033 static void
2034 help(void)
2035 {
2036 option_item *op;
2037
2038 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
2039 printf("Search for PATTERN in each FILE or standard input.\n");
2040 printf("PATTERN must be present if neither -e nor -f is used.\n");
2041 printf("\"-\" can be used as a file name to mean STDIN.\n");
2042
2043 #ifdef SUPPORT_LIBZ
2044 printf("Files whose names end in .gz are read using zlib.\n");
2045 #endif
2046
2047 #ifdef SUPPORT_LIBBZ2
2048 printf("Files whose names end in .bz2 are read using bzlib2.\n");
2049 #endif
2050
2051 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2052 printf("Other files and the standard input are read as plain files.\n\n");
2053 #else
2054 printf("All files are read as plain files, without any interpretation.\n\n");
2055 #endif
2056
2057 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
2058 printf("Options:\n");
2059
2060 for (op = optionlist; op->one_char != 0; op++)
2061 {
2062 int n;
2063 char s[4];
2064
2065 /* Two options were accidentally implemented and documented with underscores
2066 instead of hyphens in their names, something that was not noticed for quite a
2067 few releases. When fixing this, I left the underscored versions in the list
2068 in case people were using them. However, we don't want to display them in the
2069 help data. There are no other options that contain underscores, and we do not
2070 expect ever to implement such options. Therefore, just omit any option that
2071 contains an underscore. */
2072
2073 if (strchr(op->long_name, '_') != NULL) continue;
2074
2075 if (op->one_char > 0 && (op->long_name)[0] == 0)
2076 n = 31 - printf(" -%c", op->one_char);
2077 else
2078 {
2079 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
2080 else strcpy(s, " ");
2081 n = 31 - printf(" %s --%s", s, op->long_name);
2082 }
2083
2084 if (n < 1) n = 1;
2085 printf("%.*s%s\n", n, " ", op->help_text);
2086 }
2087
2088 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
2089 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
2090 printf("When reading patterns or file names from a file, trailing white\n");
2091 printf("space is removed and blank lines are ignored.\n");
2092 printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
2093
2094 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
2095 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
2096 }
2097
2098
2099
2100
2101 /*************************************************
2102 * Handle a single-letter, no data option *
2103 *************************************************/
2104
2105 static int
2106 handle_option(int letter, int options)
2107 {
2108 switch(letter)
2109 {
2110 case N_FOFFSETS: file_offsets = TRUE; break;
2111 case N_HELP: help(); pcregrep_exit(0);
2112 case N_LBUFFER: line_buffered = TRUE; break;
2113 case N_LOFFSETS: line_offsets = number = TRUE; break;
2114 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2115 case 'a': binary_files = BIN_TEXT; break;
2116 case 'c': count_only = TRUE; break;
2117 case 'F': process_options |= PO_FIXED_STRINGS; break;
2118 case 'H': filenames = FN_FORCE; break;
2119 case 'I': binary_files = BIN_NOMATCH; break;
2120 case 'h': filenames = FN_NONE; break;
2121 case 'i': options |= PCRE_CASELESS; break;
2122 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2123 case 'L': filenames = FN_NOMATCH_ONLY; break;
2124 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2125 case 'n': number = TRUE; break;
2126 case 'o': only_matching = 0; break;
2127 case 'q': quiet = TRUE; break;
2128 case 'r': dee_action = dee_RECURSE; break;
2129 case 's': silent = TRUE; break;
2130 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2131 case 'v': invert = TRUE; break;
2132 case 'w': process_options |= PO_WORD_MATCH; break;
2133 case 'x': process_options |= PO_LINE_MATCH; break;
2134
2135 case 'V':
2136 fprintf(stdout, "pcregrep version %s\n", pcre_version());
2137 pcregrep_exit(0);
2138 break;
2139
2140 default:
2141 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2142 pcregrep_exit(usage(2));
2143 }
2144
2145 return options;
2146 }
2147
2148
2149
2150
2151 /*************************************************
2152 * Construct printed ordinal *
2153 *************************************************/
2154
2155 /* This turns a number into "1st", "3rd", etc. */
2156
static char *
ordin(int n)
{
/* The result lives in a static buffer, so it is overwritten by the next call.
Sixteen bytes is enough for any 32-bit int (at most 11 characters including a
sign) plus the two-letter suffix and the terminating zero. */

static char buffer[16];
const char *suffix = "th";
int last_two = n % 100;

/* Numbers ending in 11, 12, or 13 take "th" (11th, 112th), not the suffix
that their final digit would otherwise select. */

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, suffix);
return buffer;
}
2173
2174
2175
2176 /*************************************************
2177 * Compile a single pattern *
2178 *************************************************/
2179
2180 /* Do nothing if the pattern has already been compiled. This is the case for
2181 include/exclude patterns read from a file.
2182
2183 When the -F option has been used, each "pattern" may be a list of strings,
2184 separated by line breaks. They will be matched literally. We split such a
2185 string and compile the first substring, inserting an additional block into the
2186 pattern chain.
2187
2188 Arguments:
2189 p points to the pattern block
2190 options the PCRE options
2191 popts the processing options
2192 fromfile TRUE if the pattern was read from a file
2193 fromtext file name or identifying text (e.g. "include")
2194 count 0 if this is the only command line pattern, or
2195 number of the command line pattern, or
2196 linenumber for a pattern from a file
2197
2198 Returns: TRUE on success, FALSE after an error
2199 */
2200
2201 static BOOL
2202 compile_pattern(patstr *p, int options, int popts, int fromfile,
2203 const char *fromtext, int count)
2204 {
2205 char buffer[PATBUFSIZE];
2206 const char *error;
2207 char *ps = p->string;
2208 int patlen = strlen(ps);
2209 int errptr;
2210
2211 if (p->compiled != NULL) return TRUE;
2212
2213 if ((popts & PO_FIXED_STRINGS) != 0)
2214 {
2215 int ellength;
2216 char *eop = ps + patlen;
2217 char *pe = end_of_line(ps, eop, &ellength);
2218
2219 if (ellength != 0)
2220 {
2221 if (add_pattern(pe, p) == NULL) return FALSE;
2222 patlen = (int)(pe - ps - ellength);
2223 }
2224 }
2225
2226 sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2227 p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2228 if (p->compiled != NULL) return TRUE;
2229
2230 /* Handle compile errors */
2231
2232 errptr -= (int)strlen(prefix[popts]);
2233 if (errptr > patlen) errptr = patlen;
2234
2235 if (fromfile)
2236 {
2237 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2238 "at offset %d: %s\n", count, fromtext, errptr, error);
2239 }
2240 else
2241 {
2242 if (count == 0)
2243 fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2244 fromtext, errptr, error);
2245 else
2246 fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2247 ordin(count), fromtext, errptr, error);
2248 }
2249
2250 return FALSE;
2251 }
2252
2253
2254
2255 /*************************************************
2256 * Read and compile a file of patterns *
2257 *************************************************/
2258
2259 /* This is used for pattern files given by -f, --include-from, and --exclude-from.
2260
2261 Arguments:
2262 name the name of the file; "-" is stdin
2263 patptr pointer to the pattern chain anchor
2264 patlastptr pointer to the last pattern pointer
2265 popts the process options to pass to compile_pattern()
2266
2267 Returns: TRUE if all went well
2268 */
2269
2270 static BOOL
2271 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2272 {
2273 int linenumber = 0;
2274 FILE *f;
2275 char *filename;
2276 char buffer[PATBUFSIZE];
2277
2278 if (strcmp(name, "-") == 0)
2279 {
2280 f = stdin;
2281 filename = stdin_name;
2282 }
2283 else
2284 {
2285 f = fopen(name, "r");
2286 if (f == NULL)
2287 {
2288 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2289 return FALSE;
2290 }
2291 filename = name;
2292 }
2293
2294 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2295 {
2296 char *s = buffer + (int)strlen(buffer);
2297 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2298 *s = 0;
2299 linenumber++;
2300 if (buffer[0] == 0) continue; /* Skip blank lines */
2301
2302 /* Note: this call to add_pattern() puts a pointer to the local variable
2303 "buffer" into the pattern chain. However, that pointer is used only when
2304 compiling the pattern, which happens immediately below, so we flatten it
2305 afterwards, as a precaution against any later code trying to use it. */
2306
2307 *patlastptr = add_pattern(buffer, *patlastptr);
2308 if (*patlastptr == NULL) return FALSE;
2309 if (*patptr == NULL) *patptr = *patlastptr;
2310
2311 /* This loop is needed because compiling a "pattern" when -F is set may add
2312 on additional literal patterns if the original contains a newline. In the
2313 common case, it never will, because fgets() stops at a newline. However,
2314 the -N option can be used to give pcregrep a different newline setting. */
2315
2316 for(;;)
2317 {
2318 if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2319 linenumber))
2320 return FALSE;
2321 (*patlastptr)->string = NULL; /* Insurance */
2322 if ((*patlastptr)->next == NULL) break;
2323 *patlastptr = (*patlastptr)->next;
2324 }
2325 }
2326
2327 if (f != stdin) fclose(f);
2328 return TRUE;
2329 }
2330
2331
2332
2333 /*************************************************
2334 * Main program *
2335 *************************************************/
2336
2337 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2338
2339 int
2340 main(int argc, char **argv)
2341 {
2342 int i, j;
2343 int rc = 1;
2344 BOOL only_one_at_top;
2345 patstr *cp;
2346 fnstr *fn;
2347 const char *locale_from = "--locale";
2348 const char *error;
2349
2350 #ifdef SUPPORT_PCREGREP_JIT
2351 pcre_jit_stack *jit_stack = NULL;
2352 #endif
2353
2354 /* Set the default line ending value from the default in the PCRE library;
2355 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2356 Note that the return values from pcre_config(), though derived from the ASCII
2357 codes, are the same in EBCDIC environments, so we must use the actual values
2358 rather than escapes such as as '\r'. */
2359
2360 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2361 switch(i)
2362 {
2363 default: newline = (char *)"lf"; break;
2364 case 13: newline = (char *)"cr"; break;
2365 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2366 case -1: newline = (char *)"any"; break;
2367 case -2: newline = (char *)"anycrlf"; break;
2368 }
2369
2370 /* Process the options */
2371
2372 for (i = 1; i < argc; i++)
2373 {
2374 option_item *op = NULL;
2375 char *option_data = (char *)""; /* default to keep compiler happy */
2376 BOOL longop;
2377 BOOL longopwasequals = FALSE;
2378
2379 if (argv[i][0] != '-') break;
2380
2381 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2382 but only if we have previously had -e or -f to define the patterns. */
2383
2384 if (argv[i][1] == 0)
2385 {
2386 if (pattern_files != NULL || patterns != NULL) break;
2387 else pcregrep_exit(usage(2));
2388 }
2389
2390 /* Handle a long name option, or -- to terminate the options */
2391
2392 if (argv[i][1] == '-')
2393 {
2394 char *arg = argv[i] + 2;
2395 char *argequals = strchr(arg, '=');
2396
2397 if (*arg == 0) /* -- terminates options */
2398 {
2399 i++;
2400 break; /* out of the options-handling loop */
2401 }
2402
2403 longop = TRUE;
2404
2405 /* Some long options have data that follows after =, for example file=name.
2406 Some options have variations in the long name spelling: specifically, we
2407 allow "regexp" because GNU grep allows it, though I personally go along
2408 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2409 These options are entered in the table as "regex(p)". Options can be in
2410 both these categories. */
2411
2412 for (op = optionlist; op->one_char != 0; op++)
2413 {
2414 char *opbra = strchr(op->long_name, '(');
2415 char *equals = strchr(op->long_name, '=');
2416
2417 /* Handle options with only one spelling of the name */
2418
2419 if (opbra == NULL) /* Does not contain '(' */
2420 {
2421 if (equals == NULL) /* Not thing=data case */
2422 {
2423 if (strcmp(arg, op->long_name) == 0) break;
2424 }
2425 else /* Special case xxx=data */
2426 {
2427 int oplen = (int)(equals - op->long_name);
2428 int arglen = (argequals == NULL)?
2429 (int)strlen(arg) : (int)(argequals - arg);
2430 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2431 {
2432 option_data = arg + arglen;
2433 if (*option_data == '=')
2434 {
2435 option_data++;
2436 longopwasequals = TRUE;
2437 }
2438 break;
2439 }
2440 }
2441 }
2442
2443 /* Handle options with an alternate spelling of the name */
2444
2445 else
2446 {
2447 char buff1[24];
2448 char buff2[24];
2449
2450 int baselen = (int)(opbra - op->long_name);
2451 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2452 int arglen = (argequals == NULL || equals == NULL)?
2453 (int)strlen(arg) : (int)(argequals - arg);
2454
2455 sprintf(buff1, "%.*s", baselen, op->long_name);
2456 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2457
2458 if (strncmp(arg, buff1, arglen) == 0 ||
2459 strncmp(arg, buff2, arglen) == 0)
2460 {
2461 if (equals != NULL && argequals != NULL)
2462 {
2463 option_data = argequals;
2464 if (*option_data == '=')
2465 {
2466 option_data++;
2467 longopwasequals = TRUE;
2468 }
2469 }
2470 break;
2471 }
2472 }
2473 }
2474
2475 if (op->one_char == 0)
2476 {
2477 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2478 pcregrep_exit(usage(2));
2479 }
2480 }
2481
2482 /* Jeffrey Friedl's debugging harness uses these additional options which
2483 are not in the right form for putting in the option table because they use
2484 only one hyphen, yet are more than one character long. By putting them
2485 separately here, they will not get displayed as part of the help() output,
2486 but I don't think Jeffrey will care about that. */
2487
2488 #ifdef JFRIEDL_DEBUG
2489 else if (strcmp(argv[i], "-pre") == 0) {
2490 jfriedl_prefix = argv[++i];
2491 continue;
2492 } else if (strcmp(argv[i], "-post") == 0) {
2493 jfriedl_postfix = argv[++i];
2494 continue;
2495 } else if (strcmp(argv[i], "-XT") == 0) {
2496 sscanf(argv[++i], "%d", &jfriedl_XT);
2497 continue;
2498 } else if (strcmp(argv[i], "-XR") == 0) {
2499 sscanf(argv[++i], "%d", &jfriedl_XR);
2500 continue;
2501 }
2502 #endif
2503
2504
2505 /* One-char options; many that have no data may be in a single argument; we
2506 continue till we hit the last one or one that needs data. */
2507
2508 else
2509 {
2510 char *s = argv[i] + 1;
2511 longop = FALSE;
2512 while (*s != 0)
2513 {
2514 for (op = optionlist; op->one_char != 0; op++)
2515 {
2516 if (*s == op->one_char) break;
2517 }
2518 if (op->one_char == 0)
2519 {
2520 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2521 *s, argv[i]);
2522 pcregrep_exit(usage(2));
2523 }
2524
2525 /* Check for a single-character option that has data: OP_OP_NUMBER
2526 is used for one that either has a numerical number or defaults, i.e. the
2527 data is optional. If a digit follows, there is data; if not, carry on
2528 with other single-character options in the same string. */
2529
2530 option_data = s+1;
2531 if (op->type == OP_OP_NUMBER)
2532 {
2533 if (isdigit((unsigned char)s[1])) break;
2534 }
2535 else /* Check for end or a dataless option */
2536 {
2537 if (op->type != OP_NODATA || s[1] == 0) break;
2538 }
2539
2540 /* Handle a single-character option with no data, then loop for the
2541 next character in the string. */
2542
2543 pcre_options = handle_option(*s++, pcre_options);
2544 }
2545 }
2546
2547 /* At this point we should have op pointing to a matched option. If the type
2548 is NO_DATA, it means that there is no data, and the option might set
2549 something in the PCRE options. */
2550
2551 if (op->type == OP_NODATA)
2552 {
2553 pcre_options = handle_option(op->one_char, pcre_options);
2554 continue;
2555 }
2556
2557 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2558 either has a value or defaults to something. It cannot have data in a
2559 separate item. At the moment, the only such options are "colo(u)r",
2560 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2561
2562 if (*option_data == 0 &&
2563 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2564 {
2565 switch (op->one_char)
2566 {
2567 case N_COLOUR:
2568 colour_option = (char *)"auto";
2569 break;
2570
2571 case 'o':
2572 only_matching = 0;
2573 break;
2574
2575 #ifdef JFRIEDL_DEBUG
2576 case 'S':
2577 S_arg = 0;
2578 break;
2579 #endif
2580 }
2581 continue;
2582 }
2583
2584 /* Otherwise, find the data string for the option. */
2585
2586 if (*option_data == 0)
2587 {
2588 if (i >= argc - 1 || longopwasequals)
2589 {
2590 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2591 pcregrep_exit(usage(2));
2592 }
2593 option_data = argv[++i];
2594 }
2595
2596 /* If the option type is OP_PATLIST, it's the -e option, or one of the
2597 include/exclude options, which can be called multiple times to create lists
2598 of patterns. */
2599
2600 if (op->type == OP_PATLIST)
2601 {
2602 patdatastr *pd = (patdatastr *)op->dataptr;
2603 *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2604 if (*(pd->lastptr) == NULL) goto EXIT2;
2605 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2606 }
2607
2608 /* If the option type is OP_FILELIST, it's one of the options that names a
2609 file. */
2610
2611 else if (op->type == OP_FILELIST)
2612 {
2613 fndatastr *fd = (fndatastr *)op->dataptr;
2614 fn = (fnstr *)malloc(sizeof(fnstr));
2615 if (fn == NULL)
2616 {
2617 fprintf(stderr, "pcregrep: malloc failed\n");
2618 goto EXIT2;
2619 }
2620 fn->next = NULL;
2621 fn->name = option_data;
2622 if (*(fd->anchor) == NULL)
2623 *(fd->anchor) = fn;
2624 else
2625 (*(fd->lastptr))->next = fn;
2626 *(fd->lastptr) = fn;
2627 }
2628
2629 /* Handle OP_BINARY_FILES */
2630
2631 else if (op->type == OP_BINFILES)
2632 {
2633 if (strcmp(option_data, "binary") == 0)
2634 binary_files = BIN_BINARY;
2635 else if (strcmp(option_data, "without-match") == 0)
2636 binary_files = BIN_NOMATCH;
2637 else if (strcmp(option_data, "text") == 0)
2638 binary_files = BIN_TEXT;
2639 else
2640 {
2641 fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2642 option_data);
2643 pcregrep_exit(usage(2));
2644 }
2645 }
2646
2647 /* Otherwise, deal with single string or numeric data values. */
2648
2649 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2650 op->type != OP_OP_NUMBER)
2651 {
2652 *((char **)op->dataptr) = option_data;
2653 }
2654
2655 /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2656 only for unpicking arguments, so just keep it simple. */
2657
2658 else
2659 {
2660 unsigned long int n = 0;
2661 char *endptr = option_data;
2662 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2663 while (isdigit((unsigned char)(*endptr)))
2664 n = n * 10 + (int)(*endptr++ - '0');
2665 if (toupper(*endptr) == 'K')
2666 {
2667 n *= 1024;
2668 endptr++;
2669 }
2670 else if (toupper(*endptr) == 'M')
2671 {
2672 n *= 1024*1024;
2673 endptr++;
2674 }
2675 if (*endptr != 0)
2676 {
2677 if (longop)
2678 {
2679 char *equals = strchr(op->long_name, '=');
2680 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2681 (int)(equals - op->long_name);
2682 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2683 option_data, nlen, op->long_name);
2684 }
2685 else
2686 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2687 option_data, op->one_char);
2688 pcregrep_exit(usage(2));
2689 }
2690 if (op->type == OP_LONGNUMBER)
2691 *((unsigned long int *)op->dataptr) = n;
2692 else
2693 *((int *)op->dataptr) = n;
2694 }
2695 }
2696
2697 /* Options have been decoded. If -C was used, its value is used as a default
2698 for -A and -B. */
2699
2700 if (both_context > 0)
2701 {
2702 if (after_context == 0) after_context = both_context;
2703 if (before_context == 0) before_context = both_context;
2704 }
2705
2706 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2707 However, the latter two set only_matching. */
2708
2709 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2710 (file_offsets && line_offsets))
2711 {
2712 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2713 "and/or --line-offsets\n");
2714 pcregrep_exit(usage(2));
2715 }
2716
2717 if (file_offsets || line_offsets) only_matching = 0;
2718
2719 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2720 LC_ALL environment variable is set, and if so, use it. */
2721
2722 if (locale == NULL)
2723 {
2724 locale = getenv("LC_ALL");
2725 locale_from = "LCC_ALL";
2726 }
2727
2728 if (locale == NULL)
2729 {
2730 locale = getenv("LC_CTYPE");
2731 locale_from = "LC_CTYPE";
2732 }
2733
2734 /* If a locale has been provided, set it, and generate the tables the PCRE
2735 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2736
2737 if (locale != NULL)
2738 {
2739 if (setlocale(LC_CTYPE, locale) == NULL)
2740 {
2741 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2742 locale, locale_from);
2743 return 2;
2744 }
2745 pcretables = pcre_maketables();
2746 }
2747
2748 /* Sort out colouring */
2749
2750 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2751 {
2752 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2753 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2754 else
2755 {
2756 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2757 colour_option);
2758 return 2;
2759 }
2760 if (do_colour)
2761 {
2762 char *cs = getenv("PCREGREP_COLOUR");
2763 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2764 if (cs != NULL) colour_string = cs;
2765 }
2766 }
2767
2768 /* Interpret the newline type; the default settings are Unix-like. */
2769
2770 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2771 {
2772 pcre_options |= PCRE_NEWLINE_CR;
2773 endlinetype = EL_CR;
2774 }
2775 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2776 {
2777 pcre_options |= PCRE_NEWLINE_LF;
2778 endlinetype = EL_LF;
2779 }
2780 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2781 {
2782 pcre_options |= PCRE_NEWLINE_CRLF;
2783 endlinetype = EL_CRLF;
2784 }
2785 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2786 {
2787 pcre_options |= PCRE_NEWLINE_ANY;
2788 endlinetype = EL_ANY;
2789 }
2790 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2791 {
2792 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2793 endlinetype = EL_ANYCRLF;
2794 }
2795 else
2796 {
2797 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2798 return 2;
2799 }
2800
2801 /* Interpret the text values for -d and -D */
2802
2803 if (dee_option != NULL)
2804 {
2805 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2806 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2807 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2808 else
2809 {
2810 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2811 return 2;
2812 }
2813 }
2814
2815 if (DEE_option != NULL)
2816 {
2817 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2818 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2819 else
2820 {
2821 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2822 return 2;
2823 }
2824 }
2825
2826 /* Check the values for Jeffrey Friedl's debugging options. */
2827
2828 #ifdef JFRIEDL_DEBUG
2829 if (S_arg > 9)
2830 {
2831 fprintf(stderr, "pcregrep: bad value for -S option\n");
2832 return 2;
2833 }
2834 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2835 {
2836 if (jfriedl_XT == 0) jfriedl_XT = 1;
2837 if (jfriedl_XR == 0) jfriedl_XR = 1;
2838 }
2839 #endif
2840
2841 /* Get memory for the main buffer. */
2842
2843 bufsize = 3*bufthird;
2844 main_buffer = (char *)malloc(bufsize);
2845
2846 if (main_buffer == NULL)
2847 {
2848 fprintf(stderr, "pcregrep: malloc failed\n");
2849 goto EXIT2;
2850 }
2851
2852 /* If no patterns were provided by -e, and there are no files provided by -f,
2853 the first argument is the one and only pattern, and it must exist. */
2854
2855 if (patterns == NULL && pattern_files == NULL)
2856 {
2857 if (i >= argc) return usage(2);
2858 patterns = patterns_last = add_pattern(argv[i++], NULL);
2859 if (patterns == NULL) goto EXIT2;
2860 }
2861
2862 /* Compile the patterns that were provided on the command line, either by
2863 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
2864 after all the command-line options are read so that we know which PCRE options
2865 to use. When -F is used, compile_pattern() may add another block into the
2866 chain, so we must not access the next pointer till after the compile. */
2867
2868 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2869 {
2870 if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
2871 (j == 1 && patterns->next == NULL)? 0 : j))
2872 goto EXIT2;
2873 }
2874
2875 /* Read and compile the regular expressions that are provided in files. */
2876
2877 for (fn = pattern_files; fn != NULL; fn = fn->next)
2878 {
2879 if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
2880 goto EXIT2;
2881 }
2882
2883 /* Study the regular expressions, as we will be running them many times. If an
2884 extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
2885 returned, even if studying produces no data. */
2886
2887 if (match_limit > 0 || match_limit_recursion > 0)
2888 study_options |= PCRE_STUDY_EXTRA_NEEDED;
2889
2890 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
2891
2892 #ifdef SUPPORT_PCREGREP_JIT
2893 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2894 jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2895 #endif
2896
2897 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2898 {
2899 cp->hint = pcre_study(cp->compiled, study_options, &error);
2900 if (error != NULL)
2901 {
2902 char s[16];
2903 if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
2904 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2905 goto EXIT2;
2906 }
2907 #ifdef SUPPORT_PCREGREP_JIT
2908 if (jit_stack != NULL && cp->hint != NULL)
2909 pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
2910 #endif
2911 }
2912
2913 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2914 pcre_extra block for each pattern. There will always be an extra block because
2915 of the use of PCRE_STUDY_EXTRA_NEEDED above. */
2916
2917 for (cp = patterns; cp != NULL; cp = cp->next)
2918 {
2919 if (match_limit > 0)
2920 {
2921 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
2922 cp->hint->match_limit = match_limit;
2923 }
2924
2925 if (match_limit_recursion > 0)
2926 {
2927 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2928 cp->hint->match_limit_recursion = match_limit_recursion;
2929 }
2930 }
2931
2932 /* If there are include or exclude patterns read from the command line, compile
2933 them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
2934 0. */
2935
2936 for (j = 0; j < 4; j++)
2937 {
2938 int k;
2939 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
2940 {
2941 if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
2942 (k == 1 && cp->next == NULL)? 0 : k))
2943 goto EXIT2;
2944 }
2945 }
2946
2947 /* Read and compile include/exclude patterns from files. */
2948
2949 for (fn = include_from; fn != NULL; fn = fn->next)
2950 {
2951 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
2952 goto EXIT2;
2953 }
2954
2955 for (fn = exclude_from; fn != NULL; fn = fn->next)
2956 {
2957 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
2958 goto EXIT2;
2959 }
2960
2961 /* If there are no files that contain lists of files to search, and there are
2962 no file arguments, search stdin, and then exit. */
2963
2964 if (file_lists == NULL && i >= argc)
2965 {
2966 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2967 (filenames > FN_DEFAULT)? stdin_name : NULL);
2968 goto EXIT;
2969 }
2970
2971 /* If any files that contains a list of files to search have been specified,
2972 read them line by line and search the given files. */
2973
2974 for (fn = file_lists; fn != NULL; fn = fn->next)
2975 {
2976 char buffer[PATBUFSIZE];
2977 FILE *fl;
2978 if (strcmp(fn->name, "-") == 0) fl = stdin; else
2979 {
2980 fl = fopen(fn->name, "rb");
2981 if (fl == NULL)
2982 {
2983 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
2984 strerror(errno));
2985 goto EXIT2;
2986 }
2987 }
2988 while (fgets(buffer, PATBUFSIZE, fl) != NULL)
2989 {
2990 int frc;
2991 char *end = buffer + (int)strlen(buffer);
2992 while (end > buffer && isspace(end[-1])) end--;
2993 *end = 0;
2994 if (*buffer != 0)
2995 {
2996 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
2997 if (frc > 1) rc = frc;
2998 else if (frc == 0 && rc == 1) rc = 0;
2999 }
3000 }
3001 if (fl != stdin) fclose(fl);
3002 }
3003
3004 /* After handling file-list, work through remaining arguments. Pass in the fact
3005 that there is only one argument at top level - this suppresses the file name if
3006 the argument is not a directory and filenames are not otherwise forced. */
3007
3008 only_one_at_top = i == argc - 1 && file_lists == NULL;
3009
3010 for (; i < argc; i++)
3011 {
3012 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3013 only_one_at_top);
3014 if (frc > 1) rc = frc;
3015 else if (frc == 0 && rc == 1) rc = 0;
3016 }
3017
3018 EXIT:
3019 #ifdef SUPPORT_PCREGREP_JIT
3020 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3021 #endif
3022
3023 if (main_buffer != NULL) free(main_buffer);
3024
3025 free_pattern_chain(patterns);
3026 free_pattern_chain(include_patterns);
3027 free_pattern_chain(include_dir_patterns);
3028 free_pattern_chain(exclude_patterns);
3029 free_pattern_chain(exclude_dir_patterns);
3030
3031 free_file_chain(exclude_from);
3032 free_file_chain(include_from);
3033 free_file_chain(pattern_files);
3034 free_file_chain(file_lists);
3035
3036 pcregrep_exit(rc);
3037
3038 EXIT2:
3039 rc = 2;
3040 goto EXIT;
3041 }
3042
3043 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5