/[pcre2]/code/trunk/src/pcre2grep.c
ViewVC logotype

Contents of /code/trunk/src/pcre2grep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 651 - (show annotations)
Wed Jan 11 17:10:28 2017 UTC (4 days, 22 hours ago) by ph10
File MIME type: text/plain
File size: 109475 byte(s)
Ignore all JIT compile errors in pcre2grep.
1 /*************************************************
2 * pcre2grep program *
3 *************************************************/
4
5 /* This is a grep program that uses the 8-bit PCRE regular expression library
6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7 and native z/OS systems it can recurse into directories, and in z/OS it can
8 handle PDS files.
9
10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11 additional header is required. That header is not included in the main PCRE2
12 distribution because other apparatus is needed to compile pcre2grep for z/OS.
13 The header can be found in the special z/OS distribution, which is available
14 from www.zaconsultants.net or from www.cbttape.org.
15
16 Copyright (c) 1997-2016 University of Cambridge
17
18 -----------------------------------------------------------------------------
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions are met:
21
22 * Redistributions of source code must retain the above copyright notice,
23 this list of conditions and the following disclaimer.
24
25 * Redistributions in binary form must reproduce the above copyright
26 notice, this list of conditions and the following disclaimer in the
27 documentation and/or other materials provided with the distribution.
28
29 * Neither the name of the University of Cambridge nor the names of its
30 contributors may be used to endorse or promote products derived from
31 this software without specific prior written permission.
32
33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 POSSIBILITY OF SUCH DAMAGE.
44 -----------------------------------------------------------------------------
45 */
46
47 #ifdef HAVE_CONFIG_H
48 #include "config.h"
49 #endif
50
51 #include <ctype.h>
52 #include <locale.h>
53 #include <stdio.h>
54 #include <string.h>
55 #include <stdlib.h>
56 #include <errno.h>
57
58 #include <sys/types.h>
59 #include <sys/stat.h>
60
61 #if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) && !defined WIN32
62 #define WIN32
63 #endif
64
65 #ifdef WIN32
66 #include <io.h> /* For _setmode() */
67 #include <fcntl.h> /* For _O_BINARY */
68 #endif
69
70 #ifdef SUPPORT_PCRE2GREP_CALLOUT
71 #ifdef WIN32
72 #include <process.h>
73 #else
74 #include <sys/wait.h>
75 #endif
76 #endif
77
78 #ifdef HAVE_UNISTD_H
79 #include <unistd.h>
80 #endif
81
82 #ifdef SUPPORT_LIBZ
83 #include <zlib.h>
84 #endif
85
86 #ifdef SUPPORT_LIBBZ2
87 #include <bzlib.h>
88 #endif
89
90 #define PCRE2_CODE_UNIT_WIDTH 8
91 #include "pcre2.h"
92
93 #define FALSE 0
94 #define TRUE 1
95
96 typedef int BOOL;
97
98 #define OFFSET_SIZE 33
99
100 #if BUFSIZ > 8192
101 #define MAXPATLEN BUFSIZ
102 #else
103 #define MAXPATLEN 8192
104 #endif
105
106 #define PATBUFSIZE (MAXPATLEN + 10) /* Allows for prefix+suffix */
107
108 /* Values for the "filenames" variable, which specifies options for file name
109 output. The order is important; it is assumed that a file name is wanted for
110 all values greater than FN_DEFAULT. */
111
112 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
113
114 /* File reading styles */
115
116 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
117
118 /* Actions for the -d and -D options */
119
120 enum { dee_READ, dee_SKIP, dee_RECURSE };
121 enum { DEE_READ, DEE_SKIP };
122
123 /* Actions for special processing options (flag bits) */
124
125 #define PO_WORD_MATCH 0x0001
126 #define PO_LINE_MATCH 0x0002
127 #define PO_FIXED_STRINGS 0x0004
128
129 /* Binary file options */
130
131 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
132
133 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
134 environments), a warning is issued if the value of fwrite() is ignored.
135 Unfortunately, casting to (void) does not suppress the warning. To get round
136 this, we use a macro that compiles a fudge. Oddly, this does not also seem to
137 apply to fprintf(). */
138
/* The fwrite() result is consumed by an empty "if" so that no unused-result
warning is issued. The do/while(0) wrapper makes the expansion a single
statement, so that FWRITE(...); can be used safely as the body of an if/else. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
140
141 /* Under Windows, we have to set stdout to be binary, so that it does not
142 convert \r\n at the ends of output lines to \r\r\n. However, that means that
143 any messages written to stdout must have \r\n as their line terminator. This is
144 handled by using STDOUT_NL as the newline string. */
145
146 #ifdef WIN32
147 #define STDOUT_NL "\r\n"
148 #else
149 #define STDOUT_NL "\n"
150 #endif
151
152
153
154 /*************************************************
155 * Global variables *
156 *************************************************/
157
158 /* Jeffrey Friedl has some debugging requirements that are not part of the
159 regular code. */
160
161 #ifdef JFRIEDL_DEBUG
162 static int S_arg = -1;
163 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
164 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
165 static const char *jfriedl_prefix = "";
166 static const char *jfriedl_postfix = "";
167 #endif
168
169 static const char *colour_string = "1;31";
170 static const char *colour_option = NULL;
171 static const char *dee_option = NULL;
172 static const char *DEE_option = NULL;
173 static const char *locale = NULL;
174 static const char *newline_arg = NULL;
175 static const char *om_separator = "";
176 static const char *stdin_name = "(standard input)";
177
178 static char *main_buffer = NULL;
179
180 static int after_context = 0;
181 static int before_context = 0;
182 static int binary_files = BIN_BINARY;
183 static int both_context = 0;
184 static int bufthird = PCRE2GREP_BUFSIZE;
185 static int max_bufthird = PCRE2GREP_MAX_BUFSIZE;
186 static int bufsize = 3*PCRE2GREP_BUFSIZE;
187 static int endlinetype;
188 static int total_count = 0;
189 static int counts_printed = 0;
190
191 #ifdef WIN32
192 static int dee_action = dee_SKIP;
193 #else
194 static int dee_action = dee_READ;
195 #endif
196 static int DEE_action = DEE_READ;
197 static int error_count = 0;
198 static int filenames = FN_DEFAULT;
199
200 #ifdef SUPPORT_PCRE2GREP_JIT
201 static BOOL use_jit = TRUE;
202 #else
203 static BOOL use_jit = FALSE;
204 #endif
205
206 static const uint8_t *character_tables = NULL;
207
208 static uint32_t pcre2_options = 0;
209 static uint32_t process_options = 0;
210 static uint32_t match_limit = 0;
211 static uint32_t recursion_limit = 0;
212
213 static pcre2_compile_context *compile_context;
214 static pcre2_match_context *match_context;
215 static pcre2_match_data *match_data;
216 static PCRE2_SIZE *offsets;
217
218 static BOOL count_only = FALSE;
219 static BOOL do_colour = FALSE;
220 #ifdef WIN32
221 static BOOL do_ansi = FALSE;
222 #endif
223 static BOOL file_offsets = FALSE;
224 static BOOL hyphenpending = FALSE;
225 static BOOL invert = FALSE;
226 static BOOL line_buffered = FALSE;
227 static BOOL line_offsets = FALSE;
228 static BOOL multiline = FALSE;
229 static BOOL number = FALSE;
230 static BOOL omit_zero_count = FALSE;
231 static BOOL resource_error = FALSE;
232 static BOOL quiet = FALSE;
233 static BOOL show_only_matching = FALSE;
234 static BOOL show_total_count = FALSE;
235 static BOOL silent = FALSE;
236 static BOOL utf = FALSE;
237
238 /* Structure for list of --only-matching capturing numbers. */
239
240 typedef struct omstr {
241 struct omstr *next;
242 int groupnum;
243 } omstr;
244
245 static omstr *only_matching = NULL;
246 static omstr *only_matching_last = NULL;
247
248 /* Structure for holding the two variables that describe a number chain. */
249
250 typedef struct omdatastr {
251 omstr **anchor;
252 omstr **lastptr;
253 } omdatastr;
254
255 static omdatastr only_matching_data = { &only_matching, &only_matching_last };
256
257 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
258
259 typedef struct fnstr {
260 struct fnstr *next;
261 char *name;
262 } fnstr;
263
264 static fnstr *exclude_from = NULL;
265 static fnstr *exclude_from_last = NULL;
266 static fnstr *include_from = NULL;
267 static fnstr *include_from_last = NULL;
268
269 static fnstr *file_lists = NULL;
270 static fnstr *file_lists_last = NULL;
271 static fnstr *pattern_files = NULL;
272 static fnstr *pattern_files_last = NULL;
273
274 /* Structure for holding the two variables that describe a file name chain. */
275
276 typedef struct fndatastr {
277 fnstr **anchor;
278 fnstr **lastptr;
279 } fndatastr;
280
281 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
282 static fndatastr include_from_data = { &include_from, &include_from_last };
283 static fndatastr file_lists_data = { &file_lists, &file_lists_last };
284 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
285
286 /* Structure for pattern and its compiled form; used for matching patterns and
287 also for include/exclude patterns. */
288
289 typedef struct patstr {
290 struct patstr *next;
291 char *string;
292 pcre2_code *compiled;
293 } patstr;
294
295 static patstr *patterns = NULL;
296 static patstr *patterns_last = NULL;
297 static patstr *include_patterns = NULL;
298 static patstr *include_patterns_last = NULL;
299 static patstr *exclude_patterns = NULL;
300 static patstr *exclude_patterns_last = NULL;
301 static patstr *include_dir_patterns = NULL;
302 static patstr *include_dir_patterns_last = NULL;
303 static patstr *exclude_dir_patterns = NULL;
304 static patstr *exclude_dir_patterns_last = NULL;
305
306 /* Structure holding the two variables that describe a pattern chain. A pointer
307 to such structures is used for each appropriate option. */
308
309 typedef struct patdatastr {
310 patstr **anchor;
311 patstr **lastptr;
312 } patdatastr;
313
314 static patdatastr match_patdata = { &patterns, &patterns_last };
315 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
316 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
317 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
318 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
319
320 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
321 &include_dir_patterns, &exclude_dir_patterns };
322
323 static const char *incexname[4] = { "--include", "--exclude",
324 "--include-dir", "--exclude-dir" };
325
326 /* Structure for options and list of them */
327
328 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER,
329 OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
330
331 typedef struct option_item {
332 int type;
333 int one_char;
334 void *dataptr;
335 const char *long_name;
336 const char *help_text;
337 } option_item;
338
339 /* Options without a single-letter equivalent get a negative value. This can be
340 used to identify them. */
341
342 #define N_COLOUR (-1)
343 #define N_EXCLUDE (-2)
344 #define N_EXCLUDE_DIR (-3)
345 #define N_HELP (-4)
346 #define N_INCLUDE (-5)
347 #define N_INCLUDE_DIR (-6)
348 #define N_LABEL (-7)
349 #define N_LOCALE (-8)
350 #define N_NULL (-9)
351 #define N_LOFFSETS (-10)
352 #define N_FOFFSETS (-11)
353 #define N_LBUFFER (-12)
354 #define N_M_LIMIT (-13)
355 #define N_M_LIMIT_REC (-14)
356 #define N_BUFSIZE (-15)
357 #define N_NOJIT (-16)
358 #define N_FILE_LIST (-17)
359 #define N_BINARY_FILES (-18)
360 #define N_EXCLUDE_FROM (-19)
361 #define N_INCLUDE_FROM (-20)
362 #define N_OM_SEPARATOR (-21)
363 #define N_MAX_BUFSIZE (-22)
364
365 static option_item optionlist[] = {
366 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
367 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
368 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
369 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
370 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
371 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
372 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer starting size" },
373 { OP_NUMBER, N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number", "set processing buffer maximum size" },
374 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
375 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
376 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
377 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
378 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
379 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
380 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
381 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
382 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
383 { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
384 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
385 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
386 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
387 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
388 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
389 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
390 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
391 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
392 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
393 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
394 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
395 { OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
396 { OP_U32NUMBER, N_M_LIMIT_REC, &recursion_limit, "recursion-limit=number", "set PCRE match recursion limit option" },
397 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
398 { OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
399 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
400 #ifdef SUPPORT_PCRE2GREP_JIT
401 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
402 #else
403 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
404 #endif
405 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
406 { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
407 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
408 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
409 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
410 { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
411 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
412 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
413 { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
414 { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
415 #ifdef JFRIEDL_DEBUG
416 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
417 #endif
418 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
419 { OP_NODATA, 't', NULL, "total-count", "print total count of matching lines" },
420 { OP_NODATA, 'u', NULL, "utf", "use UTF mode" },
421 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
422 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
423 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
424 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
425 { OP_NODATA, 0, NULL, NULL, NULL }
426 };
427
428 /* Table of names for newline types. Must be kept in step with the definitions
429 of PCRE2_NEWLINE_xx in pcre2.h. */
430
431 static const char *newlines[] = {
432 "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF" };
433
434 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
435 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
436 that the combination of -w and -x has the same effect as -x on its own, so we
437 can treat them as the same. Note that the PATBUFSIZE macro assumes the longest
438 prefix+suffix is 10 characters; if anything longer is added, it must be
439 adjusted. */
440
441 static const char *prefix[] = {
442 "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
443
444 static const char *suffix[] = {
445 "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
446
447 /* UTF-8 tables - used only when the newline setting is "any". */
448
449 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
450
451 const char utf8_table4[] = {
452 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
453 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
454 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
455 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
456
457
458
459 /*************************************************
460 * Case-independent string compare *
461 *************************************************/
462
/* Compare two zero-terminated strings, ignoring case.

Arguments:
  str1      first string
  str2      second string

Returns:    0 if the strings are equal when case is ignored
            1 if the first differing character of str1 is the greater
           -1 otherwise (including when str1 is a proper prefix of str2)
*/

static int
strcmpic(const char *str1, const char *str2)
{
for (;;)
  {
  /* The <ctype.h> functions require a value representable as unsigned char
  (or EOF); passing a plain char that happens to be negative is undefined. */

  int c1 = tolower((unsigned char)*str1);
  int c2 = tolower((unsigned char)*str2);
  if (c1 != c2) return (c1 > c2)? 1 : -1;
  if (c1 == 0) return 0;   /* Both strings ended together */
  str1++;
  str2++;
  }
}
475
476
477 /*************************************************
478 * Parse GREP_COLORS *
479 *************************************************/
480
481 /* Extract ms or mt from GREP_COLORS.
482
483 Argument: the string, possibly NULL
484 Returns: the value of ms or mt, or NULL if neither present
485 */
486
/* Find the first "ms=" setting in a GREP_COLORS-style string (or, if there is
none, the first "mt=" setting) and copy its value, up to the next colon or the
end of the string, into a static buffer. Values longer than the buffer are
silently truncated. The returned pointer refers to static storage that is
overwritten by the next call. */

static char *
parse_grep_colors(const char *gc)
{
static char seq[16];
const char *src;
uint32_t n = 0;

if (gc == NULL) return NULL;

src = strstr(gc, "ms=");
if (src == NULL)
  {
  src = strstr(gc, "mt=");
  if (src == NULL) return NULL;
  }

/* Skip over the three-character key and copy the value. */

for (src += 3; n < sizeof(seq)-1 && *src != 0 && *src != ':'; src++)
  seq[n++] = *src;

seq[n] = 0;
return seq;
}
504
505
506 /*************************************************
507 * Exit from the program *
508 *************************************************/
509
510 /* If there has been a resource error, give a suitable message.
511
512 Argument: the return code
513 Returns: does not return
514 */
515
516 static void
517 pcre2grep_exit(int rc)
518 {
519 if (resource_error)
520 {
521 fprintf(stderr, "pcre2grep: Error %d, %d or %d means that a resource limit "
522 "was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
523 PCRE2_ERROR_RECURSIONLIMIT);
524 fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
525 }
526 exit(rc);
527 }
528
529
530 /*************************************************
531 * Add item to chain of patterns *
532 *************************************************/
533
534 /* Used to add an item onto a chain, or just return an unconnected item if the
535 "after" argument is NULL.
536
537 Arguments:
538 s pattern string to add
539 after if not NULL points to item to insert after
540
541 Returns: new pattern block or NULL on error
542 */
543
544 static patstr *
545 add_pattern(char *s, patstr *after)
546 {
547 patstr *p = (patstr *)malloc(sizeof(patstr));
548 if (p == NULL)
549 {
550 fprintf(stderr, "pcre2grep: malloc failed\n");
551 pcre2grep_exit(2);
552 }
553 if (strlen(s) > MAXPATLEN)
554 {
555 fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
556 MAXPATLEN);
557 free(p);
558 return NULL;
559 }
560 p->next = NULL;
561 p->string = s;
562 p->compiled = NULL;
563
564 if (after != NULL)
565 {
566 p->next = after->next;
567 after->next = p;
568 }
569 return p;
570 }
571
572
573 /*************************************************
574 * Free chain of patterns *
575 *************************************************/
576
577 /* Used for several chains of patterns.
578
579 Argument: pointer to start of chain
580 Returns: nothing
581 */
582
583 static void
584 free_pattern_chain(patstr *pc)
585 {
586 while (pc != NULL)
587 {
588 patstr *p = pc;
589 pc = p->next;
590 if (p->compiled != NULL) pcre2_code_free(p->compiled);
591 free(p);
592 }
593 }
594
595
596 /*************************************************
597 * Free chain of file names *
598 *************************************************/
599
600 /*
601 Argument: pointer to start of chain
602 Returns: nothing
603 */
604
605 static void
606 free_file_chain(fnstr *fn)
607 {
608 while (fn != NULL)
609 {
610 fnstr *f = fn;
611 fn = f->next;
612 free(f);
613 }
614 }
615
616
617 /*************************************************
618 * OS-specific functions *
619 *************************************************/
620
621 /* These functions are defined so that they can be made system specific.
622 At present there are versions for Unix-style environments, Windows, native
623 z/OS, and "no support". */
624
625
626 /************* Directory scanning Unix-style and z/OS ***********/
627
628 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
629 #include <sys/types.h>
630 #include <sys/stat.h>
631 #include <dirent.h>
632
633 #if defined NATIVE_ZOS
634 /************* Directory and PDS/E scanning for z/OS ***********/
635 /************* z/OS looks mostly like Unix with USS ************/
636 /* However, z/OS needs the #include statements in this header */
637 #include "pcrzosfs.h"
638 /* That header is not included in the main PCRE distribution because
639 other apparatus is needed to compile pcre2grep for z/OS. The header
640 can be found in the special z/OS distribution, which is available
641 from www.zaconsultants.net or from www.cbttape.org. */
642 #endif
643
644 typedef DIR directory_type;
645 #define FILESEP '/'
646
/* Test whether a path names a directory, using stat(). If stat() fails, the
result is 0 (not a directory), in the expectation that a later attempt to open
the path as a file will fail and be reported then. */

static int
isdirectory(char *filename)
{
struct stat info;
return (stat(filename, &info) < 0)? 0 : S_ISDIR(info.st_mode);
}
655
656 static directory_type *
657 opendirectory(char *filename)
658 {
659 return opendir(filename);
660 }
661
662 static char *
663 readdirectory(directory_type *dir)
664 {
665 for (;;)
666 {
667 struct dirent *dent = readdir(dir);
668 if (dent == NULL) return NULL;
669 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
670 return dent->d_name;
671 }
672 /* Control never reaches here */
673 }
674
675 static void
676 closedirectory(directory_type *dir)
677 {
678 closedir(dir);
679 }
680
681
682 /************* Test for regular file, Unix-style **********/
683
/* Test whether a path names a regular file, using stat(). If stat() fails, the
result is 1, so that the path is treated as a regular file, in the expectation
that a later attempt to open it will fail and be reported then. */

static int
isregfile(char *filename)
{
struct stat info;
return (stat(filename, &info) < 0)? 1 : S_ISREG(info.st_mode);
}
692
693
694 #if defined NATIVE_ZOS
695 /************* Test for a terminal in z/OS **********/
696 /* isatty() does not work in a TSO environment, so always give FALSE.*/
697
698 static BOOL
699 is_stdout_tty(void)
700 {
701 return FALSE;
702 }
703
704 static BOOL
705 is_file_tty(FILE *f)
706 {
707 return FALSE;
708 }
709
710
711 /************* Test for a terminal, Unix-style **********/
712
713 #else
714 static BOOL
715 is_stdout_tty(void)
716 {
717 return isatty(fileno(stdout));
718 }
719
720 static BOOL
721 is_file_tty(FILE *f)
722 {
723 return isatty(fileno(f));
724 }
725 #endif
726
727
728 /************* Print optionally coloured match Unix-style and z/OS **********/
729
730 static void
731 print_match(const char* buf, int length)
732 {
733 if (length == 0) return;
734 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
735 FWRITE(buf, 1, length, stdout);
736 if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
737 }
738
739 /* End of Unix-style or native z/OS environment functions. */
740
741
742 /************* Directory scanning in Windows ***********/
743
744 /* I (Philip Hazel) have no means of testing this code. It was contributed by
745 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
746 when it did not exist. David Byron added a patch that moved the #include of
747 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after. */
748
749 #elif defined WIN32
750
751 #ifndef STRICT
752 # define STRICT
753 #endif
754 #ifndef WIN32_LEAN_AND_MEAN
755 # define WIN32_LEAN_AND_MEAN
756 #endif
757
758 #include <windows.h>
759
760 #ifndef INVALID_FILE_ATTRIBUTES
761 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
762 #endif
763
764 /* Allow opendirectory to provide globbing, since Microsoft started doing it
765 wrong (expanding quoted arguments). */
766
767 #define iswild(name) (strpbrk(name, "*?") != NULL)
768
769 typedef struct directory_type
770 {
771 HANDLE handle;
772 BOOL first;
773 WIN32_FIND_DATA data;
774 } directory_type;
775
776 #define FILESEP '/'
777
778 int
779 isdirectory(char *filename)
780 {
781 DWORD attr = GetFileAttributes(filename);
782 if (attr == INVALID_FILE_ATTRIBUTES)
783 return 0;
784 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
785 }
786
787 directory_type *
788 opendirectory(char *filename)
789 {
790 size_t len;
791 char *pattern;
792 directory_type *dir;
793 DWORD err;
794 len = strlen(filename);
795 pattern = (char *)malloc(len + 3);
796 dir = (directory_type *)malloc(sizeof(*dir));
797 if ((pattern == NULL) || (dir == NULL))
798 {
799 fprintf(stderr, "pcre2grep: malloc failed\n");
800 pcre2grep_exit(2);
801 }
802 memcpy(pattern, filename, len);
803 if (iswild(filename))
804 pattern[len] = 0;
805 else
806 memcpy(&(pattern[len]), "\\*", 3);
807 dir->handle = FindFirstFile(pattern, &(dir->data));
808 if (dir->handle != INVALID_HANDLE_VALUE)
809 {
810 free(pattern);
811 dir->first = TRUE;
812 return dir;
813 }
814 err = GetLastError();
815 free(pattern);
816 free(dir);
817 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
818 return NULL;
819 }
820
821 char *
822 readdirectory(directory_type *dir)
823 {
824 for (;;)
825 {
826 if (!dir->first)
827 {
828 if (!FindNextFile(dir->handle, &(dir->data)))
829 return NULL;
830 }
831 else
832 {
833 dir->first = FALSE;
834 }
835 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
836 return dir->data.cFileName;
837 }
838 #ifndef _MSC_VER
839 return NULL; /* Keep compiler happy; never executed */
840 #endif
841 }
842
843 void
844 closedirectory(directory_type *dir)
845 {
846 FindClose(dir->handle);
847 free(dir);
848 }
849
850
851 /************* Test for regular file in Windows **********/
852
853 /* I don't know how to do this, or if it can be done; assume all paths are
854 regular if they are not directories. */
855
/* Windows version: any path for which isdirectory() gives zero is treated as a
regular file. */

int
isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
860
861
862 /************* Test for a terminal in Windows **********/
863
864 static BOOL
865 is_stdout_tty(void)
866 {
867 return _isatty(_fileno(stdout));
868 }
869
870 static BOOL
871 is_file_tty(FILE *f)
872 {
873 return _isatty(_fileno(f));
874 }
875
876
877 /************* Print optionally coloured match in Windows **********/
878
879 static HANDLE hstdout;
880 static CONSOLE_SCREEN_BUFFER_INFO csbi;
881 static WORD match_colour;
882
883 static void
884 print_match(const char* buf, int length)
885 {
886 if (length == 0) return;
887 if (do_colour)
888 {
889 if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
890 else SetConsoleTextAttribute(hstdout, match_colour);
891 }
892 FWRITE(buf, 1, length, stdout);
893 if (do_colour)
894 {
895 if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
896 else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
897 }
898 }
899
900 /* Convert ANSI BGR format to RGB used by Windows */
/* Swap bits 0 and 2 of a 3-bit colour value, leaving bit 1 alone. The argument
is parenthesized at each use so that an expression containing low-precedence
operators (for example a | b) is handled correctly. */

#define BGR_RGB(x) (((x) & 1 ? 4 : 0) | ((x) & 2) | ((x) & 4 ? 1 : 0))
902
903 static WORD
904 decode_ANSI_colour(const char *cs)
905 {
906 WORD result = csbi.wAttributes;
907 while (*cs)
908 {
909 if (isdigit(*cs))
910 {
911 int code = atoi(cs);
912 if (code == 1) result |= 0x08;
913 else if (code == 4) result |= 0x8000;
914 else if (code == 5) result |= 0x80;
915 else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
916 else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
917 else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
918 else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
919 /* aixterm high intensity colour codes */
920 else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
921 else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
922
923 while (isdigit(*cs)) cs++;
924 }
925
926 if (*cs) cs++;
927 }
928
929 return result;
930 }
931
932 static void
933 init_colour_output()
934 {
935 if (do_colour)
936 {
937 hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
938 /* This fails when redirected to con; try again if so. */
939 if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
940 {
941 HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
942 FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
943 GetConsoleScreenBufferInfo(hcon, &csbi);
944 CloseHandle(hcon);
945 }
946 match_colour = decode_ANSI_colour(colour_string);
947 /* No valid colour found - turn off colouring */
948 if (!match_colour) do_colour = FALSE;
949 }
950 }
951
952 /* End of Windows functions */
953
954
955 /************* Directory scanning when we can't do it ***********/
956
957 /* The type is void, and apart from isdirectory(), the functions do nothing. */
958
959 #else
960
#define FILESEP 0
typedef void directory_type;

/* Stub versions for systems with no directory support: nothing is ever a
directory, no directory can be opened, and there are never any entries. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

directory_type *
opendirectory(char *filename)
{
(void)filename;
return NULL;
}

char *
readdirectory(directory_type *dir)
{
(void)dir;
return NULL;
}

void
closedirectory(directory_type *dir)
{
(void)dir;
}
968
969
970 /************* Test for regular file when we can't do it **********/
971
972 /* Assume all files are regular. */
973
/* The name is not examined: every file is treated as a regular file. */
int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
975
976
977 /************* Test for a terminal when we can't do it **********/
978
979 static BOOL
980 is_stdout_tty(void)
981 {
982 return FALSE;
983 }
984
985 static BOOL
986 is_file_tty(FILE *f)
987 {
988 return FALSE;
989 }
990
991
992 /************* Print optionally coloured match when we can't do it **********/
993
static void
print_match(const char* buf, int length)
{
  /* Colouring is not available here: a non-empty match is copied to stdout
  unchanged, and an empty one produces no output at all. */
  if (length != 0)
    {
    FWRITE(buf, 1, length, stdout);
    }
}
1000
1001 #endif /* End of system-specific functions */
1002
1003
1004
1005 #ifndef HAVE_STRERROR
1006 /*************************************************
1007 * Provide strerror() for non-ANSI libraries *
1008 *************************************************/
1009
1010 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1011 in their libraries, but can provide the same facility by this simple
1012 alternative function. */
1013
extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  /* Only indexes within 0 .. sys_nerr-1 select an entry of sys_errlist[];
  anything else gets a fixed fallback message. */
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1023 #endif /* HAVE_STRERROR */
1024
1025
1026
1027 /*************************************************
1028 * Usage function *
1029 *************************************************/
1030
1031 static int
1032 usage(int rc)
1033 {
1034 option_item *op;
1035 fprintf(stderr, "Usage: pcre2grep [-");
1036 for (op = optionlist; op->one_char != 0; op++)
1037 {
1038 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1039 }
1040 fprintf(stderr, "] [long options] [pattern] [files]\n");
1041 fprintf(stderr, "Type `pcre2grep --help' for more information and the long "
1042 "options.\n");
1043 return rc;
1044 }
1045
1046
1047
1048 /*************************************************
1049 * Help function *
1050 *************************************************/
1051
1052 static void
1053 help(void)
1054 {
1055 option_item *op;
1056
1057 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
1058 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
1059 printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
1060
1061 #ifdef SUPPORT_PCRE2GREP_CALLOUT
1062 printf("Callout scripts in patterns are supported." STDOUT_NL);
1063 #else
1064 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
1065 #endif
1066
1067 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
1068
1069 #ifdef SUPPORT_LIBZ
1070 printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
1071 #endif
1072
1073 #ifdef SUPPORT_LIBBZ2
1074 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
1075 #endif
1076
1077 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1078 printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
1079 #else
1080 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
1081 #endif
1082
1083 printf("Example: pcre2grep -i 'hello.*world' menu.h main.c" STDOUT_NL STDOUT_NL);
1084 printf("Options:" STDOUT_NL);
1085
1086 for (op = optionlist; op->one_char != 0; op++)
1087 {
1088 int n;
1089 char s[4];
1090
1091 if (op->one_char > 0 && (op->long_name)[0] == 0)
1092 n = 31 - printf(" -%c", op->one_char);
1093 else
1094 {
1095 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1096 else strcpy(s, " ");
1097 n = 31 - printf(" %s --%s", s, op->long_name);
1098 }
1099
1100 if (n < 1) n = 1;
1101 printf("%.*s%s" STDOUT_NL, n, " ", op->help_text);
1102 }
1103
1104 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
1105 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
1106 printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
1107 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
1108 printf("space is removed and blank lines are ignored." STDOUT_NL);
1109 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
1110
1111 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
1112 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
1113 }
1114
1115
1116
1117 /*************************************************
1118 * Test exclude/includes *
1119 *************************************************/
1120
1121 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
1122 there are no includes, the path must match an include pattern.
1123
1124 Arguments:
1125 path the path to be matched
1126 ip the chain of include patterns
1127 ep the chain of exclude patterns
1128
1129 Returns: TRUE if the path is not excluded
1130 */
1131
1132 static BOOL
1133 test_incexc(char *path, patstr *ip, patstr *ep)
1134 {
1135 int plen = strlen((const char *)path);
1136
1137 for (; ep != NULL; ep = ep->next)
1138 {
1139 if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1140 return FALSE;
1141 }
1142
1143 if (ip == NULL) return TRUE;
1144
1145 for (; ip != NULL; ip = ip->next)
1146 {
1147 if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1148 return TRUE;
1149 }
1150
1151 return FALSE;
1152 }
1153
1154
1155
1156 /*************************************************
1157 * Decode integer argument value *
1158 *************************************************/
1159
1160 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1161 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1162 just keep it simple.
1163
1164 Arguments:
1165 option_data the option data string
1166 op the option item (for error messages)
1167 longop TRUE if option given in long form
1168
1169 Returns: a long integer
1170 */
1171
1172 static long int
1173 decode_number(char *option_data, option_item *op, BOOL longop)
1174 {
1175 unsigned long int n = 0;
1176 char *endptr = option_data;
1177 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1178 while (isdigit((unsigned char)(*endptr)))
1179 n = n * 10 + (int)(*endptr++ - '0');
1180 if (toupper(*endptr) == 'K')
1181 {
1182 n *= 1024;
1183 endptr++;
1184 }
1185 else if (toupper(*endptr) == 'M')
1186 {
1187 n *= 1024*1024;
1188 endptr++;
1189 }
1190
1191 if (*endptr != 0) /* Error */
1192 {
1193 if (longop)
1194 {
1195 char *equals = strchr(op->long_name, '=');
1196 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1197 (int)(equals - op->long_name);
1198 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1199 option_data, nlen, op->long_name);
1200 }
1201 else
1202 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1203 option_data, op->one_char);
1204 pcre2grep_exit(usage(2));
1205 }
1206
1207 return n;
1208 }
1209
1210
1211
1212 /*************************************************
1213 * Add item to a chain of numbers *
1214 *************************************************/
1215
1216 /* Used to add an item onto a chain, or just return an unconnected item if the
1217 "after" argument is NULL.
1218
1219 Arguments:
1220 n the number to add
1221 after if not NULL points to item to insert after
1222
1223 Returns: new number block
1224 */
1225
1226 static omstr *
1227 add_number(int n, omstr *after)
1228 {
1229 omstr *om = (omstr *)malloc(sizeof(omstr));
1230
1231 if (om == NULL)
1232 {
1233 fprintf(stderr, "pcre2grep: malloc failed\n");
1234 pcre2grep_exit(2);
1235 }
1236 om->next = NULL;
1237 om->groupnum = n;
1238
1239 if (after != NULL)
1240 {
1241 om->next = after->next;
1242 after->next = om;
1243 }
1244 return om;
1245 }
1246
1247
1248
1249 /*************************************************
1250 * Read one line of input *
1251 *************************************************/
1252
1253 /* Normally, input is read using fread() (or gzread, or BZ2_read) into a large
1254 buffer, so many lines may be read at once. However, doing this for tty input
1255 means that no output appears until a lot of input has been typed. Instead, tty
1256 input is handled line by line. We cannot use fgets() for this, because it does
1257 not stop at a binary zero, and therefore there is no way of telling how many
1258 characters it has read, because there may be binary zeros embedded in the data.
1259
1260 Arguments:
1261 buffer the buffer to read into
1262 length the maximum number of characters to read
1263 f the file
1264
1265 Returns: the number of characters read, zero at end of file
1266 */
1267
static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
  int c;
  int yield = 0;

  /* The space check is made before each byte is fetched, so nothing is read
  from the file or stored in the buffer when length is zero or negative; the
  yield is then zero. Reading stops after a newline has been stored, when the
  buffer is full, or at end of file. No terminating zero is added. */

  while (yield < length && (c = fgetc(f)) != EOF)
    {
    buffer[yield++] = c;
    if (c == '\n') break;
    }

  return yield;
}
1280
1281
1282
1283 /*************************************************
1284 * Find end of line *
1285 *************************************************/
1286
1287 /* The length of the endline sequence that is found is set via lenptr. This may
1288 be zero at the very end of the file if there is no line-ending sequence there.
1289
1290 Arguments:
1291 p current position in line
1292 endptr end of available data
1293 lenptr where to put the length of the eol sequence
1294
1295 Returns: pointer after the last byte of the line,
1296 including the newline byte(s)
1297 */
1298
1299 static char *
1300 end_of_line(char *p, char *endptr, int *lenptr)
1301 {
1302 switch(endlinetype)
1303 {
1304 default: /* Just in case */
1305 case PCRE2_NEWLINE_LF:
1306 while (p < endptr && *p != '\n') p++;
1307 if (p < endptr)
1308 {
1309 *lenptr = 1;
1310 return p + 1;
1311 }
1312 *lenptr = 0;
1313 return endptr;
1314
1315 case PCRE2_NEWLINE_CR:
1316 while (p < endptr && *p != '\r') p++;
1317 if (p < endptr)
1318 {
1319 *lenptr = 1;
1320 return p + 1;
1321 }
1322 *lenptr = 0;
1323 return endptr;
1324
1325 case PCRE2_NEWLINE_CRLF:
1326 for (;;)
1327 {
1328 while (p < endptr && *p != '\r') p++;
1329 if (++p >= endptr)
1330 {
1331 *lenptr = 0;
1332 return endptr;
1333 }
1334 if (*p == '\n')
1335 {
1336 *lenptr = 2;
1337 return p + 1;
1338 }
1339 }
1340 break;
1341
1342 case PCRE2_NEWLINE_ANYCRLF:
1343 while (p < endptr)
1344 {
1345 int extra = 0;
1346 int c = *((unsigned char *)p);
1347
1348 if (utf && c >= 0xc0)
1349 {
1350 int gcii, gcss;
1351 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1352 gcss = 6*extra;
1353 c = (c & utf8_table3[extra]) << gcss;
1354 for (gcii = 1; gcii <= extra; gcii++)
1355 {
1356 gcss -= 6;
1357 c |= (p[gcii] & 0x3f) << gcss;
1358 }
1359 }
1360
1361 p += 1 + extra;
1362
1363 switch (c)
1364 {
1365 case '\n':
1366 *lenptr = 1;
1367 return p;
1368
1369 case '\r':
1370 if (p < endptr && *p == '\n')
1371 {
1372 *lenptr = 2;
1373 p++;
1374 }
1375 else *lenptr = 1;
1376 return p;
1377
1378 default:
1379 break;
1380 }
1381 } /* End of loop for ANYCRLF case */
1382
1383 *lenptr = 0; /* Must have hit the end */
1384 return endptr;
1385
1386 case PCRE2_NEWLINE_ANY:
1387 while (p < endptr)
1388 {
1389 int extra = 0;
1390 int c = *((unsigned char *)p);
1391
1392 if (utf && c >= 0xc0)
1393 {
1394 int gcii, gcss;
1395 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1396 gcss = 6*extra;
1397 c = (c & utf8_table3[extra]) << gcss;
1398 for (gcii = 1; gcii <= extra; gcii++)
1399 {
1400 gcss -= 6;
1401 c |= (p[gcii] & 0x3f) << gcss;
1402 }
1403 }
1404
1405 p += 1 + extra;
1406
1407 switch (c)
1408 {
1409 case '\n': /* LF */
1410 case '\v': /* VT */
1411 case '\f': /* FF */
1412 *lenptr = 1;
1413 return p;
1414
1415 case '\r': /* CR */
1416 if (p < endptr && *p == '\n')
1417 {
1418 *lenptr = 2;
1419 p++;
1420 }
1421 else *lenptr = 1;
1422 return p;
1423
1424 #ifndef EBCDIC
1425 case 0x85: /* Unicode NEL */
1426 *lenptr = utf? 2 : 1;
1427 return p;
1428
1429 case 0x2028: /* Unicode LS */
1430 case 0x2029: /* Unicode PS */
1431 *lenptr = 3;
1432 return p;
1433 #endif /* Not EBCDIC */
1434
1435 default:
1436 break;
1437 }
1438 } /* End of loop for ANY case */
1439
1440 *lenptr = 0; /* Must have hit the end */
1441 return endptr;
1442 } /* End of overall switch */
1443 }
1444
1445
1446
1447 /*************************************************
1448 * Find start of previous line *
1449 *************************************************/
1450
1451 /* This is called when looking back for before lines to print.
1452
1453 Arguments:
1454 p start of the subsequent line
1455 startptr start of available data
1456
1457 Returns: pointer to the start of the previous line
1458 */
1459
1460 static char *
1461 previous_line(char *p, char *startptr)
1462 {
1463 switch(endlinetype)
1464 {
1465 default: /* Just in case */
1466 case PCRE2_NEWLINE_LF:
1467 p--;
1468 while (p > startptr && p[-1] != '\n') p--;
1469 return p;
1470
1471 case PCRE2_NEWLINE_CR:
1472 p--;
1473 while (p > startptr && p[-1] != '\n') p--;
1474 return p;
1475
1476 case PCRE2_NEWLINE_CRLF:
1477 for (;;)
1478 {
1479 p -= 2;
1480 while (p > startptr && p[-1] != '\n') p--;
1481 if (p <= startptr + 1 || p[-2] == '\r') return p;
1482 }
1483 /* Control can never get here */
1484
1485 case PCRE2_NEWLINE_ANY:
1486 case PCRE2_NEWLINE_ANYCRLF:
1487 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1488 if (utf) while ((*p & 0xc0) == 0x80) p--;
1489
1490 while (p > startptr)
1491 {
1492 unsigned int c;
1493 char *pp = p - 1;
1494
1495 if (utf)
1496 {
1497 int extra = 0;
1498 while ((*pp & 0xc0) == 0x80) pp--;
1499 c = *((unsigned char *)pp);
1500 if (c >= 0xc0)
1501 {
1502 int gcii, gcss;
1503 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1504 gcss = 6*extra;
1505 c = (c & utf8_table3[extra]) << gcss;
1506 for (gcii = 1; gcii <= extra; gcii++)
1507 {
1508 gcss -= 6;
1509 c |= (pp[gcii] & 0x3f) << gcss;
1510 }
1511 }
1512 }
1513 else c = *((unsigned char *)pp);
1514
1515 if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1516 {
1517 case '\n': /* LF */
1518 case '\r': /* CR */
1519 return p;
1520
1521 default:
1522 break;
1523 }
1524
1525 else switch (c)
1526 {
1527 case '\n': /* LF */
1528 case '\v': /* VT */
1529 case '\f': /* FF */
1530 case '\r': /* CR */
1531 #ifndef EBCDIE
1532 case 0x85: /* Unicode NEL */
1533 case 0x2028: /* Unicode LS */
1534 case 0x2029: /* Unicode PS */
1535 #endif /* Not EBCDIC */
1536 return p;
1537
1538 default:
1539 break;
1540 }
1541
1542 p = pp; /* Back one character */
1543 } /* End of loop for ANY case */
1544
1545 return startptr; /* Hit start of data */
1546 } /* End of overall switch */
1547 }
1548
1549
1550
1551
1552
1553 /*************************************************
1554 * Print the previous "after" lines *
1555 *************************************************/
1556
1557 /* This is called if we are about to lose said lines because of buffer filling,
1558 and at the end of the file. The data in the line is written using fwrite() so
1559 that a binary zero does not terminate it.
1560
1561 Arguments:
1562 lastmatchnumber the number of the last matching line, plus one
1563 lastmatchrestart where we restarted after the last match
1564 endptr end of available data
1565 printname filename for printing
1566
1567 Returns: nothing
1568 */
1569
1570 static void
1571 do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1572 const char *printname)
1573 {
1574 if (after_context > 0 && lastmatchnumber > 0)
1575 {
1576 int count = 0;
1577 while (lastmatchrestart < endptr && count < after_context)
1578 {
1579 int ellength;
1580 char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
1581 if (ellength == 0 && pp == main_buffer + bufsize) break;
1582 if (printname != NULL) fprintf(stdout, "%s-", printname);
1583 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1584 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1585 lastmatchrestart = pp;
1586 count++;
1587 }
1588 if (count > 0) hyphenpending = TRUE;
1589 }
1590 }
1591
1592
1593
1594 /*************************************************
1595 * Apply patterns to subject till one matches *
1596 *************************************************/
1597
1598 /* This function is called to run through all patterns, looking for a match. It
1599 is used multiple times for the same subject when colouring is enabled, in order
1600 to find all possible matches.
1601
1602 Arguments:
1603 matchptr the start of the subject
1604 length the length of the subject to match
1605 options options for pcre2_match()
1606 startoffset where to start matching
1607 mrc address of where to put the result of pcre2_match()
1608
1609 Returns: TRUE if there was a match
1610 FALSE if there was no match
1611 invert if there was a non-fatal error
1612 */
1613
1614 static BOOL
1615 match_patterns(char *matchptr, size_t length, unsigned int options,
1616 size_t startoffset, int *mrc)
1617 {
1618 int i;
1619 size_t slen = length;
1620 patstr *p = patterns;
1621 const char *msg = "this text:\n\n";
1622
1623 if (slen > 200)
1624 {
1625 slen = 200;
1626 msg = "text that starts:\n\n";
1627 }
1628 for (i = 1; p != NULL; p = p->next, i++)
1629 {
1630 *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
1631 startoffset, options, match_data, match_context);
1632 if (*mrc >= 0) return TRUE;
1633 if (*mrc == PCRE2_ERROR_NOMATCH) continue;
1634 fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc);
1635 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1636 fprintf(stderr, "%s", msg);
1637 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
1638 fprintf(stderr, "\n\n");
1639 if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_RECURSIONLIMIT ||
1640 *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
1641 resource_error = TRUE;
1642 if (error_count++ > 20)
1643 {
1644 fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1645 pcre2grep_exit(2);
1646 }
1647 return invert; /* No more matching; don't show the line again */
1648 }
1649
1650 return FALSE; /* No match, no errors */
1651 }
1652
1653
1654 #ifdef SUPPORT_PCRE2GREP_CALLOUT
1655
1656 /*************************************************
1657 * Parse and execute callout scripts *
1658 *************************************************/
1659
1660 /* This function parses a callout string block and executes the
1661 program specified by the string. The string is a list of substrings
1662 separated by pipe characters. The first substring represents the
1663 executable name, and the following substrings specify the arguments:
1664
1665 program_name|param1|param2|...
1666
1667 Any substring (including the program name) can contain escape sequences
1668 started by the dollar character. The escape sequences are substituted as
1669 follows:
1670
1671 $<digits> or ${<digits>} is replaced by the captured substring of the given
1672 decimal number, which must be greater than zero. If the number is greater
1673 than the number of capturing substrings, or if the capture is unset, the
1674 replacement is empty.
1675
1676 Any other character is substituted by itself. E.g: $$ is replaced by a single
1677 dollar or $| replaced by a pipe character.
1678
1679 Example:
1680
1681 echo -e "abcde\n12345" | pcre2grep \
1682 '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
1683
1684 Output:
1685
1686 Arg1: [a] [bcd] [d] Arg2: |a| ()
1687 abcde
1688 Arg1: [1] [234] [4] Arg2: |1| ()
1689 12345
1690
1691 Arguments:
1692 calloutptr the callout block
1693
1694 Returns: zero if the callout is ignored or the program succeeds (matching continues), otherwise 1 (the match fails)
1695 */
1696
1697 static int
1698 pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
1699 {
1700 PCRE2_SIZE length = calloutptr->callout_string_length;
1701 PCRE2_SPTR string = calloutptr->callout_string;
1702 PCRE2_SPTR subject = calloutptr->subject;
1703 PCRE2_SIZE *ovector = calloutptr->offset_vector;
1704 PCRE2_SIZE capture_top = calloutptr->capture_top;
1705 PCRE2_SIZE argsvectorlen = 2;
1706 PCRE2_SIZE argslen = 1;
1707 char *args;
1708 char *argsptr;
1709 char **argsvector;
1710 char **argsvectorptr;
1711 #ifndef WIN32
1712 pid_t pid;
1713 #endif
1714 int result = 0;
1715
1716 (void)unused; /* Avoid compiler warning */
1717
1718 /* Only callout with strings are supported. */
1719 if (string == NULL || length == 0) return 0;
1720
1721 /* Checking syntax and compute the number of string fragments. Callout strings
1722 are ignored in case of a syntax error. */
1723
1724 while (length > 0)
1725 {
1726 if (*string == '|')
1727 {
1728 argsvectorlen++;
1729
1730 /* Maximum 10000 arguments allowed. */
1731 if (argsvectorlen > 10000) return 0;
1732 }
1733 else if (*string == '$')
1734 {
1735 PCRE2_SIZE capture_id = 0;
1736
1737 string++;
1738 length--;
1739
1740 /* Syntax error: a character must be present after $. */
1741 if (length == 0) return 0;
1742
1743 if (*string >= '1' && *string <= '9')
1744 {
1745 do
1746 {
1747 /* Maximum capture id is 65535. */
1748 if (capture_id <= 65535)
1749 capture_id = capture_id * 10 + (*string - '0');
1750
1751 string++;
1752 length--;
1753 }
1754 while (length > 0 && *string >= '0' && *string <= '9');
1755
1756 /* To negate the effect of string++ below. */
1757 string--;
1758 length++;
1759 }
1760 else if (*string == '{')
1761 {
1762 /* Must be a decimal number in braces, e.g: {5} or {38} */
1763 string++;
1764 length--;
1765
1766 /* Syntax error: a decimal number required. */
1767 if (length == 0) return 0;
1768 if (*string < '1' || *string > '9') return 0;
1769
1770 do
1771 {
1772 /* Maximum capture id is 65535. */
1773 if (capture_id <= 65535)
1774 capture_id = capture_id * 10 + (*string - '0');
1775
1776 string++;
1777 length--;
1778
1779 /* Syntax error: no more characters */
1780 if (length == 0) return 0;
1781 }
1782 while (*string >= '0' && *string <= '9');
1783
1784 /* Syntax error: closing brace is missing. */
1785 if (*string != '}') return 0;
1786 }
1787
1788 if (capture_id > 0)
1789 {
1790 if (capture_id < capture_top)
1791 {
1792 capture_id *= 2;
1793 argslen += ovector[capture_id + 1] - ovector[capture_id];
1794 }
1795
1796 /* To negate the effect of argslen++ below. */
1797 argslen--;
1798 }
1799 }
1800
1801 string++;
1802 length--;
1803 argslen++;
1804 }
1805
1806 args = (char*)malloc(argslen);
1807 if (args == NULL) return 0;
1808
1809 argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
1810 if (argsvector == NULL)
1811 {
1812 free(args);
1813 return 0;
1814 }
1815
1816 argsptr = args;
1817 argsvectorptr = argsvector;
1818
1819 *argsvectorptr++ = argsptr;
1820
1821 length = calloutptr->callout_string_length;
1822 string = calloutptr->callout_string;
1823
1824 while (length > 0)
1825 {
1826 if (*string == '|')
1827 {
1828 *argsptr++ = '\0';
1829 *argsvectorptr++ = argsptr;
1830 }
1831 else if (*string == '$')
1832 {
1833 string++;
1834 length--;
1835
1836 if ((*string >= '1' && *string <= '9') || *string == '{')
1837 {
1838 PCRE2_SIZE capture_id = 0;
1839
1840 if (*string != '{')
1841 {
1842 do
1843 {
1844 /* Maximum capture id is 65535. */
1845 if (capture_id <= 65535)
1846 capture_id = capture_id * 10 + (*string - '0');
1847
1848 string++;
1849 length--;
1850 }
1851 while (length > 0 && *string >= '0' && *string <= '9');
1852
1853 /* To negate the effect of string++ below. */
1854 string--;
1855 length++;
1856 }
1857 else
1858 {
1859 string++;
1860 length--;
1861
1862 do
1863 {
1864 /* Maximum capture id is 65535. */
1865 if (capture_id <= 65535)
1866 capture_id = capture_id * 10 + (*string - '0');
1867
1868 string++;
1869 length--;
1870 }
1871 while (*string != '}');
1872 }
1873
1874 if (capture_id < capture_top)
1875 {
1876 PCRE2_SIZE capturesize;
1877 capture_id *= 2;
1878
1879 capturesize = ovector[capture_id + 1] - ovector[capture_id];
1880 memcpy(argsptr, subject + ovector[capture_id], capturesize);
1881 argsptr += capturesize;
1882 }
1883 }
1884 else
1885 {
1886 *argsptr++ = *string;
1887 }
1888 }
1889 else
1890 {
1891 *argsptr++ = *string;
1892 }
1893
1894 string++;
1895 length--;
1896 }
1897
1898 *argsptr++ = '\0';
1899 *argsvectorptr = NULL;
1900
1901 #ifdef WIN32
1902 result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
1903 #else
1904 pid = fork();
1905
1906 if (pid == 0)
1907 {
1908 (void)execv(argsvector[0], argsvector);
1909 /* Control gets here if there is an error, e.g. a non-existent program */
1910 exit(1);
1911 }
1912 else if (pid > 0)
1913 (void)waitpid(pid, &result, 0);
1914 #endif
1915
1916 free(args);
1917 free(argsvector);
1918
1919 /* Currently negative return values are not supported, only zero (match
1920 continues) or non-zero (match fails). */
1921
1922 return result != 0;
1923 }
1924
1925 #endif
1926
1927
1928
1929 /*************************************************
1930 * Read a portion of the file into buffer *
1931 *************************************************/
1932
1933 static int
1934 fill_buffer(void *handle, int frtype, char *buffer, int length,
1935 BOOL input_line_buffered)
1936 {
1937 #ifdef SUPPORT_LIBZ
1938 if (frtype == FR_LIBZ)
1939 return gzread((gzFile)handle, buffer, length);
1940 else
1941 #endif
1942
1943 #ifdef SUPPORT_LIBBZ2
1944 if (frtype == FR_LIBBZ2)
1945 return BZ2_bzread((BZFILE *)handle, buffer, length);
1946 else
1947 #endif
1948
1949 return (input_line_buffered ?
1950 read_one_line(buffer, length, (FILE *)handle) :
1951 fread(buffer, 1, length, (FILE *)handle));
1952 }
1953
1954
1955
1956 /*************************************************
1957 * Grep an individual file *
1958 *************************************************/
1959
1960 /* This is called from grep_or_recurse() below. It uses a buffer that is three
1961 times the value of bufthird. The matching point is never allowed to stray into
1962 the top third of the buffer, thus keeping more of the file available for
1963 context printing or for multiline scanning. For large files, the pointer will
1964 be in the middle third most of the time, so the bottom third is available for
1965 "before" context printing.
1966
1967 Arguments:
1968 handle the fopened FILE stream for a normal file
1969 the gzFile pointer when reading is via libz
1970 the BZFILE pointer when reading is via libbz2
1971 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1972 filename the file name or NULL (for errors)
1973 printname the file name if it is to be printed for each match
1974 or NULL if the file name is not to be printed
1975 it cannot be NULL if filenames[_nomatch]_only is set
1976
1977 Returns: 0 if there was at least one match
1978 1 otherwise (no matches)
1979 2 if an overlong line is encountered
1980 3 if there is a read error on a .bz2 file
1981 */
1982
1983 static int
1984 pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
1985 {
1986 int rc = 1;
1987 int linenumber = 1;
1988 int lastmatchnumber = 0;
1989 int count = 0;
1990 int filepos = 0;
1991 char *lastmatchrestart = NULL;
1992 char *ptr = main_buffer;
1993 char *endptr;
1994 size_t bufflength;
1995 BOOL binary = FALSE;
1996 BOOL endhyphenpending = FALSE;
1997 BOOL input_line_buffered = line_buffered;
1998 FILE *in = NULL; /* Ensure initialized */
1999
2000 /* Do the first read into the start of the buffer and set up the pointer to end
2001 of what we have. In the case of libz, a non-zipped .gz file will be read as a
2002 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
2003 fail. */
2004
2005 if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
2006 {
2007 in = (FILE *)handle;
2008 if (is_file_tty(in)) input_line_buffered = TRUE;
2009 }
2010
2011 bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
2012 input_line_buffered);
2013
2014 #ifdef SUPPORT_LIBBZ2
2015 if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
2016 #endif
2017
2018 endptr = main_buffer + bufflength;
2019
2020 /* Unless binary-files=text, see if we have a binary file. This uses the same
2021 rule as GNU grep, namely, a search for a binary zero byte near the start of the
2022 file. */
2023
2024 if (binary_files != BIN_TEXT)
2025 {
2026 binary =
2027 memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
2028 if (binary && binary_files == BIN_NOMATCH) return 1;
2029 }
2030
2031 /* Loop while the current pointer is not at the end of the file. For large
2032 files, endptr will be at the end of the buffer when we are in the middle of the
2033 file, but ptr will never get there, because as soon as it gets over 2/3 of the
2034 way, the buffer is shifted left and re-filled. */
2035
2036 while (ptr < endptr)
2037 {
2038 int endlinelength;
2039 int mrc = 0;
2040 unsigned int options = 0;
2041 BOOL match;
2042 char *matchptr = ptr;
2043 char *t = ptr;
2044 size_t length, linelength;
2045 size_t startoffset = 0;
2046
2047 /* At this point, ptr is at the start of a line. We need to find the length
2048 of the subject string to pass to pcre2_match(). In multiline mode, it is the
2049 length remainder of the data in the buffer. Otherwise, it is the length of
2050 the next line, excluding the terminating newline. After matching, we always
2051 advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
2052 option is used for compiling, so that any match is constrained to be in the
2053 first line. */
2054
2055 t = end_of_line(t, endptr, &endlinelength);
2056 linelength = t - ptr - endlinelength;
2057 length = multiline? (size_t)(endptr - ptr) : linelength;
2058
2059 /* Check to see if the line we are looking at extends right to the very end
2060 of the buffer without a line terminator. This means the line is too long to
2061 handle at the current buffer size. Until the buffer reaches its maximum size,
2062 try doubling it and reading more data. */
2063
2064 if (endlinelength == 0 && t == main_buffer + bufsize)
2065 {
2066 if (bufthird < max_bufthird)
2067 {
2068 char *new_buffer;
2069 int new_bufthird = 2*bufthird;
2070
2071 if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
2072 new_buffer = (char *)malloc(3*new_bufthird);
2073
2074 if (new_buffer == NULL)
2075 {
2076 fprintf(stderr,
2077 "pcre2grep: line %d%s%s is too long for the internal buffer\n"
2078 "pcre2grep: not enough memory to increase the buffer size to %d\n",
2079 linenumber,
2080 (filename == NULL)? "" : " of file ",
2081 (filename == NULL)? "" : filename,
2082 new_bufthird);
2083 return 2;
2084 }
2085
2086 /* Copy the data and adjust pointers to the new buffer location. */
2087
2088 memcpy(new_buffer, main_buffer, bufsize);
2089 bufthird = new_bufthird;
2090 bufsize = 3*bufthird;
2091 ptr = new_buffer + (ptr - main_buffer);
2092 lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
2093 free(main_buffer);
2094 main_buffer = new_buffer;
2095
2096 /* Read more data into the buffer and then try to find the line ending
2097 again. */
2098
2099 bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
2100 bufsize - bufflength, input_line_buffered);
2101 endptr = main_buffer + bufflength;
2102 continue;
2103 }
2104 else
2105 {
2106 fprintf(stderr,
2107 "pcre2grep: line %d%s%s is too long for the internal buffer\n"
2108 "pcre2grep: the maximum buffer size is %d\n"
2109 "pcre2grep: use the --max-buffer-size option to change it\n",
2110 linenumber,
2111 (filename == NULL)? "" : " of file ",
2112 (filename == NULL)? "" : filename,
2113 bufthird);
2114 return 2;
2115 }
2116 }
2117
2118 /* Extra processing for Jeffrey Friedl's debugging. */
2119
2120 #ifdef JFRIEDL_DEBUG
2121 if (jfriedl_XT || jfriedl_XR)
2122 {
2123 # include <sys/time.h>
2124 # include <time.h>
2125 struct timeval start_time, end_time;
2126 struct timezone dummy;
2127 int i;
2128
2129 if (jfriedl_XT)
2130 {
2131 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
2132 const char *orig = ptr;
2133 ptr = malloc(newlen + 1);
2134 if (!ptr) {
2135 printf("out of memory");
2136 pcre2grep_exit(2);
2137 }
2138 endptr = ptr;
2139 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
2140 for (i = 0; i < jfriedl_XT; i++) {
2141 strncpy(endptr, orig, length);
2142 endptr += length;
2143 }
2144 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
2145 length = newlen;
2146 }
2147
2148 if (gettimeofday(&start_time, &dummy) != 0)
2149 perror("bad gettimeofday");
2150
2151
2152 for (i = 0; i < jfriedl_XR; i++)
2153 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
2154 PCRE2_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
2155
2156 if (gettimeofday(&end_time, &dummy) != 0)
2157 perror("bad gettimeofday");
2158
2159 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
2160 -
2161 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
2162
2163 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
2164 return 0;
2165 }
2166 #endif
2167
2168 /* We come back here after a match when show_only_matching is set, in order
2169 to find any further matches in the same line. This applies to
2170 --only-matching, --file-offsets, and --line-offsets. */
2171
2172 ONLY_MATCHING_RESTART:
2173
2174 /* Run through all the patterns until one matches or there is an error other
2175 than NOMATCH. This code is in a subroutine so that it can be re-used for
2176 finding subsequent matches when colouring matched lines. After finding one
2177 match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
2178 this line. */
2179
2180 match = match_patterns(matchptr, length, options, startoffset, &mrc);
2181 options = PCRE2_NOTEMPTY;
2182
2183 /* If it's a match or a not-match (as required), do what's wanted. */
2184
2185 if (match != invert)
2186 {
2187 BOOL hyphenprinted = FALSE;
2188
2189 /* We've failed if we want a file that doesn't have any matches. */
2190
2191 if (filenames == FN_NOMATCH_ONLY) return 1;
2192
2193 /* If all we want is a yes/no answer, we can return immediately. */
2194
2195 if (quiet) return 0;
2196
2197 /* Just count if just counting is wanted. */
2198
2199 else if (count_only || show_total_count) count++;
2200
2201 /* When handling a binary file and binary-files==binary, the "binary"
2202 variable will be set true (it's false in all other cases). In this
2203 situation we just want to output the file name. No need to scan further. */
2204
2205 else if (binary)
2206 {
2207 fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2208 return 0;
2209 }
2210
2211 /* Likewise, if all we want is a file name, there is no need to scan any
2212 more lines in the file. */
2213
2214 else if (filenames == FN_MATCH_ONLY)
2215 {
2216 fprintf(stdout, "%s" STDOUT_NL, printname);
2217 return 0;
2218 }
2219
2220 /* The --only-matching option prints just the substring that matched,
2221 and/or one or more captured portions of it, as long as these strings are
2222 not empty. The --file-offsets and --line-offsets options output offsets for
2223 the matching substring (all three set show_only_matching). None of these
2224 mutually exclusive options prints any context. Afterwards, adjust the start
2225 and then jump back to look for further matches in the same line. If we are
2226 in invert mode, however, nothing is printed and we do not restart - this
2227 could still be useful because the return code is set. */
2228
2229 else if (show_only_matching)
2230 {
2231 if (!invert)
2232 {
2233 size_t oldstartoffset;
2234
2235 if (printname != NULL) fprintf(stdout, "%s:", printname);
2236 if (number) fprintf(stdout, "%d:", linenumber);
2237
2238 /* Handle --line-offsets */
2239
2240 if (line_offsets)
2241 fprintf(stdout, "%d,%d" STDOUT_NL, (int)(matchptr + offsets[0] - ptr),
2242 (int)(offsets[1] - offsets[0]));
2243
2244 /* Handle --file-offsets */
2245
2246 else if (file_offsets)
2247 fprintf(stdout, "%d,%d" STDOUT_NL,
2248 (int)(filepos + matchptr + offsets[0] - ptr),
2249 (int)(offsets[1] - offsets[0]));
2250
2251 /* Handle --only-matching, which may occur many times */
2252
2253 else
2254 {
2255 BOOL printed = FALSE;
2256 omstr *om;
2257
2258 for (om = only_matching; om != NULL; om = om->next)
2259 {
2260 int n = om->groupnum;
2261 if (n < mrc)
2262 {
2263 int plen = offsets[2*n + 1] - offsets[2*n];
2264 if (plen > 0)
2265 {
2266 if (printed) fprintf(stdout, "%s", om_separator);
2267 print_match(matchptr + offsets[n*2], plen);
2268 printed = TRUE;
2269 }
2270 }
2271 }
2272
2273 if (printed || printname != NULL || number)
2274 fprintf(stdout, STDOUT_NL);
2275 }
2276
2277 /* Prepare to repeat to find the next match in the line. */
2278
2279 match = FALSE;
2280 if (line_buffered) fflush(stdout);
2281 rc = 0; /* Had some success */
2282
2283 /* If the current match ended past the end of the line (only possible
2284 in multiline mode), we are done with this line. */
2285
2286 if (offsets[1] > linelength) goto END_ONE_MATCH;
2287
2288 /* If the pattern contained a lookbehind that included \K, it is
2289 possible that the end of the match might be at or before the actual
2290 starting offset we have just used. In this case, start one character
2291 further on. */
2292
2293 startoffset = offsets[1]; /* Restart after the match */
2294 oldstartoffset = pcre2_get_startchar(match_data);
2295 if (startoffset <= oldstartoffset)
2296 {
2297 if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */
2298 startoffset = oldstartoffset + 1;
2299 if (utf)
2300 while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++;
2301 }
2302 goto ONLY_MATCHING_RESTART;
2303 }
2304 }
2305
2306 /* This is the default case when none of the above options is set. We print
2307 the matching line(s), possibly preceded and/or followed by other lines of
2308 context. */
2309
2310 else
2311 {
2312 /* See if there is a requirement to print some "after" lines from a
2313 previous match. We never print any overlaps. */
2314
2315 if (after_context > 0 && lastmatchnumber > 0)
2316 {
2317 int ellength;
2318 int linecount = 0;
2319 char *p = lastmatchrestart;
2320
2321 while (p < ptr && linecount < after_context)
2322 {
2323 p = end_of_line(p, ptr, &ellength);
2324 linecount++;
2325 }
2326
2327 /* It is important to advance lastmatchrestart during this printing so
2328 that it interacts correctly with any "before" printing below. Print
2329 each line's data using fwrite() in case there are binary zeroes. */
2330
2331 while (lastmatchrestart < p)
2332 {
2333 char *pp = lastmatchrestart;
2334 if (printname != NULL) fprintf(stdout, "%s-", printname);
2335 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
2336 pp = end_of_line(pp, endptr, &ellength);
2337 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2338 lastmatchrestart = pp;
2339 }
2340 if (lastmatchrestart != ptr) hyphenpending = TRUE;
2341 }
2342
2343 /* If there were non-contiguous lines printed above, insert hyphens. */
2344
2345 if (hyphenpending)
2346 {
2347 fprintf(stdout, "--" STDOUT_NL);
2348 hyphenpending = FALSE;
2349 hyphenprinted = TRUE;
2350 }
2351
2352 /* See if there is a requirement to print some "before" lines for this
2353 match. Again, don't print overlaps. */
2354
2355 if (before_context > 0)
2356 {
2357 int linecount = 0;
2358 char *p = ptr;
2359
2360 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
2361 linecount < before_context)
2362 {
2363 linecount++;
2364 p = previous_line(p, main_buffer);
2365 }
2366
2367 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
2368 fprintf(stdout, "--" STDOUT_NL);
2369
2370 while (p < ptr)
2371 {
2372 int ellength;
2373 char *pp = p;
2374 if (printname != NULL) fprintf(stdout, "%s-", printname);
2375 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
2376 pp = end_of_line(pp, endptr, &ellength);
2377 FWRITE(p, 1, pp - p, stdout);
2378 p = pp;
2379 }
2380 }
2381
2382 /* Now print the matching line(s); ensure we set hyphenpending at the end
2383 of the file if any context lines are being output. */
2384
2385 if (after_context > 0 || before_context > 0)
2386 endhyphenpending = TRUE;
2387
2388 if (printname != NULL) fprintf(stdout, "%s:", printname);
2389 if (number) fprintf(stdout, "%d:", linenumber);
2390
2391 /* In multiline mode, we want to print to the end of the line in which
2392 the end of the matched string is found, so we adjust linelength and the
2393 line number appropriately, but only when there actually was a match
2394 (invert not set). Because the PCRE2_FIRSTLINE option is set, the start of
2395 the match will always be before the first newline sequence. */
2396
2397 if (multiline & !invert)
2398 {
2399 char *endmatch = ptr + offsets[1];
2400 t = ptr;
2401 while (t <= endmatch)
2402 {
2403 t = end_of_line(t, endptr, &endlinelength);
2404 if (t < endmatch) linenumber++; else break;
2405 }
2406 linelength = t - ptr - endlinelength;
2407 }
2408
2409 /*** NOTE: Use only fwrite() to output the data line, so that binary
2410 zeroes are treated as just another data character. */
2411
2412 /* This extra option, for Jeffrey Friedl's debugging requirements,
2413 replaces the matched string, or a specific captured string if it exists,
2414 with X. When this happens, colouring is ignored. */
2415
2416 #ifdef JFRIEDL_DEBUG
2417 if (S_arg >= 0 && S_arg < mrc)
2418 {
2419 int first = S_arg * 2;
2420 int last = first + 1;
2421 FWRITE(ptr, 1, offsets[first], stdout);
2422 fprintf(stdout, "X");
2423 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
2424 }
2425 else
2426 #endif
2427
2428 /* We have to split the line(s) up if colouring, and search for further
2429 matches, but not of course if the line is a non-match. */
2430
2431 if (do_colour && !invert)
2432 {
2433 int plength;
2434 FWRITE(ptr, 1, offsets[0], stdout);
2435 print_match(ptr + offsets[0], offsets[1] - offsets[0]);
2436 for (;;)
2437 {
2438 startoffset = offsets[1];
2439 if (startoffset >= linelength + endlinelength ||
2440 !match_patterns(matchptr, length, options, startoffset, &mrc))
2441 break;
2442 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
2443 print_match(matchptr + offsets[0], offsets[1] - offsets[0]);
2444 }
2445
2446 /* In multiline mode, we may have already printed the complete line
2447 and its line-ending characters (if they matched the pattern), so there
2448 may be no more to print. */
2449
2450 plength = (int)((linelength + endlinelength) - startoffset);
2451 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
2452 }
2453
2454 /* Not colouring; no need to search for further matches */
2455
2456 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
2457 }
2458
2459 /* End of doing what has to be done for a match. If --line-buffered was
2460 given, flush the output. */
2461
2462 if (line_buffered) fflush(stdout);
2463 rc = 0; /* Had some success */
2464
2465 /* Remember where the last match happened for after_context. We remember
2466 where we are about to restart, and that line's number. */
2467
2468 lastmatchrestart = ptr + linelength + endlinelength;
2469 lastmatchnumber = linenumber + 1;
2470 }
2471
2472 /* For a match in multiline inverted mode (which of course did not cause
2473 anything to be printed), we have to move on to the end of the match before
2474 proceeding. */
2475
2476 if (multiline && invert && match)
2477 {
2478 int ellength;
2479 char *endmatch = ptr + offsets[1];
2480 t = ptr;
2481 while (t < endmatch)
2482 {
2483 t = end_of_line(t, endptr, &ellength);
2484 if (t <= endmatch) linenumber++; else break;
2485 }
2486 endmatch = end_of_line(endmatch, endptr, &ellength);
2487 linelength = endmatch - ptr - ellength;
2488 }
2489
2490 /* Advance to after the newline and increment the line number. The file
2491 offset to the current line is maintained in filepos. */
2492
2493 END_ONE_MATCH:
2494 ptr += linelength + endlinelength;
2495 filepos += (int)(linelength + endlinelength);
2496 linenumber++;
2497
2498 /* If input is line buffered, and the buffer is not yet full, read another
2499 line and add it into the buffer. */
2500
2501 if (input_line_buffered && bufflength < (size_t)bufsize)
2502 {
2503 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
2504 bufflength += add;
2505 endptr += add;
2506 }
2507
2508 /* If we haven't yet reached the end of the file (the buffer is full), and
2509 the current point is in the top 1/3 of the buffer, slide the buffer down by
2510 1/3 and refill it. Before we do this, if some unprinted "after" lines are
2511 about to be lost, print them. */
2512
2513 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
2514 {
2515 if (after_context > 0 &&
2516 lastmatchnumber > 0 &&
2517 lastmatchrestart < main_buffer + bufthird)
2518 {
2519
2520 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2521 lastmatchnumber = 0; /* Indicates no after lines pending */
2522 }
2523
2524 /* Now do the shuffle */
2525
2526 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
2527 ptr -= bufthird;
2528
2529 bufflength = 2*bufthird + fill_buffer(handle, frtype,
2530 main_buffer + 2*bufthird, bufthird, input_line_buffered);
2531 endptr = main_buffer + bufflength;
2532
2533 /* Adjust any last match point */
2534
2535 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
2536 }
2537 } /* Loop through the whole file */
2538
2539 /* End of file; print final "after" lines if wanted; do_after_lines sets
2540 hyphenpending if it prints something. */
2541
2542 if (!show_only_matching && !(count_only|show_total_count))
2543 {
2544 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2545 hyphenpending |= endhyphenpending;
2546 }
2547
2548 /* Print the file name if we are looking for those without matches and there
2549 were none. If we found a match, we won't have got this far. */
2550
2551 if (filenames == FN_NOMATCH_ONLY)
2552 {
2553 fprintf(stdout, "%s" STDOUT_NL, printname);
2554 return 0;
2555 }
2556
2557 /* Print the match count if wanted */
2558
2559 if (count_only && !quiet)
2560 {
2561 if (count > 0 || !omit_zero_count)
2562 {
2563 if (printname != NULL && filenames != FN_NONE)
2564 fprintf(stdout, "%s:", printname);
2565 fprintf(stdout, "%d" STDOUT_NL, count);
2566 counts_printed++;
2567 }
2568 }
2569
2570 total_count += count; /* Can be set without count_only */
2571 return rc;
2572 }
2573
2574
2575
2576 /*************************************************
2577 * Grep a file or recurse into a directory *
2578 *************************************************/
2579
2580 /* Given a path name, if it's a directory, scan all the files if we are
2581 recursing; if it's a file, grep it.
2582
2583 Arguments:
2584 pathname the path to investigate
2585 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
2586 only_one_at_top TRUE if the path is the only one at toplevel
2587
2588 Returns: -1 the file/directory was skipped
2589 0 if there was at least one match
2590 1 if there were no matches
2591 2 there was some kind of error
2592
2593 However, file opening failures are suppressed if "silent" is set.
2594 */
2595
2596 static int
2597 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2598 {
2599 int rc = 1;
2600 int frtype;
2601 void *handle;
2602 char *lastcomp;
2603 FILE *in = NULL; /* Ensure initialized */
2604
2605 #ifdef SUPPORT_LIBZ
2606 gzFile ingz = NULL;
2607 #endif
2608
2609 #ifdef SUPPORT_LIBBZ2
2610 BZFILE *inbz2 = NULL;
2611 #endif
2612
2613 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2614 int pathlen;
2615 #endif
2616
2617 #if defined NATIVE_ZOS
2618 int zos_type;
2619 FILE *zos_test_file;
2620 #endif
2621
2622 /* If the file name is "-" we scan stdin */
2623
2624 if (strcmp(pathname, "-") == 0)
2625 {
2626 return pcre2grep(stdin, FR_PLAIN, stdin_name,
2627 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2628 stdin_name : NULL);
2629 }
2630
2631 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2632 directories, whereas --include and --exclude apply to everything else. The test
2633 is against the final component of the path. */
2634
2635 lastcomp = strrchr(pathname, FILESEP);
2636 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2637
2638 /* If the file is a directory, skip if not recursing or if explicitly excluded.
2639 Otherwise, scan the directory and recurse for each path within it. The scanning
2640 code is localized so it can be made system-specific. */
2641
2642
2643 /* For z/OS, determine the file type. */
2644
2645 #if defined NATIVE_ZOS
2646 zos_test_file = fopen(pathname,"rb");
2647
2648 if (zos_test_file == NULL)
2649 {
2650 if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
2651 pathname, strerror(errno));
2652 return -1;
2653 }
2654 zos_type = identifyzosfiletype (zos_test_file);
2655 fclose (zos_test_file);
2656
2657 /* Handle a PDS in separate code */
2658
2659 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
2660 {
2661 return travelonpdsdir (pathname, only_one_at_top);
2662 }
2663
2664 /* Deal with regular files in the normal way below. These types are:
2665 zos_type == __ZOS_PDS_MEMBER
2666 zos_type == __ZOS_PS
2667 zos_type == __ZOS_VSAM_KSDS
2668 zos_type == __ZOS_VSAM_ESDS
2669 zos_type == __ZOS_VSAM_RRDS
2670 */
2671
2672 /* Handle a z/OS directory using common code. */
2673
2674 else if (zos_type == __ZOS_HFS)
2675 {
2676 #endif /* NATIVE_ZOS */
2677
2678
2679 /* Handle directories: common code for all OS */
2680
2681 if (isdirectory(pathname))
2682 {
2683 if (dee_action == dee_SKIP ||
2684 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2685 return -1;
2686
2687 if (dee_action == dee_RECURSE)
2688 {
2689 char buffer[1024];
2690 char *nextfile;
2691 directory_type *dir = opendirectory(pathname);
2692
2693 if (dir == NULL)
2694 {
2695 if (!silent)
2696 fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
2697 strerror(errno));
2698 return 2;
2699 }
2700
2701 while ((nextfile = readdirectory(dir)) != NULL)
2702 {
2703 int frc;
2704 sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
2705 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2706 if (frc > 1) rc = frc;
2707 else if (frc == 0 && rc == 1) rc = 0;
2708 }
2709
2710 closedirectory(dir);
2711 return rc;
2712 }
2713 }
2714
2715 #ifdef WIN32
2716 if (iswild(pathname))
2717 {
2718 char buffer[1024];
2719 char *nextfile;
2720 char *name;
2721 directory_type *dir = opendirectory(pathname);
2722
2723 if (dir == NULL)
2724 return 0;
2725
2726 for (nextfile = name = pathname; *nextfile != 0; nextfile++)
2727 if (*nextfile == '/' || *nextfile == '\\')
2728 name = nextfile + 1;
2729 *name = 0;
2730
2731 while ((nextfile = readdirectory(dir)) != NULL)
2732 {
2733 int frc;
2734 sprintf(buffer, "%.512s%.128s", pathname, nextfile);
2735 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2736 if (frc > 1) rc = frc;
2737 else if (frc == 0 && rc == 1) rc = 0;
2738 }
2739
2740 closedirectory(dir);
2741 return rc;
2742 }
2743 #endif
2744
2745 #if defined NATIVE_ZOS
2746 }
2747 #endif
2748
2749 /* If the file is not a directory, check for a regular file, and if it is not,
2750 skip it if that's been requested. Otherwise, check for an explicit inclusion or
2751 exclusion. */
2752
2753 else if (
2754 #if defined NATIVE_ZOS
2755 (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
2756 #else /* all other OS */
2757 (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2758 #endif
2759 !test_incexc(lastcomp, include_patterns, exclude_patterns))
2760 return -1; /* File skipped */
2761
2762 /* Control reaches here if we have a regular file, or if we have a directory
2763 and recursion or skipping was not requested, or if we have anything else and
2764 skipping was not requested. The scan proceeds. If this is the first and only
2765 argument at top level, we don't show the file name, unless we are only showing
2766 the file name, or the filename was forced (-H). */
2767
2768 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2769 pathlen = (int)(strlen(pathname));
2770 #endif
2771
2772 /* Open using zlib if it is supported and the file name ends with .gz. */
2773
2774 #ifdef SUPPORT_LIBZ
2775 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2776 {
2777 ingz = gzopen(pathname, "rb");
2778 if (ingz == NULL)
2779 {
2780 if (!silent)
2781 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
2782 strerror(errno));
2783 return 2;
2784 }
2785 handle = (void *)ingz;
2786 frtype = FR_LIBZ;
2787 }
2788 else
2789 #endif
2790
2791 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
2792
2793 #ifdef SUPPORT_LIBBZ2
2794 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2795 {
2796 inbz2 = BZ2_bzopen(pathname, "rb");
2797 handle = (void *)inbz2;
2798 frtype = FR_LIBBZ2;
2799 }
2800 else
2801 #endif
2802
2803 /* Otherwise use plain fopen(). The label is so that we can come back here if
2804 an attempt to read a .bz2 file indicates that it really is a plain file. */
2805
2806 #ifdef SUPPORT_LIBBZ2
2807 PLAIN_FILE:
2808 #endif
2809 {
2810 in = fopen(pathname, "rb");
2811 handle = (void *)in;
2812 frtype = FR_PLAIN;
2813 }
2814
2815 /* All the opening methods return errno when they fail. */
2816
2817 if (handle == NULL)
2818 {
2819 if (!silent)
2820 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
2821 strerror(errno));
2822 return 2;
2823 }
2824
2825 /* Now grep the file */
2826
2827 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2828 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2829
2830 /* Close in an appropriate manner. */
2831
2832 #ifdef SUPPORT_LIBZ
2833 if (frtype == FR_LIBZ)
2834 gzclose(ingz);
2835 else
2836 #endif
2837
2838 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2839 read failed. If the error indicates that the file isn't in fact bzipped, try
2840 again as a normal file. */
2841
2842 #ifdef SUPPORT_LIBBZ2
2843 if (frtype == FR_LIBBZ2)
2844 {
2845 if (rc == 3)
2846 {
2847 int errnum;
2848 const char *err = BZ2_bzerror(inbz2, &errnum);
2849 if (errnum == BZ_DATA_ERROR_MAGIC)
2850 {
2851 BZ2_bzclose(inbz2);
2852 goto PLAIN_FILE;
2853 }
2854 else if (!silent)
2855 fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
2856 pathname, err);
2857 rc = 2; /* The normal "something went wrong" code */
2858 }
2859 BZ2_bzclose(inbz2);
2860 }
2861 else
2862 #endif
2863
2864 /* Normal file close */
2865
2866 fclose(in);
2867
2868 /* Pass back the yield from pcre2grep(). */
2869
2870 return rc;
2871 }
2872
2873
2874
2875 /*************************************************
2876 * Handle a single-letter, no data option *
2877 *************************************************/
2878
2879 static int
2880 handle_option(int letter, int options)
2881 {
2882 switch(letter)
2883 {
2884 case N_FOFFSETS: file_offsets = TRUE; break;
2885 case N_HELP: help(); pcre2grep_exit(0);
2886 case N_LBUFFER: line_buffered = TRUE; break;
2887 case N_LOFFSETS: line_offsets = number = TRUE; break;
2888 case N_NOJIT: use_jit = FALSE; break;
2889 case 'a': binary_files = BIN_TEXT; break;
2890 case 'c': count_only = TRUE; break;
2891 case 'F': process_options |= PO_FIXED_STRINGS; break;
2892 case 'H': filenames = FN_FORCE; break;
2893 case 'I': binary_files = BIN_NOMATCH; break;
2894 case 'h': filenames = FN_NONE; break;
2895 case 'i': options |= PCRE2_CASELESS; break;
2896 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2897 case 'L': filenames = FN_NOMATCH_ONLY; break;
2898 case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
2899 case 'n': number = TRUE; break;
2900
2901 case 'o':
2902 only_matching_last = add_number(0, only_matching_last);
2903 if (only_matching == NULL) only_matching = only_matching_last;
2904 break;
2905
2906 case 'q': quiet = TRUE; break;
2907 case 'r': dee_action = dee_RECURSE; break;
2908 case 's': silent = TRUE; break;
2909 case 't': show_total_count = TRUE; break;
2910 case 'u': options |= PCRE2_UTF; utf = TRUE; break;
2911 case 'v': invert = TRUE; break;
2912 case 'w': process_options |= PO_WORD_MATCH; break;
2913 case 'x': process_options |= PO_LINE_MATCH; break;
2914
2915 case 'V':
2916 {
2917 unsigned char buffer[128];
2918 (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
2919 fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
2920 }
2921 pcre2grep_exit(0);
2922 break;
2923
2924 default:
2925 fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
2926 pcre2grep_exit(usage(2));
2927 }
2928
2929 return options;
2930 }
2931
2932
2933
2934
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", "11th", etc. The text is built in a
static buffer, so each call overwrites the result of the previous one.

Argument:   n    the number
Returns:         pointer to the static buffer holding the ordinal text
*/

static char *
ordin(int n)
{
  static char buffer[14];
  char *p = buffer;
  int lasttwo = n % 100;
  const char *suffix = "th";

  p += sprintf(p, "%d", n);

  /* The numbers ending in 11, 12, and 13 take "th" (11th, 112th), unlike the
  other numbers that end in 1, 2, or 3 (21st, 102nd, 3rd). */

  if (lasttwo < 11 || lasttwo > 13)
  {
    switch (n%10)
    {
      case 1: suffix = "st"; break;
      case 2: suffix = "nd"; break;
      case 3: suffix = "rd"; break;
      default: break;
    }
  }

  strcpy(p, suffix);
  return buffer;
}
2957
2958
2959
2960 /*************************************************
2961 * Compile a single pattern *
2962 *************************************************/
2963
2964 /* Do nothing if the pattern has already been compiled. This is the case for
2965 include/exclude patterns read from a file.
2966
2967 When the -F option has been used, each "pattern" may be a list of strings,
2968 separated by line breaks. They will be matched literally. We split such a
2969 string and compile the first substring, inserting an additional block into the
2970 pattern chain.
2971
2972 Arguments:
2973 p points to the pattern block
2974 options the PCRE options
2975 popts the processing options
2976 fromfile TRUE if the pattern was read from a file
2977 fromtext file name or identifying text (e.g. "include")
2978 count 0 if this is the only command line pattern, or
2979 number of the command line pattern, or
2980 linenumber for a pattern from a file
2981
2982 Returns: TRUE on success, FALSE after an error
2983 */
2984
2985 static BOOL
2986 compile_pattern(patstr *p, int options, int popts, int fromfile,
2987 const char *fromtext, int count)
2988 {
2989 unsigned char buffer[PATBUFSIZE];
2990 PCRE2_SIZE erroffset;
2991 char *ps = p->string;
2992 unsigned int patlen = strlen(ps);
2993 int errcode;
2994
2995 if (p->compiled != NULL) return TRUE;
2996
2997 if ((popts & PO_FIXED_STRINGS) != 0)
2998 {
2999 int ellength;
3000 char *eop = ps + patlen;
3001 char *pe = end_of_line(ps, eop, &ellength);
3002
3003 if (ellength != 0)
3004 {
3005 if (add_pattern(pe, p) == NULL) return FALSE;
3006 patlen = (int)(pe - ps - ellength);
3007 }
3008 }
3009
3010 sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
3011 p->compiled = pcre2_compile(buffer, PCRE2_ZERO_TERMINATED, options, &errcode,
3012 &erroffset, compile_context);
3013
3014 /* Handle successful compile. Try JIT-compiling if supported and enabled. We
3015 ignore any JIT compiler errors, relying falling back to interpreting if
3016 anything goes wrong with JIT. */
3017
3018 if (p->compiled != NULL)
3019 {
3020 #ifdef SUPPORT_PCRE2GREP_JIT
3021 if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
3022 #endif
3023 return TRUE;
3024 }
3025
3026 /* Handle compile errors */
3027
3028 erroffset -= (int)strlen(prefix[popts]);
3029 if (erroffset > patlen) erroffset = patlen;
3030 pcre2_get_error_message(errcode, buffer, PATBUFSIZE);
3031
3032 if (fromfile)
3033 {
3034 fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
3035 "at offset %d: %s\n", count, fromtext, (int)erroffset, buffer);
3036 }
3037 else
3038 {
3039 if (count == 0)
3040 fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
3041 fromtext, (int)erroffset, buffer);
3042 else
3043 fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
3044 ordin(count), fromtext, (int)erroffset, buffer);
3045 }
3046
3047 return FALSE;
3048 }
3049
3050
3051
3052 /*************************************************
3053 * Read and compile a file of patterns *
3054 *************************************************/
3055
3056 /* This is used for --filelist, --include-from, and --exclude-from.
3057
3058 Arguments:
3059 name the name of the file; "-" is stdin
3060 patptr pointer to the pattern chain anchor
3061 patlastptr pointer to the last pattern pointer
3062 popts the process options to pass to pattern_compile()
3063
3064 Returns: TRUE if all went well
3065 */
3066
3067 static BOOL
3068 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
3069 {
3070 int linenumber = 0;
3071 FILE *f;
3072 const char *filename;
3073 char buffer[PATBUFSIZE];
3074
3075 if (strcmp(name, "-") == 0)
3076 {
3077 f = stdin;
3078 filename = stdin_name;
3079 }
3080 else
3081 {
3082 f = fopen(name, "r");
3083 if (f == NULL)
3084 {
3085 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
3086 return FALSE;
3087 }
3088 filename = name;
3089 }
3090
3091 while (fgets(buffer, PATBUFSIZE, f) != NULL)
3092 {
3093 char *s = buffer + (int)strlen(buffer);
3094 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
3095 *s = 0;
3096 linenumber++;
3097 if (buffer[0] == 0) continue; /* Skip blank lines */
3098
3099 /* Note: this call to add_pattern() puts a pointer to the local variable
3100 "buffer" into the pattern chain. However, that pointer is used only when
3101 compiling the pattern, which happens immediately below, so we flatten it
3102 afterwards, as a precaution against any later code trying to use it. */
3103
3104 *patlastptr = add_pattern(buffer, *patlastptr);
3105 if (*patlastptr == NULL)
3106 {
3107 if (f != stdin) fclose(f);
3108 return FALSE;
3109 }
3110 if (*patptr == NULL) *patptr = *patlastptr;
3111
3112 /* This loop is needed because compiling a "pattern" when -F is set may add
3113 on additional literal patterns if the original contains a newline. In the
3114 common case, it never will, because fgets() stops at a newline. However,
3115 the -N option can be used to give pcre2grep a different newline setting. */
3116
3117 for(;;)
3118 {
3119 if (!compile_pattern(*patlastptr, pcre2_options, popts, TRUE, filename,
3120 linenumber))
3121 {
3122 if (f != stdin) fclose(f);
3123 return FALSE;
3124 }
3125 (*patlastptr)->string = NULL; /* Insurance */
3126 if ((*patlastptr)->next == NULL) break;
3127 *patlastptr = (*patlastptr)->next;
3128 }
3129 }
3130
3131 if (f != stdin) fclose(f);
3132 return TRUE;
3133 }
3134
3135
3136
3137 /*************************************************
3138 * Main program *
3139 *************************************************/
3140
3141 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
3142
3143 int
3144 main(int argc, char **argv)
3145 {
3146 int i, j;
3147 int rc = 1;
3148 BOOL only_one_at_top;
3149 patstr *cp;
3150 fnstr *fn;
3151 const char *locale_from = "--locale";
3152
3153 #ifdef SUPPORT_PCRE2GREP_JIT
3154 pcre2_jit_stack *jit_stack = NULL;
3155 #endif
3156
3157 /* In Windows, stdout is set up as a text stream, which means that \n is
3158 converted to \r\n. This causes output lines that are copied from the input to
3159 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
3160 that stdout is a binary stream. Note that this means all other output to stdout
3161 must use STDOUT_NL to terminate lines. */
3162
3163 #ifdef WIN32
3164 _setmode( _fileno(stdout), _O_BINARY);
3165 #endif
3166
3167 /* Set up default compile and match contexts and a match data block. */
3168
3169 compile_context = pcre2_compile_context_create(NULL);
3170 match_context = pcre2_match_context_create(NULL);
3171 match_data = pcre2_match_data_create(OFFSET_SIZE, NULL);
3172 offsets = pcre2_get_ovector_pointer(match_data);
3173
3174 /* If string (script) callouts are supported, set up the callout processing
3175 function. */
3176
3177 #ifdef SUPPORT_PCRE2GREP_CALLOUT
3178 pcre2_set_callout(match_context, pcre2grep_callout, NULL);
3179 #endif
3180
3181 /* Process the options */
3182
3183 for (i = 1; i < argc; i++)
3184 {
3185 option_item *op = NULL;
3186 char *option_data = (char *)""; /* default to keep compiler happy */
3187 BOOL longop;
3188 BOOL longopwasequals = FALSE;
3189
3190 if (argv[i][0] != '-') break;
3191
3192 /* If we hit an argument that is just "-", it may be a reference to STDIN,
3193 but only if we have previously had -e or -f to define the patterns. */
3194
3195 if (argv[i][1] == 0)
3196 {
3197 if (pattern_files != NULL || patterns != NULL) break;
3198 else pcre2grep_exit(usage(2));
3199 }
3200
3201 /* Handle a long name option, or -- to terminate the options */
3202
3203 if (argv[i][1] == '-')
3204 {
3205 char *arg = argv[i] + 2;
3206 char *argequals = strchr(arg, '=');
3207
3208 if (*arg == 0) /* -- terminates options */
3209 {
3210 i++;
3211 break; /* out of the options-handling loop */
3212 }
3213
3214 longop = TRUE;
3215
3216 /* Some long options have data that follows after =, for example file=name.
3217 Some options have variations in the long name spelling: specifically, we
3218 allow "regexp" because GNU grep allows it, though I personally go along
3219 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3220 These options are entered in the table as "regex(p)". Options can be in
3221 both these categories. */
3222
3223 for (op = optionlist; op->one_char != 0; op++)
3224 {
3225 char *opbra = strchr(op->long_name, '(');
3226 char *equals = strchr(op->long_name, '=');
3227
3228 /* Handle options with only one spelling of the name */
3229
3230 if (opbra == NULL) /* Does not contain '(' */
3231 {
3232 if (equals == NULL) /* Not thing=data case */
3233 {
3234 if (strcmp(arg, op->long_name) == 0) break;
3235 }
3236 else /* Special case xxx=data */
3237 {
3238 int oplen = (int)(equals - op->long_name);
3239 int arglen = (argequals == NULL)?
3240 (int)strlen(arg) : (int)(argequals - arg);
3241 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3242 {
3243 option_data = arg + arglen;
3244 if (*option_data == '=')
3245 {
3246 option_data++;
3247 longopwasequals = TRUE;
3248 }
3249 break;
3250 }
3251 }
3252 }
3253
3254 /* Handle options with an alternate spelling of the name */
3255
3256 else
3257 {
3258 char buff1[24];
3259 char buff2[24];
3260
3261 int baselen = (int)(opbra - op->long_name);
3262 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3263 int arglen = (argequals == NULL || equals == NULL)?
3264 (int)strlen(arg) : (int)(argequals - arg);
3265
3266 sprintf(buff1, "%.*s", baselen, op->long_name);
3267 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
3268
3269 if (strncmp(arg, buff1, arglen) == 0 ||
3270 strncmp(arg, buff2, arglen) == 0)
3271 {
3272 if (equals != NULL && argequals != NULL)
3273 {
3274 option_data = argequals;
3275 if (*option_data == '=')
3276 {
3277 option_data++;
3278 longopwasequals = TRUE;
3279 }
3280 }
3281 break;
3282 }
3283 }
3284 }
3285
3286 if (op->one_char == 0)
3287 {
3288 fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
3289 pcre2grep_exit(usage(2));
3290 }
3291 }
3292
3293 /* Jeffrey Friedl's debugging harness uses these additional options which
3294 are not in the right form for putting in the option table because they use
3295 only one hyphen, yet are more than one character long. By putting them
3296 separately here, they will not get displayed as part of the help() output,
3297 but I don't think Jeffrey will care about that. */
3298
3299 #ifdef JFRIEDL_DEBUG
3300 else if (strcmp(argv[i], "-pre") == 0) {
3301 jfriedl_prefix = argv[++i];
3302 continue;
3303 } else if (strcmp(argv[i], "-post") == 0) {
3304 jfriedl_postfix = argv[++i];
3305 continue;
3306 } else if (strcmp(argv[i], "-XT") == 0) {
3307 sscanf(argv[++i], "%d", &jfriedl_XT);
3308 continue;
3309 } else if (strcmp(argv[i], "-XR") == 0) {
3310 sscanf(argv[++i], "%d", &jfriedl_XR);
3311 continue;
3312 }
3313 #endif
3314
3315
3316 /* One-char options; many that have no data may be in a single argument; we
3317 continue till we hit the last one or one that needs data. */
3318
3319 else
3320 {
3321 char *s = argv[i] + 1;
3322 longop = FALSE;
3323
3324 while (*s != 0)
3325 {
3326 for (op = optionlist; op->one_char != 0; op++)
3327 {
3328 if (*s == op->one_char) break;
3329 }
3330 if (op->one_char == 0)
3331 {
3332 fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
3333 *s, argv[i]);
3334 pcre2grep_exit(usage(2));
3335 }
3336
3337 option_data = s+1;
3338
3339 /* Break out if this is the last character in the string; it's handled
3340 below like a single multi-char option. */
3341
3342 if (*option_data == 0) break;
3343
3344 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
3345 are used for ones that either have a numerical number or defaults, i.e.
3346 the data is optional. If a digit follows, there is data; if not, carry on
3347 with other single-character options in the same string. */
3348
3349 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
3350 {
3351 if (isdigit((unsigned char)s[1])) break;
3352 }
3353 else /* Check for an option with data */
3354 {
3355 if (op->type != OP_NODATA) break;
3356 }
3357
3358 /* Handle a single-character option with no data, then loop for the
3359 next character in the string. */
3360
3361 pcre2_options = handle_option(*s++, pcre2_options);
3362 }
3363 }
3364
3365 /* At this point we should have op pointing to a matched option. If the type
3366 is NO_DATA, it means that there is no data, and the option might set
3367 something in the PCRE options. */
3368
3369 if (op->type == OP_NODATA)
3370 {
3371 pcre2_options = handle_option(op->one_char, pcre2_options);
3372 continue;
3373 }
3374
3375 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
3376 either has a value or defaults to something. It cannot have data in a
3377 separate item. At the moment, the only such options are "colo(u)r",
3378 "only-matching", and Jeffrey Friedl's special -S debugging option. */
3379
3380 if (*option_data == 0 &&
3381 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
3382 op->type == OP_OP_NUMBERS))
3383 {
3384 switch (op->one_char)
3385 {
3386 case N_COLOUR:
3387 colour_option = "auto";
3388 break;
3389
3390 case 'o':
3391 only_matching_last = add_number(0, only_matching_last);
3392 if (only_matching == NULL) only_matching = only_matching_last;
3393 break;
3394
3395 #ifdef JFRIEDL_DEBUG
3396 case 'S':
3397 S_arg = 0;
3398 break;
3399 #endif
3400 }
3401 continue;
3402 }
3403
3404 /* Otherwise, find the data string for the option. */
3405
3406 if (*option_data == 0)
3407 {
3408 if (i >= argc - 1 || longopwasequals)
3409 {
3410 fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
3411 pcre2grep_exit(usage(2));
3412 }
3413 option_data = argv[++i];
3414 }
3415
3416 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
3417 added to a chain of numbers. */
3418
3419 if (op->type == OP_OP_NUMBERS)
3420 {
3421 unsigned long int n = decode_number(option_data, op, longop);
3422 omdatastr *omd = (omdatastr *)op->dataptr;
3423 *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
3424 if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
3425 }
3426
3427 /* If the option type is OP_PATLIST, it's the -e option, or one of the
3428 include/exclude options, which can be called multiple times to create lists
3429 of patterns. */
3430
3431 else if (op->type == OP_PATLIST)
3432 {
3433 patdatastr *pd = (patdatastr *)op->dataptr;
3434 *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
3435 if (*(pd->lastptr) == NULL) goto EXIT2;
3436 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
3437 }
3438
3439 /* If the option type is OP_FILELIST, it's one of the options that names a
3440 file. */
3441
3442 else if (op->type == OP_FILELIST)
3443 {
3444 fndatastr *fd = (fndatastr *)op->dataptr;
3445 fn = (fnstr *)malloc(sizeof(fnstr));
3446 if (fn == NULL)
3447 {
3448 fprintf(stderr, "pcre2grep: malloc failed\n");
3449 goto EXIT2;
3450 }
3451 fn->next = NULL;
3452 fn->name = option_data;
3453 if (*(fd->anchor) == NULL)
3454 *(fd->anchor) = fn;
3455 else
3456 (*(fd->lastptr))->next = fn;
3457 *(fd->lastptr) = fn;
3458 }
3459
3460 /* Handle OP_BINARY_FILES */
3461
3462 else if (op->type == OP_BINFILES)
3463 {
3464 if (strcmp(option_data, "binary") == 0)
3465 binary_files = BIN_BINARY;
3466 else if (strcmp(option_data, "without-match") == 0)
3467 binary_files = BIN_NOMATCH;
3468 else if (strcmp(option_data, "text") == 0)
3469 binary_files = BIN_TEXT;
3470 else
3471 {
3472 fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
3473 option_data);
3474 pcre2grep_exit(usage(2));
3475 }
3476 }
3477
3478 /* Otherwise, deal with a single string or numeric data value. */
3479
3480 else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
3481 op->type != OP_OP_NUMBER)
3482 {
3483 *((char **)op->dataptr) = option_data;
3484 }
3485 else
3486 {
3487 unsigned long int n = decode_number(option_data, op, longop);
3488 if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
3489 else *((int *)op->dataptr) = n;
3490 }
3491 }
3492
3493 /* Options have been decoded. If -C was used, its value is used as a default
3494 for -A and -B. */
3495
3496 if (both_context > 0)
3497 {
3498 if (after_context == 0) after_context = both_context;
3499 if (before_context == 0) before_context = both_context;
3500 }
3501
3502 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
3503 However, all three set show_only_matching because they display, each in their
3504 own way, only the data that has matched. */
3505
3506 if ((only_matching != NULL && (file_offsets || line_offsets)) ||
3507 (file_offsets && line_offsets))
3508 {
3509 fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --file-offsets "
3510 "and/or --line-offsets\n");
3511 pcre2grep_exit(usage(2));
3512 }
3513
3514 /* Put limits into the match data block. */
3515
3516 if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
3517 if (recursion_limit > 0) pcre2_set_recursion_limit(match_context, recursion_limit);
3518
3519 if (only_matching != NULL || file_offsets || line_offsets)
3520 show_only_matching = TRUE;
3521
3522 /* If a locale has not been provided as an option, see if the LC_CTYPE or
3523 LC_ALL environment variable is set, and if so, use it. */
3524
3525 if (locale == NULL)
3526 {
3527 locale = getenv("LC_ALL");
3528 locale_from = "LCC_ALL";
3529 }
3530
3531 if (locale == NULL)
3532 {
3533 locale = getenv("LC_CTYPE");
3534 locale_from = "LC_CTYPE";
3535 }
3536
3537 /* If a locale is set, use it to generate the tables the PCRE needs. Passing
3538 NULL to pcre2_maketables() means that malloc() is used to get the memory. */
3539
3540 if (locale != NULL)
3541 {
3542 if (setlocale(LC_CTYPE, locale) == NULL)
3543 {
3544 fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
3545 locale, locale_from);
3546 goto EXIT2;
3547 }
3548 character_tables = pcre2_maketables(NULL);
3549 pcre2_set_character_tables(compile_context, character_tables);
3550 }
3551
3552 /* Sort out colouring */
3553
3554 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
3555 {
3556 if (strcmp(colour_option, "always") == 0)
3557 #ifdef WIN32
3558 do_ansi = !is_stdout_tty(),
3559 #endif
3560 do_colour = TRUE;
3561 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
3562 else
3563 {
3564 fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
3565 colour_option);
3566 goto EXIT2;
3567 }
3568 if (do_colour)
3569 {
3570 char *cs = getenv("PCRE2GREP_COLOUR");
3571 if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
3572 if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
3573 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
3574 if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
3575 if (cs == NULL) cs = getenv("GREP_COLOR");
3576 if (cs != NULL)
3577 {
3578 if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
3579 }
3580 #ifdef WIN32
3581 init_colour_output();
3582 #endif
3583 }
3584 }
3585
3586 /* Sort out a newline setting. */
3587
3588 if (newline_arg != NULL)
3589 {
3590 for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
3591 endlinetype++)
3592 {
3593 if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
3594 }
3595 if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
3596 pcre2_set_newline(compile_context, endlinetype);
3597 else
3598 {
3599 fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
3600 newline_arg);
3601 goto EXIT2;
3602 }
3603 }
3604
3605 /* Find default newline convention */
3606
3607 else
3608 {
3609 (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
3610 }
3611
3612 /* Interpret the text values for -d and -D */
3613
3614 if (dee_option != NULL)
3615 {
3616 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
3617 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
3618 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
3619 else
3620 {
3621 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
3622 goto EXIT2;
3623 }
3624 }
3625
3626 if (DEE_option != NULL)
3627 {
3628 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
3629 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
3630 else
3631 {
3632 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
3633 goto EXIT2;
3634 }
3635 }
3636
3637 /* Check the values for Jeffrey Friedl's debugging options. */
3638
3639 #ifdef JFRIEDL_DEBUG
3640 if (S_arg > 9)
3641 {
3642 fprintf(stderr, "pcre2grep: bad value for -S option\n");
3643 return 2;
3644 }
3645 if (jfriedl_XT != 0 || jfriedl_XR != 0)
3646 {
3647 if (jfriedl_XT == 0) jfriedl_XT = 1;
3648 if (jfriedl_XR == 0) jfriedl_XR = 1;
3649 }
3650 #endif
3651
3652 /* If use_jit is set, check whether JIT is available. If not, do not try
3653 to use JIT. */
3654
3655 if (use_jit)
3656 {
3657 uint32_t answer;
3658 (void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
3659 if (!answer) use_jit = FALSE;
3660 }
3661
3662 /* Get memory for the main buffer. */
3663
3664 if (bufthird <= 0)
3665 {
3666 fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
3667 goto EXIT2;
3668 }
3669
3670 bufsize = 3*bufthird;
3671 main_buffer = (char *)malloc(bufsize);
3672
3673 if (main_buffer == NULL)
3674 {
3675 fprintf(stderr, "pcre2grep: malloc failed\n");
3676 goto EXIT2;
3677 }
3678
3679 /* If no patterns were provided by -e, and there are no files provided by -f,
3680 the first argument is the one and only pattern, and it must exist. */
3681
3682 if (patterns == NULL && pattern_files == NULL)
3683 {
3684 if (i >= argc) return usage(2);
3685 patterns = patterns_last = add_pattern(argv[i++], NULL);
3686 if (patterns == NULL) goto EXIT2;
3687 }
3688
3689 /* Compile the patterns that were provided on the command line, either by
3690 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
3691 after all the command-line options are read so that we know which PCRE options
3692 to use. When -F is used, compile_pattern() may add another block into the
3693 chain, so we must not access the next pointer till after the compile. */
3694
3695 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3696 {
3697 if (!compile_pattern(cp, pcre2_options, process_options, FALSE, "command-line",
3698 (j == 1 && patterns->next == NULL)? 0 : j))
3699 goto EXIT2;
3700 }
3701
3702 /* Read and compile the regular expressions that are provided in files. */
3703
3704 for (fn = pattern_files; fn != NULL; fn = fn->next)
3705 {
3706 if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3707 goto EXIT2;
3708 }
3709
3710 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
3711
3712 #ifdef SUPPORT_PCRE2GREP_JIT
3713 if (use_jit)
3714 jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
3715 #endif
3716
3717 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3718 {
3719 #ifdef SUPPORT_PCRE2GREP_JIT
3720 if (jit_stack != NULL && cp->compiled != NULL)
3721 pcre2_jit_stack_assign(match_context, NULL, jit_stack);
3722 #endif
3723 }
3724
3725 /* If there are include or exclude patterns read from the command line, compile
3726 them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3727 0. */
3728
3729 for (j = 0; j < 4; j++)
3730 {
3731 int k;
3732 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
3733 {
3734 if (!compile_pattern(cp, pcre2_options, 0, FALSE, incexname[j],
3735 (k == 1 && cp->next == NULL)? 0 : k))
3736 goto EXIT2;
3737 }
3738 }
3739
3740 /* Read and compile include/exclude patterns from files. */
3741
3742 for (fn = include_from; fn != NULL; fn = fn->next)
3743 {
3744 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3745 goto EXIT2;
3746 }
3747
3748 for (fn = exclude_from; fn != NULL; fn = fn->next)
3749 {
3750 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3751 goto EXIT2;
3752 }
3753
3754 /* If there are no files that contain lists of files to search, and there are
3755 no file arguments, search stdin, and then exit. */
3756
3757 if (file_lists == NULL && i >= argc)
3758 {
3759 rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
3760 (filenames > FN_DEFAULT)? stdin_name : NULL);
3761 goto EXIT;
3762 }
3763
3764 /* If any files that contains a list of files to search have been specified,
3765 read them line by line and search the given files. */
3766
3767 for (fn = file_lists; fn != NULL; fn = fn->next)
3768 {
3769 char buffer[PATBUFSIZE];
3770 FILE *fl;
3771 if (strcmp(fn->name, "-") == 0) fl = stdin; else
3772 {
3773 fl = fopen(fn->name, "rb");
3774 if (fl == NULL)
3775 {
3776 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
3777 strerror(errno));
3778 goto EXIT2;
3779 }
3780 }
3781 while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3782 {
3783 int frc;
3784 char *end = buffer + (int)strlen(buffer);
3785 while (end > buffer && isspace(end[-1])) end--;
3786 *end = 0;
3787 if (*buffer != 0)
3788 {
3789 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3790 if (frc > 1) rc = frc;
3791 else if (frc == 0 && rc == 1) rc = 0;
3792 }
3793 }
3794 if (fl != stdin) fclose(fl);
3795 }
3796
3797 /* After handling file-list, work through remaining arguments. Pass in the fact
3798 that there is only one argument at top level - this suppresses the file name if
3799 the argument is not a directory and filenames are not otherwise forced. */
3800
3801 only_one_at_top = i == argc - 1 && file_lists == NULL;
3802
3803 for (; i < argc; i++)
3804 {
3805 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3806 only_one_at_top);
3807 if (frc > 1) rc = frc;
3808 else if (frc == 0 && rc == 1) rc = 0;
3809 }
3810
3811 /* Show the total number of matches if requested, but not if only one file's
3812 count was printed. */
3813
3814 if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
3815 {
3816 if (counts_printed != 0 && filenames >= FN_DEFAULT)
3817 fprintf(stdout, "TOTAL:");
3818 fprintf(stdout, "%d" STDOUT_NL, total_count);
3819 }
3820
3821 EXIT:
3822 #ifdef SUPPORT_PCRE2GREP_JIT
3823 if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
3824 #endif
3825
3826 free(main_buffer);
3827 free((void *)character_tables);
3828
3829 pcre2_compile_context_free(compile_context);
3830 pcre2_match_context_free(match_context);
3831 pcre2_match_data_free(match_data);
3832
3833 free_pattern_chain(patterns);
3834 free_pattern_chain(include_patterns);
3835 free_pattern_chain(include_dir_patterns);
3836 free_pattern_chain(exclude_patterns);
3837 free_pattern_chain(exclude_dir_patterns);
3838
3839 free_file_chain(exclude_from);
3840 free_file_chain(include_from);
3841 free_file_chain(pattern_files);
3842 free_file_chain(file_lists);
3843
3844 while (only_matching != NULL)
3845 {
3846 omstr *this = only_matching;
3847 only_matching = this->next;
3848 free(this);
3849 }
3850
3851 pcre2grep_exit(rc);
3852
3853 EXIT2:
3854 rc = 2;
3855 goto EXIT;
3856 }
3857
3858 /* End of pcre2grep */

  ViewVC Help
Powered by ViewVC 1.1.5