/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 904 - (show annotations)
Mon Jan 23 17:30:49 2012 UTC (7 years, 10 months ago) by ph10
File MIME type: text/plain
File size: 80046 byte(s)
Additional casts to avoid compiler warnings, originally from a MS compiler, but 
also given by gcc if you turn on enough warnings.
1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2012 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
/* Truth values and the integer type used to hold them. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

/* OFFSET_SIZE is the number of ints in the offsets vector that is handed to
pcre_exec(). MAX_PATTERN_COUNT is presumably the upper limit on the number of
patterns that can be given -- TODO confirm against the option-handling code. */

#define MAX_PATTERN_COUNT 100
#define OFFSET_SIZE 99

/* The pattern buffer size is BUFSIZ, but never less than 8192. */

#if BUFSIZ > 8192
#define PATBUFSIZE BUFSIZ
#else
#define PATBUFSIZE 8192
#endif
81
/* Settings for the "filenames" variable, which controls file name output.
The ordering matters: every value greater than FN_DEFAULT is taken to mean
that a file name is wanted. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_MATCH_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* How a file is read: plainly, via libz, or via libbz2. */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};

/* Actions selected by the -d option (dee_xxx) and the -D option (DEE_xxx). */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Flag bits for the special processing options. */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* The recognized line ending conventions. */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};
106
107 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108 environments), a warning is issued if the value of fwrite() is ignored.
109 Unfortunately, casting to (void) does not suppress the warning. To get round
110 this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111 apply to fprintf(). */
112
/* Call fwrite() and deliberately discard its result by testing it in an "if"
with an empty body, which is what silences the unused-result warning. The
whole thing is wrapped in do { } while (0) so that FWRITE(...); is exactly one
statement and can safely be used as the body of an if that has an else. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114
115
116
117 /*************************************************
118 * Global variables *
119 *************************************************/
120
121 /* Jeffrey Friedl has some debugging requirements that are not part of the
122 regular code. */
123
124 #ifdef JFRIEDL_DEBUG
125 static int S_arg = -1;
126 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128 static const char *jfriedl_prefix = "";
129 static const char *jfriedl_postfix = "";
130 #endif
131
132 static int endlinetype;
133
134 static char *colour_string = (char *)"1;31";
135 static char *colour_option = NULL;
136 static char *dee_option = NULL;
137 static char *DEE_option = NULL;
138 static char *main_buffer = NULL;
139 static char *newline = NULL;
140 static char *pattern_filename = NULL;
141 static char *stdin_name = (char *)"(standard input)";
142 static char *locale = NULL;
143
144 static const unsigned char *pcretables = NULL;
145
146 static int pattern_count = 0;
147 static pcre **pattern_list = NULL;
148 static pcre_extra **hints_list = NULL;
149
150 static char *include_pattern = NULL;
151 static char *exclude_pattern = NULL;
152 static char *include_dir_pattern = NULL;
153 static char *exclude_dir_pattern = NULL;
154
155 static pcre *include_compiled = NULL;
156 static pcre *exclude_compiled = NULL;
157 static pcre *include_dir_compiled = NULL;
158 static pcre *exclude_dir_compiled = NULL;
159
160 static int after_context = 0;
161 static int before_context = 0;
162 static int both_context = 0;
163 static int bufthird = PCREGREP_BUFSIZE;
164 static int bufsize = 3*PCREGREP_BUFSIZE;
165 static int dee_action = dee_READ;
166 static int DEE_action = DEE_READ;
167 static int error_count = 0;
168 static int filenames = FN_DEFAULT;
169 static int only_matching = -1;
170 static int process_options = 0;
171
172 #ifdef SUPPORT_PCREGREP_JIT
173 static int study_options = PCRE_STUDY_JIT_COMPILE;
174 #else
175 static int study_options = 0;
176 #endif
177
178 static unsigned long int match_limit = 0;
179 static unsigned long int match_limit_recursion = 0;
180
181 static BOOL count_only = FALSE;
182 static BOOL do_colour = FALSE;
183 static BOOL file_offsets = FALSE;
184 static BOOL hyphenpending = FALSE;
185 static BOOL invert = FALSE;
186 static BOOL line_buffered = FALSE;
187 static BOOL line_offsets = FALSE;
188 static BOOL multiline = FALSE;
189 static BOOL number = FALSE;
190 static BOOL omit_zero_count = FALSE;
191 static BOOL resource_error = FALSE;
192 static BOOL quiet = FALSE;
193 static BOOL silent = FALSE;
194 static BOOL utf8 = FALSE;
195
196 /* Structure for options and list of them */
197
/* The kinds of data that an option can take. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_LONGNUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

/* One entry in the table of options. */

typedef struct option_item {
  int type;                /* One of the OP_xxx values */
  int one_char;            /* Single-letter form, or a negative N_xxx code */
  void *dataptr;           /* Where the option's data is stored, or NULL */
  const char *long_name;   /* Long option name */
  const char *help_text;   /* Description of the option */
} option_item;
208
209 /* Options without a single-letter equivalent get a negative value. This can be
210 used to identify them. */
211
/* Negative codes for the options that have no single-letter form. */

#define N_COLOUR       (-1)
#define N_EXCLUDE      (-2)
#define N_EXCLUDE_DIR  (-3)
#define N_HELP         (-4)
#define N_INCLUDE      (-5)
#define N_INCLUDE_DIR  (-6)
#define N_LABEL        (-7)
#define N_LOCALE       (-8)
#define N_NULL         (-9)
#define N_LOFFSETS     (-10)
#define N_FOFFSETS     (-11)
#define N_LBUFFER      (-12)
#define N_M_LIMIT      (-13)
#define N_M_LIMIT_REC  (-14)
#define N_BUFSIZE      (-15)
#define N_NOJIT        (-16)
228
229 static option_item optionlist[] = {
230 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
231 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
232 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
233 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
234 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
235 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
236 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
237 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
238 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
239 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
240 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
241 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
242 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
243 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
244 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
245 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
246 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
247 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
248 #ifdef SUPPORT_PCREGREP_JIT
249 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
250 #else
251 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
252 #endif
253 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
254 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
255 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
256 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
257 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
258 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
259 { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
260 { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
261 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
262 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
263 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
264 { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
265 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
266 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
267 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
268 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
269 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
270 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
271
272 /* These two were accidentally implemented with underscores instead of
273 hyphens in the option names. As this was not discovered for several releases,
274 the incorrect versions are left in the table for compatibility. However, the
275 --help function misses out any option that has an underscore in its name. */
276
277 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
278 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
279
280 #ifdef JFRIEDL_DEBUG
281 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
282 #endif
283 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
284 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
285 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
286 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
287 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
288 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
289 { OP_NODATA, 0, NULL, NULL, NULL }
290 };
291
292 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
293 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
294 that the combination of -w and -x has the same effect as -x on its own, so we
295 can treat them as the same. */
296
/* Text placed before and after each pattern. Both tables are indexed by the
low three bits of process_options: 1 = -w, 2 = -x, 4 = -F. */

static const char *prefix[] = {
  "",            /* 0: no special processing */
  "\\b",         /* 1: -w */
  "^(?:",        /* 2: -x */
  "^(?:",        /* 3: -w and -x, treated as -x */
  "\\Q",         /* 4: -F */
  "\\b\\Q",      /* 5: -F and -w */
  "^(?:\\Q",     /* 6: -F and -x */
  "^(?:\\Q"      /* 7: -F, -w and -x, treated as -F and -x */
};

static const char *suffix[] = {
  "",            /* 0: no special processing */
  "\\b",         /* 1: -w */
  ")$",          /* 2: -x */
  ")$",          /* 3: -w and -x, treated as -x */
  "\\E",         /* 4: -F */
  "\\E\\b",      /* 5: -F and -w */
  "\\E)$",       /* 6: -F and -x */
  "\\E)$"        /* 7: -F, -w and -x, treated as -F and -x */
};
302
303 /* UTF-8 tables - used only when the newline setting is "any". */
304
/* Masks for the data bits in the first byte of a UTF-8 character, indexed by
the number of additional bytes. */

const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};

/* Number of additional bytes needed for a UTF-8 character, indexed by the
bottom six bits of its first byte. */

const char utf8_table4[] = {
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3,  4, 4, 4, 4, 5, 5, 5, 5
};
312
313
314
315 /*************************************************
316 * Exit from the program *
317 *************************************************/
318
319 /* If there has been a resource error, give a suitable message.
320
321 Argument: the return code
322 Returns: does not return
323 */
324
325 static void
326 pcregrep_exit(int rc)
327 {
328 if (resource_error)
329 {
330 fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
331 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
332 PCRE_ERROR_JIT_STACKLIMIT);
333 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
334 }
335
336 exit(rc);
337 }
338
339
340 /*************************************************
341 * OS-specific functions *
342 *************************************************/
343
344 /* These functions are defined so that they can be made system specific,
345 although at present the only ones are for Unix, Win32, and for "no support". */
346
347
348 /************* Directory scanning in Unix ***********/
349
350 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
351 #include <sys/types.h>
352 #include <sys/stat.h>
353 #include <dirent.h>
354
355 typedef DIR directory_type;
356
/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if the path is a directory
            0 if it is not, or if stat() fails
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  /* A failing stat() is reported as "not a directory", in the expectation
  that opening the path as a file will then fail. */

  if (stat(filename, &info) < 0) return 0;

  if (S_ISDIR(info.st_mode)) return '/';
  return 0;
}
365
366 static directory_type *
367 opendirectory(char *filename)
368 {
369 return opendir(filename);
370 }
371
372 static char *
373 readdirectory(directory_type *dir)
374 {
375 for (;;)
376 {
377 struct dirent *dent = readdir(dir);
378 if (dent == NULL) return NULL;
379 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
380 return dent->d_name;
381 }
382 /* Control never reaches here */
383 }
384
385 static void
386 closedirectory(directory_type *dir)
387 {
388 closedir(dir);
389 }
390
391
392 /************* Test for regular file in Unix **********/
393
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    non-zero if the path is a regular file, or if stat() fails
            0 otherwise
*/

static int
isregfile(char *filename)
{
  struct stat info;

  /* A failing stat() is reported as "regular", in the expectation that
  opening the path as a file will then fail. */

  if (stat(filename, &info) < 0) return 1;

  return S_ISREG(info.st_mode);
}
402
403
404 /************* Test for a terminal in Unix **********/
405
406 static BOOL
407 is_stdout_tty(void)
408 {
409 return isatty(fileno(stdout));
410 }
411
412 static BOOL
413 is_file_tty(FILE *f)
414 {
415 return isatty(fileno(f));
416 }
417
418
419 /************* Directory scanning in Win32 ***********/
420
421 /* I (Philip Hazel) have no means of testing this code. It was contributed by
422 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
423 when it did not exist. David Byron added a patch that moved the #include of
424 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
425 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
426 undefined when it is indeed undefined. */
427
428 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
429
430 #ifndef STRICT
431 # define STRICT
432 #endif
433 #ifndef WIN32_LEAN_AND_MEAN
434 # define WIN32_LEAN_AND_MEAN
435 #endif
436
437 #include <windows.h>
438
439 #ifndef INVALID_FILE_ATTRIBUTES
440 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
441 #endif
442
443 typedef struct directory_type
444 {
445 HANDLE handle;
446 BOOL first;
447 WIN32_FIND_DATA data;
448 } directory_type;
449
450 int
451 isdirectory(char *filename)
452 {
453 DWORD attr = GetFileAttributes(filename);
454 if (attr == INVALID_FILE_ATTRIBUTES)
455 return 0;
456 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
457 }
458
459 directory_type *
460 opendirectory(char *filename)
461 {
462 size_t len;
463 char *pattern;
464 directory_type *dir;
465 DWORD err;
466 len = strlen(filename);
467 pattern = (char *) malloc(len + 3);
468 dir = (directory_type *) malloc(sizeof(*dir));
469 if ((pattern == NULL) || (dir == NULL))
470 {
471 fprintf(stderr, "pcregrep: malloc failed\n");
472 pcregrep_exit(2);
473 }
474 memcpy(pattern, filename, len);
475 memcpy(&(pattern[len]), "\\*", 3);
476 dir->handle = FindFirstFile(pattern, &(dir->data));
477 if (dir->handle != INVALID_HANDLE_VALUE)
478 {
479 free(pattern);
480 dir->first = TRUE;
481 return dir;
482 }
483 err = GetLastError();
484 free(pattern);
485 free(dir);
486 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
487 return NULL;
488 }
489
490 char *
491 readdirectory(directory_type *dir)
492 {
493 for (;;)
494 {
495 if (!dir->first)
496 {
497 if (!FindNextFile(dir->handle, &(dir->data)))
498 return NULL;
499 }
500 else
501 {
502 dir->first = FALSE;
503 }
504 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
505 return dir->data.cFileName;
506 }
507 #ifndef _MSC_VER
508 return NULL; /* Keep compiler happy; never executed */
509 #endif
510 }
511
512 void
513 closedirectory(directory_type *dir)
514 {
515 FindClose(dir->handle);
516 free(dir);
517 }
518
519
520 /************* Test for regular file in Win32 **********/
521
522 /* I don't know how to do this, or if it can be done; assume all paths are
523 regular if they are not directories. */
524
/* Win32 version: anything that is not a directory is taken to be a regular
file.

Argument:   filename   the path to test
Returns:    1 if isdirectory() yields zero, otherwise 0
*/

int
isregfile(char *filename)
{
  if (isdirectory(filename)) return 0;
  return 1;
}
529
530
531 /************* Test for a terminal in Win32 **********/
532
533 /* I don't know how to do this; assume never */
534
535 static BOOL
536 is_stdout_tty(void)
537 {
538 return FALSE;
539 }
540
541 static BOOL
542 is_file_tty(FILE *f)
543 {
544 return FALSE;
545 }
546
547
548 /************* Directory scanning when we can't do it ***********/
549
550 /* The type is void, and apart from isdirectory(), the functions do nothing. */
551
552 #else
553
/* Stand-ins for systems without directory support. Nothing is ever reported
as a directory, opening and reading always yield a null pointer, and closing
does nothing. */

typedef void directory_type;

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
560
561
562 /************* Test for regular when we can't do it **********/
563
564 /* Assume all files are regular. */
565
/* Without any means of testing, every path is reported as a regular file. */

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
567
568
569 /************* Test for a terminal when we can't do it **********/
570
571 static BOOL
572 is_stdout_tty(void)
573 {
574 return FALSE;
575 }
576
577 static BOOL
578 is_file_tty(FILE *f)
579 {
580 return FALSE;
581 }
582
583 #endif
584
585
586
587 #ifndef HAVE_STRERROR
588 /*************************************************
589 * Provide strerror() for non-ANSI libraries *
590 *************************************************/
591
592 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
593 in their libraries, but can provide the same facility by this simple
594 alternative function. */
595
/* The library's table of error messages and the number of entries in it. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement for strerror(), compiled only when HAVE_STRERROR is not defined.

Argument:   n   the error number
Returns:    the entry in sys_errlist[] for n, or a fixed text when n is
            outside the range 0 to sys_nerr-1
*/

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr) return sys_errlist[n];
  return "unknown error number";
}
605 #endif /* HAVE_STRERROR */
606
607
608
609 /*************************************************
610 * Read one line of input *
611 *************************************************/
612
613 /* Normally, input is read using fread() into a large buffer, so many lines may
614 be read at once. However, doing this for tty input means that no output appears
615 until a lot of input has been typed. Instead, tty input is handled line by
616 line. We cannot use fgets() for this, because it does not stop at a binary
617 zero, and therefore there is no way of telling how many characters it has read,
618 because there may be binary zeros embedded in the data.
619
620 Arguments:
621 buffer the buffer to read into
622 length the maximum number of characters to read
623 f the file
624
625 Returns: the number of characters read, zero at end of file
626 */
627
/* Read a single line from a stream, one character at a time, so that binary
zeros in the data are counted correctly. Reading stops after a '\n' has been
stored, when the buffer is full, or at end of file. No terminating zero is
added.

The limit is tested before each character is read, so nothing is read or
stored when length is zero or negative. (Testing only after storing would
write one byte beyond a zero-length buffer.)

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
  int yield = 0;

  while (yield < length)
    {
    int c = fgetc(f);
    if (c == EOF) break;
    buffer[yield++] = (char)c;
    if (c == '\n') break;
    }

  return (unsigned int)yield;
}
640
641
642
643 /*************************************************
644 * Find end of line *
645 *************************************************/
646
647 /* The length of the endline sequence that is found is set via lenptr. This may
648 be zero at the very end of the file if there is no line-ending sequence there.
649
650 Arguments:
651 p current position in line
652 endptr end of available data
653 lenptr where to put the length of the eol sequence
654
655 Returns: pointer after the last byte of the line,
656 including the newline byte(s)
657 */
658
659 static char *
660 end_of_line(char *p, char *endptr, int *lenptr)
661 {
662 switch(endlinetype)
663 {
664 default: /* Just in case */
665 case EL_LF:
666 while (p < endptr && *p != '\n') p++;
667 if (p < endptr)
668 {
669 *lenptr = 1;
670 return p + 1;
671 }
672 *lenptr = 0;
673 return endptr;
674
675 case EL_CR:
676 while (p < endptr && *p != '\r') p++;
677 if (p < endptr)
678 {
679 *lenptr = 1;
680 return p + 1;
681 }
682 *lenptr = 0;
683 return endptr;
684
685 case EL_CRLF:
686 for (;;)
687 {
688 while (p < endptr && *p != '\r') p++;
689 if (++p >= endptr)
690 {
691 *lenptr = 0;
692 return endptr;
693 }
694 if (*p == '\n')
695 {
696 *lenptr = 2;
697 return p + 1;
698 }
699 }
700 break;
701
702 case EL_ANYCRLF:
703 while (p < endptr)
704 {
705 int extra = 0;
706 register int c = *((unsigned char *)p);
707
708 if (utf8 && c >= 0xc0)
709 {
710 int gcii, gcss;
711 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
712 gcss = 6*extra;
713 c = (c & utf8_table3[extra]) << gcss;
714 for (gcii = 1; gcii <= extra; gcii++)
715 {
716 gcss -= 6;
717 c |= (p[gcii] & 0x3f) << gcss;
718 }
719 }
720
721 p += 1 + extra;
722
723 switch (c)
724 {
725 case 0x0a: /* LF */
726 *lenptr = 1;
727 return p;
728
729 case 0x0d: /* CR */
730 if (p < endptr && *p == 0x0a)
731 {
732 *lenptr = 2;
733 p++;
734 }
735 else *lenptr = 1;
736 return p;
737
738 default:
739 break;
740 }
741 } /* End of loop for ANYCRLF case */
742
743 *lenptr = 0; /* Must have hit the end */
744 return endptr;
745
746 case EL_ANY:
747 while (p < endptr)
748 {
749 int extra = 0;
750 register int c = *((unsigned char *)p);
751
752 if (utf8 && c >= 0xc0)
753 {
754 int gcii, gcss;
755 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
756 gcss = 6*extra;
757 c = (c & utf8_table3[extra]) << gcss;
758 for (gcii = 1; gcii <= extra; gcii++)
759 {
760 gcss -= 6;
761 c |= (p[gcii] & 0x3f) << gcss;
762 }
763 }
764
765 p += 1 + extra;
766
767 switch (c)
768 {
769 case 0x0a: /* LF */
770 case 0x0b: /* VT */
771 case 0x0c: /* FF */
772 *lenptr = 1;
773 return p;
774
775 case 0x0d: /* CR */
776 if (p < endptr && *p == 0x0a)
777 {
778 *lenptr = 2;
779 p++;
780 }
781 else *lenptr = 1;
782 return p;
783
784 case 0x85: /* NEL */
785 *lenptr = utf8? 2 : 1;
786 return p;
787
788 case 0x2028: /* LS */
789 case 0x2029: /* PS */
790 *lenptr = 3;
791 return p;
792
793 default:
794 break;
795 }
796 } /* End of loop for ANY case */
797
798 *lenptr = 0; /* Must have hit the end */
799 return endptr;
800 } /* End of overall switch */
801 }
802
803
804
805 /*************************************************
806 * Find start of previous line *
807 *************************************************/
808
809 /* This is called when looking back for before lines to print.
810
811 Arguments:
812 p start of the subsequent line
813 startptr start of available data
814
815 Returns: pointer to the start of the previous line
816 */
817
818 static char *
819 previous_line(char *p, char *startptr)
820 {
821 switch(endlinetype)
822 {
823 default: /* Just in case */
824 case EL_LF:
825 p--;
826 while (p > startptr && p[-1] != '\n') p--;
827 return p;
828
829 case EL_CR:
830 p--;
831 while (p > startptr && p[-1] != '\n') p--;
832 return p;
833
834 case EL_CRLF:
835 for (;;)
836 {
837 p -= 2;
838 while (p > startptr && p[-1] != '\n') p--;
839 if (p <= startptr + 1 || p[-2] == '\r') return p;
840 }
841 return p; /* But control should never get here */
842
843 case EL_ANY:
844 case EL_ANYCRLF:
845 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
846 if (utf8) while ((*p & 0xc0) == 0x80) p--;
847
848 while (p > startptr)
849 {
850 register int c;
851 char *pp = p - 1;
852
853 if (utf8)
854 {
855 int extra = 0;
856 while ((*pp & 0xc0) == 0x80) pp--;
857 c = *((unsigned char *)pp);
858 if (c >= 0xc0)
859 {
860 int gcii, gcss;
861 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
862 gcss = 6*extra;
863 c = (c & utf8_table3[extra]) << gcss;
864 for (gcii = 1; gcii <= extra; gcii++)
865 {
866 gcss -= 6;
867 c |= (pp[gcii] & 0x3f) << gcss;
868 }
869 }
870 }
871 else c = *((unsigned char *)pp);
872
873 if (endlinetype == EL_ANYCRLF) switch (c)
874 {
875 case 0x0a: /* LF */
876 case 0x0d: /* CR */
877 return p;
878
879 default:
880 break;
881 }
882
883 else switch (c)
884 {
885 case 0x0a: /* LF */
886 case 0x0b: /* VT */
887 case 0x0c: /* FF */
888 case 0x0d: /* CR */
889 case 0x85: /* NEL */
890 case 0x2028: /* LS */
891 case 0x2029: /* PS */
892 return p;
893
894 default:
895 break;
896 }
897
898 p = pp; /* Back one character */
899 } /* End of loop for ANY case */
900
901 return startptr; /* Hit start of data */
902 } /* End of overall switch */
903 }
904
905
906
907
908
909 /*************************************************
910 * Print the previous "after" lines *
911 *************************************************/
912
913 /* This is called if we are about to lose said lines because of buffer filling,
914 and at the end of the file. The data in the line is written using fwrite() so
915 that a binary zero does not terminate it.
916
917 Arguments:
918 lastmatchnumber the number of the last matching line, plus one
919 lastmatchrestart where we restarted after the last match
920 endptr end of available data
921 printname filename for printing
922
923 Returns: nothing
924 */
925
926 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
927 char *endptr, char *printname)
928 {
929 if (after_context > 0 && lastmatchnumber > 0)
930 {
931 int count = 0;
932 while (lastmatchrestart < endptr && count++ < after_context)
933 {
934 int ellength;
935 char *pp = lastmatchrestart;
936 if (printname != NULL) fprintf(stdout, "%s-", printname);
937 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
938 pp = end_of_line(pp, endptr, &ellength);
939 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
940 lastmatchrestart = pp;
941 }
942 hyphenpending = TRUE;
943 }
944 }
945
946
947
948 /*************************************************
949 * Apply patterns to subject till one matches *
950 *************************************************/
951
952 /* This function is called to run through all patterns, looking for a match. It
953 is used multiple times for the same subject when colouring is enabled, in order
954 to find all possible matches.
955
956 Arguments:
957 matchptr the start of the subject
958 length the length of the subject to match
959 startoffset where to start matching
960 offsets the offsets vector to fill in
961 mrc address of where to put the result of pcre_exec()
962
963 Returns: TRUE if there was a match
964 FALSE if there was no match
965 invert if there was a non-fatal error
966 */
967
968 static BOOL
969 match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
970 int *mrc)
971 {
972 int i;
973 size_t slen = length;
974 const char *msg = "this text:\n\n";
975 if (slen > 200)
976 {
977 slen = 200;
978 msg = "text that starts:\n\n";
979 }
980 for (i = 0; i < pattern_count; i++)
981 {
982 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
983 startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
984 if (*mrc >= 0) return TRUE;
985 if (*mrc == PCRE_ERROR_NOMATCH) continue;
986 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
987 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
988 fprintf(stderr, "%s", msg);
989 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
990 fprintf(stderr, "\n\n");
991 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
992 *mrc == PCRE_ERROR_JIT_STACKLIMIT)
993 resource_error = TRUE;
994 if (error_count++ > 20)
995 {
996 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
997 pcregrep_exit(2);
998 }
999 return invert; /* No more matching; don't show the line again */
1000 }
1001
1002 return FALSE; /* No match, no errors */
1003 }
1004
1005
1006
1007 /*************************************************
1008 * Grep an individual file *
1009 *************************************************/
1010
1011 /* This is called from grep_or_recurse() below. It uses a buffer that is three
1012 times the value of bufthird. The matching point is never allowed to stray into
1013 the top third of the buffer, thus keeping more of the file available for
1014 context printing or for multiline scanning. For large files, the pointer will
1015 be in the middle third most of the time, so the bottom third is available for
1016 "before" context printing.
1017
1018 Arguments:
1019 handle the fopened FILE stream for a normal file
1020 the gzFile pointer when reading is via libz
1021 the BZFILE pointer when reading is via libbz2
1022 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1023 filename the file name or NULL (for errors)
1024 printname the file name if it is to be printed for each match
1025 or NULL if the file name is not to be printed
1026 it cannot be NULL if filenames[_nomatch]_only is set
1027
1028 Returns: 0 if there was at least one match
1029 1 otherwise (no matches)
1030 2 if an overlong line is encountered
1031 3 if there is a read error on a .bz2 file
1032 */
1033
1034 static int
1035 pcregrep(void *handle, int frtype, char *filename, char *printname)
1036 {
1037 int rc = 1;
1038 int linenumber = 1;
1039 int lastmatchnumber = 0;
1040 int count = 0;
1041 int filepos = 0;
1042 int offsets[OFFSET_SIZE];
1043 char *lastmatchrestart = NULL;
1044 char *ptr = main_buffer;
1045 char *endptr;
1046 size_t bufflength;
1047 BOOL endhyphenpending = FALSE;
1048 BOOL input_line_buffered = line_buffered;
1049 FILE *in = NULL; /* Ensure initialized */
1050
1051 #ifdef SUPPORT_LIBZ
1052 gzFile ingz = NULL;
1053 #endif
1054
1055 #ifdef SUPPORT_LIBBZ2
1056 BZFILE *inbz2 = NULL;
1057 #endif
1058
1059
1060 /* Do the first read into the start of the buffer and set up the pointer to end
1061 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1062 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1063 fail. */
1064
1065 #ifdef SUPPORT_LIBZ
1066 if (frtype == FR_LIBZ)
1067 {
1068 ingz = (gzFile)handle;
1069 bufflength = gzread (ingz, main_buffer, bufsize);
1070 }
1071 else
1072 #endif
1073
1074 #ifdef SUPPORT_LIBBZ2
1075 if (frtype == FR_LIBBZ2)
1076 {
1077 inbz2 = (BZFILE *)handle;
1078 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1079 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1080 } /* without the cast it is unsigned. */
1081 else
1082 #endif
1083
1084 {
1085 in = (FILE *)handle;
1086 if (is_file_tty(in)) input_line_buffered = TRUE;
1087 bufflength = input_line_buffered?
1088 read_one_line(main_buffer, bufsize, in) :
1089 fread(main_buffer, 1, bufsize, in);
1090 }
1091
1092 endptr = main_buffer + bufflength;
1093
1094 /* Loop while the current pointer is not at the end of the file. For large
1095 files, endptr will be at the end of the buffer when we are in the middle of the
1096 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1097 way, the buffer is shifted left and re-filled. */
1098
1099 while (ptr < endptr)
1100 {
1101 int endlinelength;
1102 int mrc = 0;
1103 int startoffset = 0;
1104 BOOL match;
1105 char *matchptr = ptr;
1106 char *t = ptr;
1107 size_t length, linelength;
1108
1109 /* At this point, ptr is at the start of a line. We need to find the length
1110 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1111 length remainder of the data in the buffer. Otherwise, it is the length of
1112 the next line, excluding the terminating newline. After matching, we always
1113 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1114 option is used for compiling, so that any match is constrained to be in the
1115 first line. */
1116
1117 t = end_of_line(t, endptr, &endlinelength);
1118 linelength = t - ptr - endlinelength;
1119 length = multiline? (size_t)(endptr - ptr) : linelength;
1120
1121 /* Check to see if the line we are looking at extends right to the very end
1122 of the buffer without a line terminator. This means the line is too long to
1123 handle. */
1124
1125 if (endlinelength == 0 && t == main_buffer + bufsize)
1126 {
1127 fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1128 "pcregrep: check the --buffer-size option\n",
1129 linenumber,
1130 (filename == NULL)? "" : " of file ",
1131 (filename == NULL)? "" : filename);
1132 return 2;
1133 }
1134
1135 /* Extra processing for Jeffrey Friedl's debugging. */
1136
1137 #ifdef JFRIEDL_DEBUG
1138 if (jfriedl_XT || jfriedl_XR)
1139 {
1140 #include <sys/time.h>
1141 #include <time.h>
1142 struct timeval start_time, end_time;
1143 struct timezone dummy;
1144 int i;
1145
1146 if (jfriedl_XT)
1147 {
1148 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1149 const char *orig = ptr;
1150 ptr = malloc(newlen + 1);
1151 if (!ptr) {
1152 printf("out of memory");
1153 pcregrep_exit(2);
1154 }
1155 endptr = ptr;
1156 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1157 for (i = 0; i < jfriedl_XT; i++) {
1158 strncpy(endptr, orig, length);
1159 endptr += length;
1160 }
1161 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1162 length = newlen;
1163 }
1164
1165 if (gettimeofday(&start_time, &dummy) != 0)
1166 perror("bad gettimeofday");
1167
1168
1169 for (i = 0; i < jfriedl_XR; i++)
1170 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1171 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1172
1173 if (gettimeofday(&end_time, &dummy) != 0)
1174 perror("bad gettimeofday");
1175
1176 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1177 -
1178 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1179
1180 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1181 return 0;
1182 }
1183 #endif
1184
1185 /* We come back here after a match when the -o option (only_matching) is set,
1186 in order to find any further matches in the same line. */
1187
1188 ONLY_MATCHING_RESTART:
1189
1190 /* Run through all the patterns until one matches or there is an error other
1191 than NOMATCH. This code is in a subroutine so that it can be re-used for
1192 finding subsequent matches when colouring matched lines. */
1193
1194 match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1195
1196 /* If it's a match or a not-match (as required), do what's wanted. */
1197
1198 if (match != invert)
1199 {
1200 BOOL hyphenprinted = FALSE;
1201
1202 /* We've failed if we want a file that doesn't have any matches. */
1203
1204 if (filenames == FN_NOMATCH_ONLY) return 1;
1205
1206 /* Just count if just counting is wanted. */
1207
1208 if (count_only) count++;
1209
1210 /* If all we want is a file name, there is no need to scan any more lines
1211 in the file. */
1212
1213 else if (filenames == FN_MATCH_ONLY)
1214 {
1215 fprintf(stdout, "%s\n", printname);
1216 return 0;
1217 }
1218
1219 /* Likewise, if all we want is a yes/no answer. */
1220
1221 else if (quiet) return 0;
1222
1223 /* The --only-matching option prints just the substring that matched, or a
1224 captured portion of it, as long as this string is not empty, and the
1225 --file-offsets and --line-offsets options output offsets for the matching
1226 substring (they both force --only-matching = 0). None of these options
1227 prints any context. Afterwards, adjust the start and then jump back to look
1228 for further matches in the same line. If we are in invert mode, however,
1229 nothing is printed and we do not restart - this could still be useful
1230 because the return code is set. */
1231
1232 else if (only_matching >= 0)
1233 {
1234 if (!invert)
1235 {
1236 if (printname != NULL) fprintf(stdout, "%s:", printname);
1237 if (number) fprintf(stdout, "%d:", linenumber);
1238 if (line_offsets)
1239 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1240 offsets[1] - offsets[0]);
1241 else if (file_offsets)
1242 fprintf(stdout, "%d,%d\n",
1243 (int)(filepos + matchptr + offsets[0] - ptr),
1244 offsets[1] - offsets[0]);
1245 else if (only_matching < mrc)
1246 {
1247 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1248 if (plen > 0)
1249 {
1250 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1251 FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1252 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1253 fprintf(stdout, "\n");
1254 }
1255 }
1256 else if (printname != NULL || number) fprintf(stdout, "\n");
1257 match = FALSE;
1258 if (line_buffered) fflush(stdout);
1259 rc = 0; /* Had some success */
1260 startoffset = offsets[1]; /* Restart after the match */
1261 goto ONLY_MATCHING_RESTART;
1262 }
1263 }
1264
1265 /* This is the default case when none of the above options is set. We print
1266 the matching lines(s), possibly preceded and/or followed by other lines of
1267 context. */
1268
1269 else
1270 {
1271 /* See if there is a requirement to print some "after" lines from a
1272 previous match. We never print any overlaps. */
1273
1274 if (after_context > 0 && lastmatchnumber > 0)
1275 {
1276 int ellength;
1277 int linecount = 0;
1278 char *p = lastmatchrestart;
1279
1280 while (p < ptr && linecount < after_context)
1281 {
1282 p = end_of_line(p, ptr, &ellength);
1283 linecount++;
1284 }
1285
1286 /* It is important to advance lastmatchrestart during this printing so
1287 that it interacts correctly with any "before" printing below. Print
1288 each line's data using fwrite() in case there are binary zeroes. */
1289
1290 while (lastmatchrestart < p)
1291 {
1292 char *pp = lastmatchrestart;
1293 if (printname != NULL) fprintf(stdout, "%s-", printname);
1294 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1295 pp = end_of_line(pp, endptr, &ellength);
1296 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1297 lastmatchrestart = pp;
1298 }
1299 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1300 }
1301
1302 /* If there were non-contiguous lines printed above, insert hyphens. */
1303
1304 if (hyphenpending)
1305 {
1306 fprintf(stdout, "--\n");
1307 hyphenpending = FALSE;
1308 hyphenprinted = TRUE;
1309 }
1310
1311 /* See if there is a requirement to print some "before" lines for this
1312 match. Again, don't print overlaps. */
1313
1314 if (before_context > 0)
1315 {
1316 int linecount = 0;
1317 char *p = ptr;
1318
1319 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1320 linecount < before_context)
1321 {
1322 linecount++;
1323 p = previous_line(p, main_buffer);
1324 }
1325
1326 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1327 fprintf(stdout, "--\n");
1328
1329 while (p < ptr)
1330 {
1331 int ellength;
1332 char *pp = p;
1333 if (printname != NULL) fprintf(stdout, "%s-", printname);
1334 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1335 pp = end_of_line(pp, endptr, &ellength);
1336 FWRITE(p, 1, pp - p, stdout);
1337 p = pp;
1338 }
1339 }
1340
1341 /* Now print the matching line(s); ensure we set hyphenpending at the end
1342 of the file if any context lines are being output. */
1343
1344 if (after_context > 0 || before_context > 0)
1345 endhyphenpending = TRUE;
1346
1347 if (printname != NULL) fprintf(stdout, "%s:", printname);
1348 if (number) fprintf(stdout, "%d:", linenumber);
1349
1350 /* In multiline mode, we want to print to the end of the line in which
1351 the end of the matched string is found, so we adjust linelength and the
1352 line number appropriately, but only when there actually was a match
1353 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1354 the match will always be before the first newline sequence. */
1355
1356 if (multiline & !invert)
1357 {
1358 char *endmatch = ptr + offsets[1];
1359 t = ptr;
1360 while (t < endmatch)
1361 {
1362 t = end_of_line(t, endptr, &endlinelength);
1363 if (t < endmatch) linenumber++; else break;
1364 }
1365 linelength = t - ptr - endlinelength;
1366 }
1367
1368 /*** NOTE: Use only fwrite() to output the data line, so that binary
1369 zeroes are treated as just another data character. */
1370
1371 /* This extra option, for Jeffrey Friedl's debugging requirements,
1372 replaces the matched string, or a specific captured string if it exists,
1373 with X. When this happens, colouring is ignored. */
1374
1375 #ifdef JFRIEDL_DEBUG
1376 if (S_arg >= 0 && S_arg < mrc)
1377 {
1378 int first = S_arg * 2;
1379 int last = first + 1;
1380 FWRITE(ptr, 1, offsets[first], stdout);
1381 fprintf(stdout, "X");
1382 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1383 }
1384 else
1385 #endif
1386
1387 /* We have to split the line(s) up if colouring, and search for further
1388 matches, but not of course if the line is a non-match. */
1389
1390 if (do_colour && !invert)
1391 {
1392 int plength;
1393 FWRITE(ptr, 1, offsets[0], stdout);
1394 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1395 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1396 fprintf(stdout, "%c[00m", 0x1b);
1397 for (;;)
1398 {
1399 startoffset = offsets[1];
1400 if (startoffset >= (int)linelength + endlinelength ||
1401 !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1402 break;
1403 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1404 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1405 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1406 fprintf(stdout, "%c[00m", 0x1b);
1407 }
1408
1409 /* In multiline mode, we may have already printed the complete line
1410 and its line-ending characters (if they matched the pattern), so there
1411 may be no more to print. */
1412
1413 plength = (int)((linelength + endlinelength) - startoffset);
1414 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1415 }
1416
1417 /* Not colouring; no need to search for further matches */
1418
1419 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1420 }
1421
1422 /* End of doing what has to be done for a match. If --line-buffered was
1423 given, flush the output. */
1424
1425 if (line_buffered) fflush(stdout);
1426 rc = 0; /* Had some success */
1427
1428 /* Remember where the last match happened for after_context. We remember
1429 where we are about to restart, and that line's number. */
1430
1431 lastmatchrestart = ptr + linelength + endlinelength;
1432 lastmatchnumber = linenumber + 1;
1433 }
1434
1435 /* For a match in multiline inverted mode (which of course did not cause
1436 anything to be printed), we have to move on to the end of the match before
1437 proceeding. */
1438
1439 if (multiline && invert && match)
1440 {
1441 int ellength;
1442 char *endmatch = ptr + offsets[1];
1443 t = ptr;
1444 while (t < endmatch)
1445 {
1446 t = end_of_line(t, endptr, &ellength);
1447 if (t <= endmatch) linenumber++; else break;
1448 }
1449 endmatch = end_of_line(endmatch, endptr, &ellength);
1450 linelength = endmatch - ptr - ellength;
1451 }
1452
1453 /* Advance to after the newline and increment the line number. The file
1454 offset to the current line is maintained in filepos. */
1455
1456 ptr += linelength + endlinelength;
1457 filepos += (int)(linelength + endlinelength);
1458 linenumber++;
1459
1460 /* If input is line buffered, and the buffer is not yet full, read another
1461 line and add it into the buffer. */
1462
1463 if (input_line_buffered && bufflength < (size_t)bufsize)
1464 {
1465 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1466 bufflength += add;
1467 endptr += add;
1468 }
1469
1470 /* If we haven't yet reached the end of the file (the buffer is full), and
1471 the current point is in the top 1/3 of the buffer, slide the buffer down by
1472 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1473 about to be lost, print them. */
1474
1475 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1476 {
1477 if (after_context > 0 &&
1478 lastmatchnumber > 0 &&
1479 lastmatchrestart < main_buffer + bufthird)
1480 {
1481 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1482 lastmatchnumber = 0;
1483 }
1484
1485 /* Now do the shuffle */
1486
1487 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1488 ptr -= bufthird;
1489
1490 #ifdef SUPPORT_LIBZ
1491 if (frtype == FR_LIBZ)
1492 bufflength = 2*bufthird +
1493 gzread (ingz, main_buffer + 2*bufthird, bufthird);
1494 else
1495 #endif
1496
1497 #ifdef SUPPORT_LIBBZ2
1498 if (frtype == FR_LIBBZ2)
1499 bufflength = 2*bufthird +
1500 BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1501 else
1502 #endif
1503
1504 bufflength = 2*bufthird +
1505 (input_line_buffered?
1506 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1507 fread(main_buffer + 2*bufthird, 1, bufthird, in));
1508 endptr = main_buffer + bufflength;
1509
1510 /* Adjust any last match point */
1511
1512 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1513 }
1514 } /* Loop through the whole file */
1515
1516 /* End of file; print final "after" lines if wanted; do_after_lines sets
1517 hyphenpending if it prints something. */
1518
1519 if (only_matching < 0 && !count_only)
1520 {
1521 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1522 hyphenpending |= endhyphenpending;
1523 }
1524
1525 /* Print the file name if we are looking for those without matches and there
1526 were none. If we found a match, we won't have got this far. */
1527
1528 if (filenames == FN_NOMATCH_ONLY)
1529 {
1530 fprintf(stdout, "%s\n", printname);
1531 return 0;
1532 }
1533
1534 /* Print the match count if wanted */
1535
1536 if (count_only)
1537 {
1538 if (count > 0 || !omit_zero_count)
1539 {
1540 if (printname != NULL && filenames != FN_NONE)
1541 fprintf(stdout, "%s:", printname);
1542 fprintf(stdout, "%d\n", count);
1543 }
1544 }
1545
1546 return rc;
1547 }
1548
1549
1550
1551 /*************************************************
1552 * Grep a file or recurse into a directory *
1553 *************************************************/
1554
1555 /* Given a path name, if it's a directory, scan all the files if we are
1556 recursing; if it's a file, grep it.
1557
1558 Arguments:
1559 pathname the path to investigate
1560 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1561 only_one_at_top TRUE if the path is the only one at toplevel
1562
1563 Returns: 0 if there was at least one match
1564 1 if there were no matches
1565 2 there was some kind of error
1566
1567 However, file opening failures are suppressed if "silent" is set.
1568 */
1569
1570 static int
1571 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1572 {
1573 int rc = 1;
1574 int sep;
1575 int frtype;
1576 void *handle;
1577 FILE *in = NULL; /* Ensure initialized */
1578
1579 #ifdef SUPPORT_LIBZ
1580 gzFile ingz = NULL;
1581 #endif
1582
1583 #ifdef SUPPORT_LIBBZ2
1584 BZFILE *inbz2 = NULL;
1585 #endif
1586
1587 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBZ2
1588 int pathlen;
1589 #endif
1590
1591 /* If the file name is "-" we scan stdin */
1592
1593 if (strcmp(pathname, "-") == 0)
1594 {
1595 return pcregrep(stdin, FR_PLAIN, stdin_name,
1596 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1597 stdin_name : NULL);
1598 }
1599
1600 /* If the file is a directory, skip if skipping or if we are recursing, scan
1601 each file and directory within it, subject to any include or exclude patterns
1602 that were set. The scanning code is localized so it can be made
1603 system-specific. */
1604
1605 if ((sep = isdirectory(pathname)) != 0)
1606 {
1607 if (dee_action == dee_SKIP) return 1;
1608 if (dee_action == dee_RECURSE)
1609 {
1610 char buffer[1024];
1611 char *nextfile;
1612 directory_type *dir = opendirectory(pathname);
1613
1614 if (dir == NULL)
1615 {
1616 if (!silent)
1617 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1618 strerror(errno));
1619 return 2;
1620 }
1621
1622 while ((nextfile = readdirectory(dir)) != NULL)
1623 {
1624 int frc, nflen;
1625 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1626 nflen = (int)(strlen(nextfile));
1627
1628 if (isdirectory(buffer))
1629 {
1630 if (exclude_dir_compiled != NULL &&
1631 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1632 continue;
1633
1634 if (include_dir_compiled != NULL &&
1635 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1636 continue;
1637 }
1638 else
1639 {
1640 if (exclude_compiled != NULL &&
1641 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1642 continue;
1643
1644 if (include_compiled != NULL &&
1645 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1646 continue;
1647 }
1648
1649 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1650 if (frc > 1) rc = frc;
1651 else if (frc == 0 && rc == 1) rc = 0;
1652 }
1653
1654 closedirectory(dir);
1655 return rc;
1656 }
1657 }
1658
1659 /* If the file is not a directory and not a regular file, skip it if that's
1660 been requested. */
1661
1662 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1663
1664 /* Control reaches here if we have a regular file, or if we have a directory
1665 and recursion or skipping was not requested, or if we have anything else and
1666 skipping was not requested. The scan proceeds. If this is the first and only
1667 argument at top level, we don't show the file name, unless we are only showing
1668 the file name, or the filename was forced (-H). */
1669
1670 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBZ2
1671 pathlen = (int)(strlen(pathname));
1672 #endif
1673
1674 /* Open using zlib if it is supported and the file name ends with .gz. */
1675
1676 #ifdef SUPPORT_LIBZ
1677 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1678 {
1679 ingz = gzopen(pathname, "rb");
1680 if (ingz == NULL)
1681 {
1682 if (!silent)
1683 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1684 strerror(errno));
1685 return 2;
1686 }
1687 handle = (void *)ingz;
1688 frtype = FR_LIBZ;
1689 }
1690 else
1691 #endif
1692
1693 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1694
1695 #ifdef SUPPORT_LIBBZ2
1696 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1697 {
1698 inbz2 = BZ2_bzopen(pathname, "rb");
1699 handle = (void *)inbz2;
1700 frtype = FR_LIBBZ2;
1701 }
1702 else
1703 #endif
1704
1705 /* Otherwise use plain fopen(). The label is so that we can come back here if
1706 an attempt to read a .bz2 file indicates that it really is a plain file. */
1707
1708 #ifdef SUPPORT_LIBBZ2
1709 PLAIN_FILE:
1710 #endif
1711 {
1712 in = fopen(pathname, "rb");
1713 handle = (void *)in;
1714 frtype = FR_PLAIN;
1715 }
1716
1717 /* All the opening methods return errno when they fail. */
1718
1719 if (handle == NULL)
1720 {
1721 if (!silent)
1722 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1723 strerror(errno));
1724 return 2;
1725 }
1726
1727 /* Now grep the file */
1728
1729 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1730 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1731
1732 /* Close in an appropriate manner. */
1733
1734 #ifdef SUPPORT_LIBZ
1735 if (frtype == FR_LIBZ)
1736 gzclose(ingz);
1737 else
1738 #endif
1739
1740 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1741 read failed. If the error indicates that the file isn't in fact bzipped, try
1742 again as a normal file. */
1743
1744 #ifdef SUPPORT_LIBBZ2
1745 if (frtype == FR_LIBBZ2)
1746 {
1747 if (rc == 3)
1748 {
1749 int errnum;
1750 const char *err = BZ2_bzerror(inbz2, &errnum);
1751 if (errnum == BZ_DATA_ERROR_MAGIC)
1752 {
1753 BZ2_bzclose(inbz2);
1754 goto PLAIN_FILE;
1755 }
1756 else if (!silent)
1757 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1758 pathname, err);
1759 rc = 2; /* The normal "something went wrong" code */
1760 }
1761 BZ2_bzclose(inbz2);
1762 }
1763 else
1764 #endif
1765
1766 /* Normal file close */
1767
1768 fclose(in);
1769
1770 /* Pass back the yield from pcregrep(). */
1771
1772 return rc;
1773 }
1774
1775
1776
1777
1778 /*************************************************
1779 * Usage function *
1780 *************************************************/
1781
1782 static int
1783 usage(int rc)
1784 {
1785 option_item *op;
1786 fprintf(stderr, "Usage: pcregrep [-");
1787 for (op = optionlist; op->one_char != 0; op++)
1788 {
1789 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1790 }
1791 fprintf(stderr, "] [long options] [pattern] [files]\n");
1792 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1793 "options.\n");
1794 return rc;
1795 }
1796
1797
1798
1799
1800 /*************************************************
1801 * Help function *
1802 *************************************************/
1803
1804 static void
1805 help(void)
1806 {
1807 option_item *op;
1808
1809 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1810 printf("Search for PATTERN in each FILE or standard input.\n");
1811 printf("PATTERN must be present if neither -e nor -f is used.\n");
1812 printf("\"-\" can be used as a file name to mean STDIN.\n");
1813
1814 #ifdef SUPPORT_LIBZ
1815 printf("Files whose names end in .gz are read using zlib.\n");
1816 #endif
1817
1818 #ifdef SUPPORT_LIBBZ2
1819 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1820 #endif
1821
1822 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1823 printf("Other files and the standard input are read as plain files.\n\n");
1824 #else
1825 printf("All files are read as plain files, without any interpretation.\n\n");
1826 #endif
1827
1828 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1829 printf("Options:\n");
1830
1831 for (op = optionlist; op->one_char != 0; op++)
1832 {
1833 int n;
1834 char s[4];
1835
1836 /* Two options were accidentally implemented and documented with underscores
1837 instead of hyphens in their names, something that was not noticed for quite a
1838 few releases. When fixing this, I left the underscored versions in the list
1839 in case people were using them. However, we don't want to display them in the
1840 help data. There are no other options that contain underscores, and we do not
1841 expect ever to implement such options. Therefore, just omit any option that
1842 contains an underscore. */
1843
1844 if (strchr(op->long_name, '_') != NULL) continue;
1845
1846 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1847 n = 31 - printf(" %s --%s", s, op->long_name);
1848 if (n < 1) n = 1;
1849 printf("%.*s%s\n", n, " ", op->help_text);
1850 }
1851
1852 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1853 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1854 printf("When reading patterns from a file instead of using a command line option,\n");
1855 printf("trailing white space is removed and blank lines are ignored.\n");
1856 printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1857 MAX_PATTERN_COUNT, PATBUFSIZE);
1858
1859 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1860 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1861 }
1862
1863
1864
1865
1866 /*************************************************
1867 * Handle a single-letter, no data option *
1868 *************************************************/
1869
1870 static int
1871 handle_option(int letter, int options)
1872 {
1873 switch(letter)
1874 {
1875 case N_FOFFSETS: file_offsets = TRUE; break;
1876 case N_HELP: help(); pcregrep_exit(0);
1877 case N_LBUFFER: line_buffered = TRUE; break;
1878 case N_LOFFSETS: line_offsets = number = TRUE; break;
1879 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
1880 case 'c': count_only = TRUE; break;
1881 case 'F': process_options |= PO_FIXED_STRINGS; break;
1882 case 'H': filenames = FN_FORCE; break;
1883 case 'h': filenames = FN_NONE; break;
1884 case 'i': options |= PCRE_CASELESS; break;
1885 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1886 case 'L': filenames = FN_NOMATCH_ONLY; break;
1887 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1888 case 'n': number = TRUE; break;
1889 case 'o': only_matching = 0; break;
1890 case 'q': quiet = TRUE; break;
1891 case 'r': dee_action = dee_RECURSE; break;
1892 case 's': silent = TRUE; break;
1893 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1894 case 'v': invert = TRUE; break;
1895 case 'w': process_options |= PO_WORD_MATCH; break;
1896 case 'x': process_options |= PO_LINE_MATCH; break;
1897
1898 case 'V':
1899 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1900 pcregrep_exit(0);
1901 break;
1902
1903 default:
1904 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1905 pcregrep_exit(usage(2));
1906 }
1907
1908 return options;
1909 }
1910
1911
1912
1913
1914 /*************************************************
1915 * Construct printed ordinal *
1916 *************************************************/
1917
/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12 or 13 take "th" ("11th", "112th"), as do zero and negative numbers.

Argument:  n    the number
Returns:   pointer to a static buffer holding the text; it is overwritten by
           the next call, so the result must be used before calling again
*/

static char *
ordin(int n)
{
static char buffer[16];    /* Enough for a 32-bit int, a sign, and a suffix */
const char *ending = "th";
int lasttwo = n % 100;

/* The "teens" are irregular; only outside 11..13 does the last digit decide. */

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n%10)
    {
    case 1:  ending = "st"; break;
    case 2:  ending = "nd"; break;
    case 3:  ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1936
1937
1938
1939 /*************************************************
1940 * Compile a single pattern *
1941 *************************************************/
1942
1943 /* When the -F option has been used, this is called for each substring.
1944 Otherwise it's called for each supplied pattern.
1945
1946 Arguments:
1947 pattern the pattern string
1948 options the PCRE options
1949 filename the file name, or NULL for a command-line pattern
1950 count 0 if this is the only command line pattern, or
1951 number of the command line pattern, or
1952 linenumber for a pattern from a file
1953
1954 Returns: TRUE on success, FALSE after an error
1955 */
1956
1957 static BOOL
1958 compile_single_pattern(char *pattern, int options, char *filename, int count)
1959 {
1960 char buffer[PATBUFSIZE];
1961 const char *error;
1962 int errptr;
1963
1964 if (pattern_count >= MAX_PATTERN_COUNT)
1965 {
1966 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1967 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1968 return FALSE;
1969 }
1970
1971 sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
1972 suffix[process_options]);
1973 pattern_list[pattern_count] =
1974 pcre_compile(buffer, options, &error, &errptr, pcretables);
1975 if (pattern_list[pattern_count] != NULL)
1976 {
1977 pattern_count++;
1978 return TRUE;
1979 }
1980
1981 /* Handle compile errors */
1982
1983 errptr -= (int)strlen(prefix[process_options]);
1984 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1985
1986 if (filename == NULL)
1987 {
1988 if (count == 0)
1989 fprintf(stderr, "pcregrep: Error in command-line regex "
1990 "at offset %d: %s\n", errptr, error);
1991 else
1992 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1993 "at offset %d: %s\n", ordin(count), errptr, error);
1994 }
1995 else
1996 {
1997 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1998 "at offset %d: %s\n", count, filename, errptr, error);
1999 }
2000
2001 return FALSE;
2002 }
2003
2004
2005
2006 /*************************************************
2007 * Compile one supplied pattern *
2008 *************************************************/
2009
2010 /* When the -F option has been used, each string may be a list of strings,
2011 separated by line breaks. They will be matched literally.
2012
2013 Arguments:
2014 pattern the pattern string
2015 options the PCRE options
2016 filename the file name, or NULL for a command-line pattern
2017 count 0 if this is the only command line pattern, or
2018 number of the command line pattern, or
2019 linenumber for a pattern from a file
2020
2021 Returns: TRUE on success, FALSE after an error
2022 */
2023
2024 static BOOL
2025 compile_pattern(char *pattern, int options, char *filename, int count)
2026 {
2027 if ((process_options & PO_FIXED_STRINGS) != 0)
2028 {
2029 char *eop = pattern + strlen(pattern);
2030 char buffer[PATBUFSIZE];
2031 for(;;)
2032 {
2033 int ellength;
2034 char *p = end_of_line(pattern, eop, &ellength);
2035 if (ellength == 0)
2036 return compile_single_pattern(pattern, options, filename, count);
2037 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
2038 pattern = p;
2039 if (!compile_single_pattern(buffer, options, filename, count))
2040 return FALSE;
2041 }
2042 }
2043 else return compile_single_pattern(pattern, options, filename, count);
2044 }
2045
2046
2047
2048 /*************************************************
2049 * Main program *
2050 *************************************************/
2051
2052 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2053
2054 int
2055 main(int argc, char **argv)
2056 {
2057 int i, j;
2058 int rc = 1;
2059 int pcre_options = 0;
2060 int cmd_pattern_count = 0;
2061 int hint_count = 0;
2062 int errptr;
2063 BOOL only_one_at_top;
2064 char *patterns[MAX_PATTERN_COUNT];
2065 const char *locale_from = "--locale";
2066 const char *error;
2067
2068 #ifdef SUPPORT_PCREGREP_JIT
2069 pcre_jit_stack *jit_stack = NULL;
2070 #endif
2071
2072 /* Set the default line ending value from the default in the PCRE library;
2073 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2074 Note that the return values from pcre_config(), though derived from the ASCII
2075 codes, are the same in EBCDIC environments, so we must use the actual values
2076 rather than escapes such as as '\r'. */
2077
2078 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2079 switch(i)
2080 {
2081 default: newline = (char *)"lf"; break;
2082 case 13: newline = (char *)"cr"; break;
2083 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2084 case -1: newline = (char *)"any"; break;
2085 case -2: newline = (char *)"anycrlf"; break;
2086 }
2087
2088 /* Process the options */
2089
2090 for (i = 1; i < argc; i++)
2091 {
2092 option_item *op = NULL;
2093 char *option_data = (char *)""; /* default to keep compiler happy */
2094 BOOL longop;
2095 BOOL longopwasequals = FALSE;
2096
2097 if (argv[i][0] != '-') break;
2098
2099 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2100 but only if we have previously had -e or -f to define the patterns. */
2101
2102 if (argv[i][1] == 0)
2103 {
2104 if (pattern_filename != NULL || pattern_count > 0) break;
2105 else pcregrep_exit(usage(2));
2106 }
2107
2108 /* Handle a long name option, or -- to terminate the options */
2109
2110 if (argv[i][1] == '-')
2111 {
2112 char *arg = argv[i] + 2;
2113 char *argequals = strchr(arg, '=');
2114
2115 if (*arg == 0) /* -- terminates options */
2116 {
2117 i++;
2118 break; /* out of the options-handling loop */
2119 }
2120
2121 longop = TRUE;
2122
2123 /* Some long options have data that follows after =, for example file=name.
2124 Some options have variations in the long name spelling: specifically, we
2125 allow "regexp" because GNU grep allows it, though I personally go along
2126 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2127 These options are entered in the table as "regex(p)". Options can be in
2128 both these categories. */
2129
2130 for (op = optionlist; op->one_char != 0; op++)
2131 {
2132 char *opbra = strchr(op->long_name, '(');
2133 char *equals = strchr(op->long_name, '=');
2134
2135 /* Handle options with only one spelling of the name */
2136
2137 if (opbra == NULL) /* Does not contain '(' */
2138 {
2139 if (equals == NULL) /* Not thing=data case */
2140 {
2141 if (strcmp(arg, op->long_name) == 0) break;
2142 }
2143 else /* Special case xxx=data */
2144 {
2145 int oplen = (int)(equals - op->long_name);
2146 int arglen = (argequals == NULL)?
2147 (int)strlen(arg) : (int)(argequals - arg);
2148 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2149 {
2150 option_data = arg + arglen;
2151 if (*option_data == '=')
2152 {
2153 option_data++;
2154 longopwasequals = TRUE;
2155 }
2156 break;
2157 }
2158 }
2159 }
2160
2161 /* Handle options with an alternate spelling of the name */
2162
2163 else
2164 {
2165 char buff1[24];
2166 char buff2[24];
2167
2168 int baselen = (int)(opbra - op->long_name);
2169 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2170 int arglen = (argequals == NULL || equals == NULL)?
2171 (int)strlen(arg) : (int)(argequals - arg);
2172
2173 sprintf(buff1, "%.*s", baselen, op->long_name);
2174 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2175
2176 if (strncmp(arg, buff1, arglen) == 0 ||
2177 strncmp(arg, buff2, arglen) == 0)
2178 {
2179 if (equals != NULL && argequals != NULL)
2180 {
2181 option_data = argequals;
2182 if (*option_data == '=')
2183 {
2184 option_data++;
2185 longopwasequals = TRUE;
2186 }
2187 }
2188 break;
2189 }
2190 }
2191 }
2192
2193 if (op->one_char == 0)
2194 {
2195 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2196 pcregrep_exit(usage(2));
2197 }
2198 }
2199
2200 /* Jeffrey Friedl's debugging harness uses these additional options which
2201 are not in the right form for putting in the option table because they use
2202 only one hyphen, yet are more than one character long. By putting them
2203 separately here, they will not get displayed as part of the help() output,
2204 but I don't think Jeffrey will care about that. */
2205
2206 #ifdef JFRIEDL_DEBUG
2207 else if (strcmp(argv[i], "-pre") == 0) {
2208 jfriedl_prefix = argv[++i];
2209 continue;
2210 } else if (strcmp(argv[i], "-post") == 0) {
2211 jfriedl_postfix = argv[++i];
2212 continue;
2213 } else if (strcmp(argv[i], "-XT") == 0) {
2214 sscanf(argv[++i], "%d", &jfriedl_XT);
2215 continue;
2216 } else if (strcmp(argv[i], "-XR") == 0) {
2217 sscanf(argv[++i], "%d", &jfriedl_XR);
2218 continue;
2219 }
2220 #endif
2221
2222
2223 /* One-char options; many that have no data may be in a single argument; we
2224 continue till we hit the last one or one that needs data. */
2225
2226 else
2227 {
2228 char *s = argv[i] + 1;
2229 longop = FALSE;
2230 while (*s != 0)
2231 {
2232 for (op = optionlist; op->one_char != 0; op++)
2233 {
2234 if (*s == op->one_char) break;
2235 }
2236 if (op->one_char == 0)
2237 {
2238 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2239 *s, argv[i]);
2240 pcregrep_exit(usage(2));
2241 }
2242
2243 /* Check for a single-character option that has data: OP_OP_NUMBER
2244 is used for one that either has a numerical number or defaults, i.e. the
2245 data is optional. If a digit follows, there is data; if not, carry on
2246 with other single-character options in the same string. */
2247
2248 option_data = s+1;
2249 if (op->type == OP_OP_NUMBER)
2250 {
2251 if (isdigit((unsigned char)s[1])) break;
2252 }
2253 else /* Check for end or a dataless option */
2254 {
2255 if (op->type != OP_NODATA || s[1] == 0) break;
2256 }
2257
2258 /* Handle a single-character option with no data, then loop for the
2259 next character in the string. */
2260
2261 pcre_options = handle_option(*s++, pcre_options);
2262 }
2263 }
2264
2265 /* At this point we should have op pointing to a matched option. If the type
2266 is NO_DATA, it means that there is no data, and the option might set
2267 something in the PCRE options. */
2268
2269 if (op->type == OP_NODATA)
2270 {
2271 pcre_options = handle_option(op->one_char, pcre_options);
2272 continue;
2273 }
2274
2275 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2276 either has a value or defaults to something. It cannot have data in a
2277 separate item. At the moment, the only such options are "colo(u)r",
2278 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2279
2280 if (*option_data == 0 &&
2281 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2282 {
2283 switch (op->one_char)
2284 {
2285 case N_COLOUR:
2286 colour_option = (char *)"auto";
2287 break;
2288
2289 case 'o':
2290 only_matching = 0;
2291 break;
2292
2293 #ifdef JFRIEDL_DEBUG
2294 case 'S':
2295 S_arg = 0;
2296 break;
2297 #endif
2298 }
2299 continue;
2300 }
2301
2302 /* Otherwise, find the data string for the option. */
2303
2304 if (*option_data == 0)
2305 {
2306 if (i >= argc - 1 || longopwasequals)
2307 {
2308 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2309 pcregrep_exit(usage(2));
2310 }
2311 option_data = argv[++i];
2312 }
2313
2314 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2315 multiple times to create a list of patterns. */
2316
2317 if (op->type == OP_PATLIST)
2318 {
2319 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2320 {
2321 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2322 MAX_PATTERN_COUNT);
2323 return 2;
2324 }
2325 patterns[cmd_pattern_count++] = option_data;
2326 }
2327
2328 /* Otherwise, deal with single string or numeric data values. */
2329
2330 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2331 op->type != OP_OP_NUMBER)
2332 {
2333 *((char **)op->dataptr) = option_data;
2334 }
2335
2336 /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2337 only for unpicking arguments, so just keep it simple. */
2338
2339 else
2340 {
2341 unsigned long int n = 0;
2342 char *endptr = option_data;
2343 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2344 while (isdigit((unsigned char)(*endptr)))
2345 n = n * 10 + (int)(*endptr++ - '0');
2346 if (toupper(*endptr) == 'K')
2347 {
2348 n *= 1024;
2349 endptr++;
2350 }
2351 else if (toupper(*endptr) == 'M')
2352 {
2353 n *= 1024*1024;
2354 endptr++;
2355 }
2356 if (*endptr != 0)
2357 {
2358 if (longop)
2359 {
2360 char *equals = strchr(op->long_name, '=');
2361 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2362 (int)(equals - op->long_name);
2363 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2364 option_data, nlen, op->long_name);
2365 }
2366 else
2367 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2368 option_data, op->one_char);
2369 pcregrep_exit(usage(2));
2370 }
2371 if (op->type == OP_LONGNUMBER)
2372 *((unsigned long int *)op->dataptr) = n;
2373 else
2374 *((int *)op->dataptr) = n;
2375 }
2376 }
2377
2378 /* Options have been decoded. If -C was used, its value is used as a default
2379 for -A and -B. */
2380
2381 if (both_context > 0)
2382 {
2383 if (after_context == 0) after_context = both_context;
2384 if (before_context == 0) before_context = both_context;
2385 }
2386
2387 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2388 However, the latter two set only_matching. */
2389
2390 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2391 (file_offsets && line_offsets))
2392 {
2393 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2394 "and/or --line-offsets\n");
2395 pcregrep_exit(usage(2));
2396 }
2397
2398 if (file_offsets || line_offsets) only_matching = 0;
2399
2400 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2401 LC_ALL environment variable is set, and if so, use it. */
2402
2403 if (locale == NULL)
2404 {
2405 locale = getenv("LC_ALL");
2406 locale_from = "LCC_ALL";
2407 }
2408
2409 if (locale == NULL)
2410 {
2411 locale = getenv("LC_CTYPE");
2412 locale_from = "LC_CTYPE";
2413 }
2414
2415 /* If a locale has been provided, set it, and generate the tables the PCRE
2416 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2417
2418 if (locale != NULL)
2419 {
2420 if (setlocale(LC_CTYPE, locale) == NULL)
2421 {
2422 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2423 locale, locale_from);
2424 return 2;
2425 }
2426 pcretables = pcre_maketables();
2427 }
2428
2429 /* Sort out colouring */
2430
2431 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2432 {
2433 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2434 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2435 else
2436 {
2437 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2438 colour_option);
2439 return 2;
2440 }
2441 if (do_colour)
2442 {
2443 char *cs = getenv("PCREGREP_COLOUR");
2444 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2445 if (cs != NULL) colour_string = cs;
2446 }
2447 }
2448
2449 /* Interpret the newline type; the default settings are Unix-like. */
2450
2451 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2452 {
2453 pcre_options |= PCRE_NEWLINE_CR;
2454 endlinetype = EL_CR;
2455 }
2456 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2457 {
2458 pcre_options |= PCRE_NEWLINE_LF;
2459 endlinetype = EL_LF;
2460 }
2461 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2462 {
2463 pcre_options |= PCRE_NEWLINE_CRLF;
2464 endlinetype = EL_CRLF;
2465 }
2466 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2467 {
2468 pcre_options |= PCRE_NEWLINE_ANY;
2469 endlinetype = EL_ANY;
2470 }
2471 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2472 {
2473 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2474 endlinetype = EL_ANYCRLF;
2475 }
2476 else
2477 {
2478 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2479 return 2;
2480 }
2481
2482 /* Interpret the text values for -d and -D */
2483
2484 if (dee_option != NULL)
2485 {
2486 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2487 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2488 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2489 else
2490 {
2491 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2492 return 2;
2493 }
2494 }
2495
2496 if (DEE_option != NULL)
2497 {
2498 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2499 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2500 else
2501 {
2502 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2503 return 2;
2504 }
2505 }
2506
2507 /* Check the values for Jeffrey Friedl's debugging options. */
2508
2509 #ifdef JFRIEDL_DEBUG
2510 if (S_arg > 9)
2511 {
2512 fprintf(stderr, "pcregrep: bad value for -S option\n");
2513 return 2;
2514 }
2515 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2516 {
2517 if (jfriedl_XT == 0) jfriedl_XT = 1;
2518 if (jfriedl_XR == 0) jfriedl_XR = 1;
2519 }
2520 #endif
2521
2522 /* Get memory for the main buffer, and to store the pattern and hints lists. */
2523
2524 bufsize = 3*bufthird;
2525 main_buffer = (char *)malloc(bufsize);
2526 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2527 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2528
2529 if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2530 {
2531 fprintf(stderr, "pcregrep: malloc failed\n");
2532 goto EXIT2;
2533 }
2534
2535 /* If no patterns were provided by -e, and there is no file provided by -f,
2536 the first argument is the one and only pattern, and it must exist. */
2537
2538 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2539 {
2540 if (i >= argc) return usage(2);
2541 patterns[cmd_pattern_count++] = argv[i++];
2542 }
2543
2544 /* Compile the patterns that were provided on the command line, either by
2545 multiple uses of -e or as a single unkeyed pattern. */
2546
2547 for (j = 0; j < cmd_pattern_count; j++)
2548 {
2549 if (!compile_pattern(patterns[j], pcre_options, NULL,
2550 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2551 goto EXIT2;
2552 }
2553
2554 /* Compile the regular expressions that are provided in a file. */
2555
2556 if (pattern_filename != NULL)
2557 {
2558 int linenumber = 0;
2559 FILE *f;
2560 char *filename;
2561 char buffer[PATBUFSIZE];
2562
2563 if (strcmp(pattern_filename, "-") == 0)
2564 {
2565 f = stdin;
2566 filename = stdin_name;
2567 }
2568 else
2569 {
2570 f = fopen(pattern_filename, "r");
2571 if (f == NULL)
2572 {
2573 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2574 strerror(errno));
2575 goto EXIT2;
2576 }
2577 filename = pattern_filename;
2578 }
2579
2580 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2581 {
2582 char *s = buffer + (int)strlen(buffer);
2583 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2584 *s = 0;
2585 linenumber++;
2586 if (buffer[0] == 0) continue; /* Skip blank lines */
2587 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2588 goto EXIT2;
2589 }
2590
2591 if (f != stdin) fclose(f);
2592 }
2593
2594 /* Study the regular expressions, as we will be running them many times. Unless
2595 JIT has been explicitly disabled, arrange a stack for it to use. */
2596
2597 #ifdef SUPPORT_PCREGREP_JIT
2598 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2599 jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2600 #endif
2601
2602 for (j = 0; j < pattern_count; j++)
2603 {
2604 hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
2605 if (error != NULL)
2606 {
2607 char s[16];
2608 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2609 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2610 goto EXIT2;
2611 }
2612 hint_count++;
2613 #ifdef SUPPORT_PCREGREP_JIT
2614 if (jit_stack != NULL && hints_list[j] != NULL)
2615 pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);
2616 #endif
2617 }
2618
2619 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2620 pcre_extra block for each pattern. */
2621
2622 if (match_limit > 0 || match_limit_recursion > 0)
2623 {
2624 for (j = 0; j < pattern_count; j++)
2625 {
2626 if (hints_list[j] == NULL)
2627 {
2628 hints_list[j] = malloc(sizeof(pcre_extra));
2629 if (hints_list[j] == NULL)
2630 {
2631 fprintf(stderr, "pcregrep: malloc failed\n");
2632 pcregrep_exit(2);
2633 }
2634 }
2635 if (match_limit > 0)
2636 {
2637 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2638 hints_list[j]->match_limit = match_limit;
2639 }
2640 if (match_limit_recursion > 0)
2641 {
2642 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2643 hints_list[j]->match_limit_recursion = match_limit_recursion;
2644 }
2645 }
2646 }
2647
2648 /* If there are include or exclude patterns, compile them. */
2649
2650 if (exclude_pattern != NULL)
2651 {
2652 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2653 pcretables);
2654 if (exclude_compiled == NULL)
2655 {
2656 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2657 errptr, error);
2658 goto EXIT2;
2659 }
2660 }
2661
2662 if (include_pattern != NULL)
2663 {
2664 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2665 pcretables);
2666 if (include_compiled == NULL)
2667 {
2668 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2669 errptr, error);
2670 goto EXIT2;
2671 }
2672 }
2673
2674 if (exclude_dir_pattern != NULL)
2675 {
2676 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2677 pcretables);
2678 if (exclude_dir_compiled == NULL)
2679 {
2680 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2681 errptr, error);
2682 goto EXIT2;
2683 }
2684 }
2685
2686 if (include_dir_pattern != NULL)
2687 {
2688 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2689 pcretables);
2690 if (include_dir_compiled == NULL)
2691 {
2692 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2693 errptr, error);
2694 goto EXIT2;
2695 }
2696 }
2697
2698 /* If there are no further arguments, do the business on stdin and exit. */
2699
2700 if (i >= argc)
2701 {
2702 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2703 (filenames > FN_DEFAULT)? stdin_name : NULL);
2704 goto EXIT;
2705 }
2706
2707 /* Otherwise, work through the remaining arguments as files or directories.
2708 Pass in the fact that there is only one argument at top level - this suppresses
2709 the file name if the argument is not a directory and filenames are not
2710 otherwise forced. */
2711
2712 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2713
2714 for (; i < argc; i++)
2715 {
2716 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2717 only_one_at_top);
2718 if (frc > 1) rc = frc;
2719 else if (frc == 0 && rc == 1) rc = 0;
2720 }
2721
2722 EXIT:
2723 #ifdef SUPPORT_PCREGREP_JIT
2724 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2725 #endif
2726 if (main_buffer != NULL) free(main_buffer);
2727 if (pattern_list != NULL)
2728 {
2729 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2730 free(pattern_list);
2731 }
2732 if (hints_list != NULL)
2733 {
2734 for (i = 0; i < hint_count; i++)
2735 {
2736 if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);
2737 }
2738 free(hints_list);
2739 }
2740 pcregrep_exit(rc);
2741
2742 EXIT2:
2743 rc = 2;
2744 goto EXIT;
2745 }
2746
2747 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5