/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 646 - (show annotations)
Mon Aug 1 09:54:26 2011 UTC (8 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 78830 byte(s)
Error occurred while calculating annotation data.
Fix typo in error message in pcregrep.
1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2011 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
68 #define FALSE 0
69 #define TRUE 1
70
71 typedef int BOOL;
72
73 #define MAX_PATTERN_COUNT 100
74 #define OFFSET_SIZE 99
75
76 #if BUFSIZ > 8192
77 #define PATBUFSIZE BUFSIZ
78 #else
79 #define PATBUFSIZE 8192
80 #endif
81
82 /* Values for the "filenames" variable, which specifies options for file name
83 output. The order is important; it is assumed that a file name is wanted for
84 all values greater than FN_DEFAULT. */
85
86 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87
88 /* File reading styles */
89
90 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91
92 /* Actions for the -d and -D options */
93
94 enum { dee_READ, dee_SKIP, dee_RECURSE };
95 enum { DEE_READ, DEE_SKIP };
96
97 /* Actions for special processing options (flag bits) */
98
99 #define PO_WORD_MATCH 0x0001
100 #define PO_LINE_MATCH 0x0002
101 #define PO_FIXED_STRINGS 0x0004
102
103 /* Line ending types */
104
105 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that tests the result and then does nothing with it.
Oddly, this does not also seem to apply to fprintf().

The test is wrapped in do { } while (0) so that "FWRITE(...);" is exactly one
statement. A bare "if (...) {}" followed by the caller's semicolon is two
statements, which breaks any use as the body of an if that has an else. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114
115
116
117 /*************************************************
118 * Global variables *
119 *************************************************/
120
121 /* Jeffrey Friedl has some debugging requirements that are not part of the
122 regular code. */
123
124 #ifdef JFRIEDL_DEBUG
125 static int S_arg = -1;
126 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128 static const char *jfriedl_prefix = "";
129 static const char *jfriedl_postfix = "";
130 #endif
131
132 static int endlinetype;
133
134 static char *colour_string = (char *)"1;31";
135 static char *colour_option = NULL;
136 static char *dee_option = NULL;
137 static char *DEE_option = NULL;
138 static char *main_buffer = NULL;
139 static char *newline = NULL;
140 static char *pattern_filename = NULL;
141 static char *stdin_name = (char *)"(standard input)";
142 static char *locale = NULL;
143
144 static const unsigned char *pcretables = NULL;
145
146 static int pattern_count = 0;
147 static pcre **pattern_list = NULL;
148 static pcre_extra **hints_list = NULL;
149
150 static char *include_pattern = NULL;
151 static char *exclude_pattern = NULL;
152 static char *include_dir_pattern = NULL;
153 static char *exclude_dir_pattern = NULL;
154
155 static pcre *include_compiled = NULL;
156 static pcre *exclude_compiled = NULL;
157 static pcre *include_dir_compiled = NULL;
158 static pcre *exclude_dir_compiled = NULL;
159
160 static int after_context = 0;
161 static int before_context = 0;
162 static int both_context = 0;
163 static int bufthird = PCREGREP_BUFSIZE;
164 static int bufsize = 3*PCREGREP_BUFSIZE;
165 static int dee_action = dee_READ;
166 static int DEE_action = DEE_READ;
167 static int error_count = 0;
168 static int filenames = FN_DEFAULT;
169 static int only_matching = -1;
170 static int process_options = 0;
171
172 static unsigned long int match_limit = 0;
173 static unsigned long int match_limit_recursion = 0;
174
175 static BOOL count_only = FALSE;
176 static BOOL do_colour = FALSE;
177 static BOOL file_offsets = FALSE;
178 static BOOL hyphenpending = FALSE;
179 static BOOL invert = FALSE;
180 static BOOL line_buffered = FALSE;
181 static BOOL line_offsets = FALSE;
182 static BOOL multiline = FALSE;
183 static BOOL number = FALSE;
184 static BOOL omit_zero_count = FALSE;
185 static BOOL resource_error = FALSE;
186 static BOOL quiet = FALSE;
187 static BOOL silent = FALSE;
188 static BOOL utf8 = FALSE;
189
190 /* Structure for options and list of them */
191
192 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
193 OP_OP_NUMBER, OP_PATLIST };
194
195 typedef struct option_item {
196 int type;
197 int one_char;
198 void *dataptr;
199 const char *long_name;
200 const char *help_text;
201 } option_item;
202
203 /* Options without a single-letter equivalent get a negative value. This can be
204 used to identify them. */
205
206 #define N_COLOUR (-1)
207 #define N_EXCLUDE (-2)
208 #define N_EXCLUDE_DIR (-3)
209 #define N_HELP (-4)
210 #define N_INCLUDE (-5)
211 #define N_INCLUDE_DIR (-6)
212 #define N_LABEL (-7)
213 #define N_LOCALE (-8)
214 #define N_NULL (-9)
215 #define N_LOFFSETS (-10)
216 #define N_FOFFSETS (-11)
217 #define N_LBUFFER (-12)
218 #define N_M_LIMIT (-13)
219 #define N_M_LIMIT_REC (-14)
220 #define N_BUFSIZE (-15)
221
222 static option_item optionlist[] = {
223 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
224 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
225 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
226 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
227 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
228 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
229 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
230 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
231 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
232 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
233 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
234 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
235 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
236 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
237 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
238 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
239 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
240 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
241 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
242 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
243 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
244 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
245 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
246 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
247 { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
248 { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
249 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
250 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
251 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
252 { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
253 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
254 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
255 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
256 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
257 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
258 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
259
260 /* These two were accidentally implemented with underscores instead of
261 hyphens in the option names. As this was not discovered for several releases,
262 the incorrect versions are left in the table for compatibility. However, the
263 --help function misses out any option that has an underscore in its name. */
264
265 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
266 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
267
268 #ifdef JFRIEDL_DEBUG
269 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
270 #endif
271 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
272 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
273 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
274 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
275 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
276 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
277 { OP_NODATA, 0, NULL, NULL, NULL }
278 };
279
280 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
281 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
282 that the combination of -w and -x has the same effect as -x on its own, so we
283 can treat them as the same. */
284
285 static const char *prefix[] = {
286 "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
287
288 static const char *suffix[] = {
289 "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
290
291 /* UTF-8 tables - used only when the newline setting is "any". */
292
293 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
294
295 const char utf8_table4[] = {
296 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
297 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
298 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
299 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
300
301
302
303 /*************************************************
304 * Exit from the program *
305 *************************************************/
306
307 /* If there has been a resource error, give a suitable message.
308
309 Argument: the return code
310 Returns: does not return
311 */
312
313 static void
314 pcregrep_exit(int rc)
315 {
316 if (resource_error)
317 {
318 fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
319 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
320 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
321 }
322
323 exit(rc);
324 }
325
326
327 /*************************************************
328 * OS-specific functions *
329 *************************************************/
330
331 /* These functions are defined so that they can be made system specific,
332 although at present the only ones are for Unix, Win32, and for "no support". */
333
334
335 /************* Directory scanning in Unix ***********/
336
337 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
338 #include <sys/types.h>
339 #include <sys/stat.h>
340 #include <dirent.h>
341
342 typedef DIR directory_type;
343
/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory
            0 otherwise, including when stat() itself fails
*/

static int
isdirectory(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 0;        /* In the expectation that opening as a file will fail */

/* S_ISDIR() is the base POSIX test; the S_IFMT/S_IFDIR masks are an XSI
extension and are hidden under strict feature-test settings. */

return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
352
/* Open a directory for scanning. This is a direct wrapper round opendir().

Argument:   filename   the path of the directory
Returns:    whatever opendir() returns for that path
*/

static directory_type *
opendirectory(char *filename)
{
return opendir(filename);
}
358
359 static char *
360 readdirectory(directory_type *dir)
361 {
362 for (;;)
363 {
364 struct dirent *dent = readdir(dir);
365 if (dent == NULL) return NULL;
366 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
367 return dent->d_name;
368 }
369 /* Control never reaches here */
370 }
371
/* Finish with a directory that was opened by opendirectory(). The result of
closedir() is not examined.

Argument:   dir   the open directory
Returns:    nothing
*/

static void
closedirectory(directory_type *dir)
{
closedir(dir);
}
377
378
379 /************* Test for regular file in Unix **********/
380
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    1 if stat() reports a regular file, or if stat() fails
            0 if stat() succeeds and reports anything else
*/

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */

/* S_ISREG() is the base POSIX test; the S_IFMT/S_IFREG masks are an XSI
extension. The result is normalized so that callers still see exactly 0 or 1,
as they did with the original comparison. */

return S_ISREG(statbuf.st_mode)? 1 : 0;
}
389
390
391 /************* Test for a terminal in Unix **********/
392
/* Report whether standard output is a terminal.

Returns:    the result of isatty() applied to the descriptor behind stdout
*/

static BOOL
is_stdout_tty(void)
{
return isatty(fileno(stdout));
}
398
/* Report whether an open stream is a terminal.

Argument:   f   the stream to test
Returns:    the result of isatty() applied to the descriptor behind f
*/

static BOOL
is_file_tty(FILE *f)
{
return isatty(fileno(f));
}
404
405
406 /************* Directory scanning in Win32 ***********/
407
408 /* I (Philip Hazel) have no means of testing this code. It was contributed by
409 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
410 when it did not exist. David Byron added a patch that moved the #include of
411 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
412 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
413 undefined when it is indeed undefined. */
414
415 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
416
417 #ifndef STRICT
418 # define STRICT
419 #endif
420 #ifndef WIN32_LEAN_AND_MEAN
421 # define WIN32_LEAN_AND_MEAN
422 #endif
423
424 #include <windows.h>
425
426 #ifndef INVALID_FILE_ATTRIBUTES
427 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
428 #endif
429
430 typedef struct directory_type
431 {
432 HANDLE handle;
433 BOOL first;
434 WIN32_FIND_DATA data;
435 } directory_type;
436
437 int
438 isdirectory(char *filename)
439 {
440 DWORD attr = GetFileAttributes(filename);
441 if (attr == INVALID_FILE_ATTRIBUTES)
442 return 0;
443 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
444 }
445
446 directory_type *
447 opendirectory(char *filename)
448 {
449 size_t len;
450 char *pattern;
451 directory_type *dir;
452 DWORD err;
453 len = strlen(filename);
454 pattern = (char *) malloc(len + 3);
455 dir = (directory_type *) malloc(sizeof(*dir));
456 if ((pattern == NULL) || (dir == NULL))
457 {
458 fprintf(stderr, "pcregrep: malloc failed\n");
459 pcregrep_exit(2);
460 }
461 memcpy(pattern, filename, len);
462 memcpy(&(pattern[len]), "\\*", 3);
463 dir->handle = FindFirstFile(pattern, &(dir->data));
464 if (dir->handle != INVALID_HANDLE_VALUE)
465 {
466 free(pattern);
467 dir->first = TRUE;
468 return dir;
469 }
470 err = GetLastError();
471 free(pattern);
472 free(dir);
473 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
474 return NULL;
475 }
476
477 char *
478 readdirectory(directory_type *dir)
479 {
480 for (;;)
481 {
482 if (!dir->first)
483 {
484 if (!FindNextFile(dir->handle, &(dir->data)))
485 return NULL;
486 }
487 else
488 {
489 dir->first = FALSE;
490 }
491 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
492 return dir->data.cFileName;
493 }
494 #ifndef _MSC_VER
495 return NULL; /* Keep compiler happy; never executed */
496 #endif
497 }
498
/* Finish with a directory that was opened by opendirectory() (Win32
version): close the search handle and free the structure.

Argument:   dir   the open directory
Returns:    nothing
*/

void
closedirectory(directory_type *dir)
{
FindClose(dir->handle);
free(dir);
}
505
506
507 /************* Test for regular file in Win32 **********/
508
/* I don't know how to do this, or if it can be done; assume all paths are
regular if they are not directories.

Argument:   filename   the path to test
Returns:    1 if isdirectory() returns zero for the path, otherwise 0
*/

int isregfile(char *filename)
{
return !isdirectory(filename);
}
516
517
518 /************* Test for a terminal in Win32 **********/
519
520 /* I don't know how to do this; assume never */
521
/* Win32 placeholder: no terminal test is attempted for standard output.

Returns:    always FALSE
*/

static BOOL
is_stdout_tty(void)
{
return FALSE;
}
527
/* Win32 placeholder: no terminal test is attempted for a stream.

Argument:   f   the stream (not used)
Returns:    always FALSE
*/

static BOOL
is_file_tty(FILE *f)
{
return FALSE;
}
533
534
535 /************* Directory scanning when we can't do it ***********/
536
537 /* The type is void, and apart from isdirectory(), the functions do nothing. */
538
539 #else
540
541 typedef void directory_type;
542
/* Stubs used when directory scanning is not available. Nothing is ever
reported as a directory, opening a directory always yields a null pointer,
reading always yields a null pointer, and closing does nothing. None of the
arguments is used. */

int isdirectory(char *filename) { return 0; }
directory_type * opendirectory(char *filename) { return (directory_type*)0;}
char *readdirectory(directory_type *dir) { return (char*)0;}
void closedirectory(directory_type *dir) {}
547
548
549 /************* Test for regular when we can't do it **********/
550
551 /* Assume all files are regular. */
552
/* Stub: always returns 1; the argument is not used. */
int isregfile(char *filename) { return 1; }
554
555
556 /************* Test for a terminal when we can't do it **********/
557
/* Stub for systems where no terminal test is available.

Returns:    always FALSE
*/

static BOOL
is_stdout_tty(void)
{
return FALSE;
}
563
/* Stub for systems where no terminal test is available.

Argument:   f   the stream (not used)
Returns:    always FALSE
*/

static BOOL
is_file_tty(FILE *f)
{
return FALSE;
}
569
570 #endif
571
572
573
574 #ifndef HAVE_STRERROR
575 /*************************************************
576 * Provide strerror() for non-ANSI libraries *
577 *************************************************/
578
/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
in their libraries, but the same facility can be provided by looking the
number up in the sys_errlist[] table, whose size is given by sys_nerr.

Argument:   n   an error number
Returns:    sys_errlist[n] when 0 <= n < sys_nerr,
            otherwise the fixed text "unknown error number"
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
592 #endif /* HAVE_STRERROR */
593
594
595
596 /*************************************************
597 * Read one line of input *
598 *************************************************/
599
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a '\n' has been stored, when "length" characters have been
stored, or at end of file, whichever comes first. The space limit is tested
before each character is read, so nothing is consumed from the file or written
to the buffer when length is zero or negative.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;
while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
627
628
629
630 /*************************************************
631 * Find end of line *
632 *************************************************/
633
634 /* The length of the endline sequence that is found is set via lenptr. This may
635 be zero at the very end of the file if there is no line-ending sequence there.
636
637 Arguments:
638 p current position in line
639 endptr end of available data
640 lenptr where to put the length of the eol sequence
641
642 Returns: pointer after the last byte of the line,
643 including the newline byte(s)
644 */
645
646 static char *
647 end_of_line(char *p, char *endptr, int *lenptr)
648 {
649 switch(endlinetype)
650 {
651 default: /* Just in case */
652 case EL_LF:
653 while (p < endptr && *p != '\n') p++;
654 if (p < endptr)
655 {
656 *lenptr = 1;
657 return p + 1;
658 }
659 *lenptr = 0;
660 return endptr;
661
662 case EL_CR:
663 while (p < endptr && *p != '\r') p++;
664 if (p < endptr)
665 {
666 *lenptr = 1;
667 return p + 1;
668 }
669 *lenptr = 0;
670 return endptr;
671
672 case EL_CRLF:
673 for (;;)
674 {
675 while (p < endptr && *p != '\r') p++;
676 if (++p >= endptr)
677 {
678 *lenptr = 0;
679 return endptr;
680 }
681 if (*p == '\n')
682 {
683 *lenptr = 2;
684 return p + 1;
685 }
686 }
687 break;
688
689 case EL_ANYCRLF:
690 while (p < endptr)
691 {
692 int extra = 0;
693 register int c = *((unsigned char *)p);
694
695 if (utf8 && c >= 0xc0)
696 {
697 int gcii, gcss;
698 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
699 gcss = 6*extra;
700 c = (c & utf8_table3[extra]) << gcss;
701 for (gcii = 1; gcii <= extra; gcii++)
702 {
703 gcss -= 6;
704 c |= (p[gcii] & 0x3f) << gcss;
705 }
706 }
707
708 p += 1 + extra;
709
710 switch (c)
711 {
712 case 0x0a: /* LF */
713 *lenptr = 1;
714 return p;
715
716 case 0x0d: /* CR */
717 if (p < endptr && *p == 0x0a)
718 {
719 *lenptr = 2;
720 p++;
721 }
722 else *lenptr = 1;
723 return p;
724
725 default:
726 break;
727 }
728 } /* End of loop for ANYCRLF case */
729
730 *lenptr = 0; /* Must have hit the end */
731 return endptr;
732
733 case EL_ANY:
734 while (p < endptr)
735 {
736 int extra = 0;
737 register int c = *((unsigned char *)p);
738
739 if (utf8 && c >= 0xc0)
740 {
741 int gcii, gcss;
742 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
743 gcss = 6*extra;
744 c = (c & utf8_table3[extra]) << gcss;
745 for (gcii = 1; gcii <= extra; gcii++)
746 {
747 gcss -= 6;
748 c |= (p[gcii] & 0x3f) << gcss;
749 }
750 }
751
752 p += 1 + extra;
753
754 switch (c)
755 {
756 case 0x0a: /* LF */
757 case 0x0b: /* VT */
758 case 0x0c: /* FF */
759 *lenptr = 1;
760 return p;
761
762 case 0x0d: /* CR */
763 if (p < endptr && *p == 0x0a)
764 {
765 *lenptr = 2;
766 p++;
767 }
768 else *lenptr = 1;
769 return p;
770
771 case 0x85: /* NEL */
772 *lenptr = utf8? 2 : 1;
773 return p;
774
775 case 0x2028: /* LS */
776 case 0x2029: /* PS */
777 *lenptr = 3;
778 return p;
779
780 default:
781 break;
782 }
783 } /* End of loop for ANY case */
784
785 *lenptr = 0; /* Must have hit the end */
786 return endptr;
787 } /* End of overall switch */
788 }
789
790
791
792 /*************************************************
793 * Find start of previous line *
794 *************************************************/
795
796 /* This is called when looking back for before lines to print.
797
798 Arguments:
799 p start of the subsequent line
800 startptr start of available data
801
802 Returns: pointer to the start of the previous line
803 */
804
805 static char *
806 previous_line(char *p, char *startptr)
807 {
808 switch(endlinetype)
809 {
810 default: /* Just in case */
811 case EL_LF:
812 p--;
813 while (p > startptr && p[-1] != '\n') p--;
814 return p;
815
816 case EL_CR:
817 p--;
818 while (p > startptr && p[-1] != '\n') p--;
819 return p;
820
821 case EL_CRLF:
822 for (;;)
823 {
824 p -= 2;
825 while (p > startptr && p[-1] != '\n') p--;
826 if (p <= startptr + 1 || p[-2] == '\r') return p;
827 }
828 return p; /* But control should never get here */
829
830 case EL_ANY:
831 case EL_ANYCRLF:
832 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
833 if (utf8) while ((*p & 0xc0) == 0x80) p--;
834
835 while (p > startptr)
836 {
837 register int c;
838 char *pp = p - 1;
839
840 if (utf8)
841 {
842 int extra = 0;
843 while ((*pp & 0xc0) == 0x80) pp--;
844 c = *((unsigned char *)pp);
845 if (c >= 0xc0)
846 {
847 int gcii, gcss;
848 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
849 gcss = 6*extra;
850 c = (c & utf8_table3[extra]) << gcss;
851 for (gcii = 1; gcii <= extra; gcii++)
852 {
853 gcss -= 6;
854 c |= (pp[gcii] & 0x3f) << gcss;
855 }
856 }
857 }
858 else c = *((unsigned char *)pp);
859
860 if (endlinetype == EL_ANYCRLF) switch (c)
861 {
862 case 0x0a: /* LF */
863 case 0x0d: /* CR */
864 return p;
865
866 default:
867 break;
868 }
869
870 else switch (c)
871 {
872 case 0x0a: /* LF */
873 case 0x0b: /* VT */
874 case 0x0c: /* FF */
875 case 0x0d: /* CR */
876 case 0x85: /* NEL */
877 case 0x2028: /* LS */
878 case 0x2029: /* PS */
879 return p;
880
881 default:
882 break;
883 }
884
885 p = pp; /* Back one character */
886 } /* End of loop for ANY case */
887
888 return startptr; /* Hit start of data */
889 } /* End of overall switch */
890 }
891
892
893
894
895
896 /*************************************************
897 * Print the previous "after" lines *
898 *************************************************/
899
900 /* This is called if we are about to lose said lines because of buffer filling,
901 and at the end of the file. The data in the line is written using fwrite() so
902 that a binary zero does not terminate it.
903
904 Arguments:
905 lastmatchnumber the number of the last matching line, plus one
906 lastmatchrestart where we restarted after the last match
907 endptr end of available data
908 printname filename for printing
909
910 Returns: nothing
911 */
912
913 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
914 char *endptr, char *printname)
915 {
916 if (after_context > 0 && lastmatchnumber > 0)
917 {
918 int count = 0;
919 while (lastmatchrestart < endptr && count++ < after_context)
920 {
921 int ellength;
922 char *pp = lastmatchrestart;
923 if (printname != NULL) fprintf(stdout, "%s-", printname);
924 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
925 pp = end_of_line(pp, endptr, &ellength);
926 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
927 lastmatchrestart = pp;
928 }
929 hyphenpending = TRUE;
930 }
931 }
932
933
934
935 /*************************************************
936 * Apply patterns to subject till one matches *
937 *************************************************/
938
939 /* This function is called to run through all patterns, looking for a match. It
940 is used multiple times for the same subject when colouring is enabled, in order
941 to find all possible matches.
942
943 Arguments:
944 matchptr the start of the subject
945 length the length of the subject to match
946 startoffset where to start matching
947 offsets the offets vector to fill in
948 mrc address of where to put the result of pcre_exec()
949
950 Returns: TRUE if there was a match
951 FALSE if there was no match
952 invert if there was a non-fatal error
953 */
954
955 static BOOL
956 match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
957 int *mrc)
958 {
959 int i;
960 size_t slen = length;
961 const char *msg = "this text:\n\n";
962 if (slen > 200)
963 {
964 slen = 200;
965 msg = "text that starts:\n\n";
966 }
967 for (i = 0; i < pattern_count; i++)
968 {
969 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
970 startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
971 if (*mrc >= 0) return TRUE;
972 if (*mrc == PCRE_ERROR_NOMATCH) continue;
973 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
974 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
975 fprintf(stderr, "%s", msg);
976 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
977 fprintf(stderr, "\n\n");
978 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
979 resource_error = TRUE;
980 if (error_count++ > 20)
981 {
982 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
983 pcregrep_exit(2);
984 }
985 return invert; /* No more matching; don't show the line again */
986 }
987
988 return FALSE; /* No match, no errors */
989 }
990
991
992
993 /*************************************************
994 * Grep an individual file *
995 *************************************************/
996
997 /* This is called from grep_or_recurse() below. It uses a buffer that is three
998 times the value of bufthird. The matching point is never allowed to stray into
999 the top third of the buffer, thus keeping more of the file available for
1000 context printing or for multiline scanning. For large files, the pointer will
1001 be in the middle third most of the time, so the bottom third is available for
1002 "before" context printing.
1003
1004 Arguments:
1005 handle the fopened FILE stream for a normal file
1006 the gzFile pointer when reading is via libz
1007 the BZFILE pointer when reading is via libbz2
1008 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1009 filename the file name or NULL (for errors)
1010 printname the file name if it is to be printed for each match
1011 or NULL if the file name is not to be printed
1012 it cannot be NULL if filenames[_nomatch]_only is set
1013
1014 Returns: 0 if there was at least one match
1015 1 otherwise (no matches)
1016 2 if an overlong line is encountered
1017 3 if there is a read error on a .bz2 file
1018 */
1019
1020 static int
1021 pcregrep(void *handle, int frtype, char *filename, char *printname)
1022 {
1023 int rc = 1;
1024 int linenumber = 1;
1025 int lastmatchnumber = 0;
1026 int count = 0;
1027 int filepos = 0;
1028 int offsets[OFFSET_SIZE];
1029 char *lastmatchrestart = NULL;
1030 char *ptr = main_buffer;
1031 char *endptr;
1032 size_t bufflength;
1033 BOOL endhyphenpending = FALSE;
1034 BOOL input_line_buffered = line_buffered;
1035 FILE *in = NULL; /* Ensure initialized */
1036
1037 #ifdef SUPPORT_LIBZ
1038 gzFile ingz = NULL;
1039 #endif
1040
1041 #ifdef SUPPORT_LIBBZ2
1042 BZFILE *inbz2 = NULL;
1043 #endif
1044
1045
1046 /* Do the first read into the start of the buffer and set up the pointer to end
1047 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1048 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1049 fail. */
1050
1051 #ifdef SUPPORT_LIBZ
1052 if (frtype == FR_LIBZ)
1053 {
1054 ingz = (gzFile)handle;
1055 bufflength = gzread (ingz, main_buffer, bufsize);
1056 }
1057 else
1058 #endif
1059
1060 #ifdef SUPPORT_LIBBZ2
1061 if (frtype == FR_LIBBZ2)
1062 {
1063 inbz2 = (BZFILE *)handle;
1064 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1065 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1066 } /* without the cast it is unsigned. */
1067 else
1068 #endif
1069
1070 {
1071 in = (FILE *)handle;
1072 if (is_file_tty(in)) input_line_buffered = TRUE;
1073 bufflength = input_line_buffered?
1074 read_one_line(main_buffer, bufsize, in) :
1075 fread(main_buffer, 1, bufsize, in);
1076 }
1077
1078 endptr = main_buffer + bufflength;
1079
1080 /* Loop while the current pointer is not at the end of the file. For large
1081 files, endptr will be at the end of the buffer when we are in the middle of the
1082 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1083 way, the buffer is shifted left and re-filled. */
1084
1085 while (ptr < endptr)
1086 {
1087 int endlinelength;
1088 int mrc = 0;
1089 int startoffset = 0;
1090 BOOL match;
1091 char *matchptr = ptr;
1092 char *t = ptr;
1093 size_t length, linelength;
1094
1095 /* At this point, ptr is at the start of a line. We need to find the length
1096 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1097 length remainder of the data in the buffer. Otherwise, it is the length of
1098 the next line, excluding the terminating newline. After matching, we always
1099 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1100 option is used for compiling, so that any match is constrained to be in the
1101 first line. */
1102
1103 t = end_of_line(t, endptr, &endlinelength);
1104 linelength = t - ptr - endlinelength;
1105 length = multiline? (size_t)(endptr - ptr) : linelength;
1106
1107 /* Check to see if the line we are looking at extends right to the very end
1108 of the buffer without a line terminator. This means the line is too long to
1109 handle. */
1110
1111 if (endlinelength == 0 && t == main_buffer + bufsize)
1112 {
1113 fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1114 "pcregrep: check the --buffer-size option\n",
1115 linenumber,
1116 (filename == NULL)? "" : " of file ",
1117 (filename == NULL)? "" : filename);
1118 return 2;
1119 }
1120
1121 /* Extra processing for Jeffrey Friedl's debugging. */
1122
1123 #ifdef JFRIEDL_DEBUG
1124 if (jfriedl_XT || jfriedl_XR)
1125 {
1126 #include <sys/time.h>
1127 #include <time.h>
1128 struct timeval start_time, end_time;
1129 struct timezone dummy;
1130 int i;
1131
1132 if (jfriedl_XT)
1133 {
1134 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1135 const char *orig = ptr;
1136 ptr = malloc(newlen + 1);
1137 if (!ptr) {
1138 printf("out of memory");
1139 pcregrep_exit(2);
1140 }
1141 endptr = ptr;
1142 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1143 for (i = 0; i < jfriedl_XT; i++) {
1144 strncpy(endptr, orig, length);
1145 endptr += length;
1146 }
1147 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1148 length = newlen;
1149 }
1150
1151 if (gettimeofday(&start_time, &dummy) != 0)
1152 perror("bad gettimeofday");
1153
1154
1155 for (i = 0; i < jfriedl_XR; i++)
1156 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1157 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1158
1159 if (gettimeofday(&end_time, &dummy) != 0)
1160 perror("bad gettimeofday");
1161
1162 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1163 -
1164 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1165
1166 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1167 return 0;
1168 }
1169 #endif
1170
1171 /* We come back here after a match when the -o option (only_matching) is set,
1172 in order to find any further matches in the same line. */
1173
1174 ONLY_MATCHING_RESTART:
1175
1176 /* Run through all the patterns until one matches or there is an error other
1177 than NOMATCH. This code is in a subroutine so that it can be re-used for
1178 finding subsequent matches when colouring matched lines. */
1179
1180 match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1181
1182 /* If it's a match or a not-match (as required), do what's wanted. */
1183
1184 if (match != invert)
1185 {
1186 BOOL hyphenprinted = FALSE;
1187
1188 /* We've failed if we want a file that doesn't have any matches. */
1189
1190 if (filenames == FN_NOMATCH_ONLY) return 1;
1191
1192 /* Just count if just counting is wanted. */
1193
1194 if (count_only) count++;
1195
1196 /* If all we want is a file name, there is no need to scan any more lines
1197 in the file. */
1198
1199 else if (filenames == FN_MATCH_ONLY)
1200 {
1201 fprintf(stdout, "%s\n", printname);
1202 return 0;
1203 }
1204
1205 /* Likewise, if all we want is a yes/no answer. */
1206
1207 else if (quiet) return 0;
1208
1209 /* The --only-matching option prints just the substring that matched, or a
1210 captured portion of it, as long as this string is not empty, and the
1211 --file-offsets and --line-offsets options output offsets for the matching
1212 substring (they both force --only-matching = 0). None of these options
1213 prints any context. Afterwards, adjust the start and then jump back to look
1214 for further matches in the same line. If we are in invert mode, however,
1215 nothing is printed and we do not restart - this could still be useful
1216 because the return code is set. */
1217
1218 else if (only_matching >= 0)
1219 {
1220 if (!invert)
1221 {
1222 if (printname != NULL) fprintf(stdout, "%s:", printname);
1223 if (number) fprintf(stdout, "%d:", linenumber);
1224 if (line_offsets)
1225 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1226 offsets[1] - offsets[0]);
1227 else if (file_offsets)
1228 fprintf(stdout, "%d,%d\n",
1229 (int)(filepos + matchptr + offsets[0] - ptr),
1230 offsets[1] - offsets[0]);
1231 else if (only_matching < mrc)
1232 {
1233 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1234 if (plen > 0)
1235 {
1236 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1237 FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1238 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1239 fprintf(stdout, "\n");
1240 }
1241 }
1242 else if (printname != NULL || number) fprintf(stdout, "\n");
1243 match = FALSE;
1244 if (line_buffered) fflush(stdout);
1245 rc = 0; /* Had some success */
1246 startoffset = offsets[1]; /* Restart after the match */
1247 goto ONLY_MATCHING_RESTART;
1248 }
1249 }
1250
1251 /* This is the default case when none of the above options is set. We print
1252 the matching lines(s), possibly preceded and/or followed by other lines of
1253 context. */
1254
1255 else
1256 {
1257 /* See if there is a requirement to print some "after" lines from a
1258 previous match. We never print any overlaps. */
1259
1260 if (after_context > 0 && lastmatchnumber > 0)
1261 {
1262 int ellength;
1263 int linecount = 0;
1264 char *p = lastmatchrestart;
1265
1266 while (p < ptr && linecount < after_context)
1267 {
1268 p = end_of_line(p, ptr, &ellength);
1269 linecount++;
1270 }
1271
1272 /* It is important to advance lastmatchrestart during this printing so
1273 that it interacts correctly with any "before" printing below. Print
1274 each line's data using fwrite() in case there are binary zeroes. */
1275
1276 while (lastmatchrestart < p)
1277 {
1278 char *pp = lastmatchrestart;
1279 if (printname != NULL) fprintf(stdout, "%s-", printname);
1280 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1281 pp = end_of_line(pp, endptr, &ellength);
1282 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1283 lastmatchrestart = pp;
1284 }
1285 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1286 }
1287
1288 /* If there were non-contiguous lines printed above, insert hyphens. */
1289
1290 if (hyphenpending)
1291 {
1292 fprintf(stdout, "--\n");
1293 hyphenpending = FALSE;
1294 hyphenprinted = TRUE;
1295 }
1296
1297 /* See if there is a requirement to print some "before" lines for this
1298 match. Again, don't print overlaps. */
1299
1300 if (before_context > 0)
1301 {
1302 int linecount = 0;
1303 char *p = ptr;
1304
1305 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1306 linecount < before_context)
1307 {
1308 linecount++;
1309 p = previous_line(p, main_buffer);
1310 }
1311
1312 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1313 fprintf(stdout, "--\n");
1314
1315 while (p < ptr)
1316 {
1317 int ellength;
1318 char *pp = p;
1319 if (printname != NULL) fprintf(stdout, "%s-", printname);
1320 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1321 pp = end_of_line(pp, endptr, &ellength);
1322 FWRITE(p, 1, pp - p, stdout);
1323 p = pp;
1324 }
1325 }
1326
1327 /* Now print the matching line(s); ensure we set hyphenpending at the end
1328 of the file if any context lines are being output. */
1329
1330 if (after_context > 0 || before_context > 0)
1331 endhyphenpending = TRUE;
1332
1333 if (printname != NULL) fprintf(stdout, "%s:", printname);
1334 if (number) fprintf(stdout, "%d:", linenumber);
1335
1336 /* In multiline mode, we want to print to the end of the line in which
1337 the end of the matched string is found, so we adjust linelength and the
1338 line number appropriately, but only when there actually was a match
1339 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1340 the match will always be before the first newline sequence. */
1341
1342 if (multiline & !invert)
1343 {
1344 char *endmatch = ptr + offsets[1];
1345 t = ptr;
1346 while (t < endmatch)
1347 {
1348 t = end_of_line(t, endptr, &endlinelength);
1349 if (t < endmatch) linenumber++; else break;
1350 }
1351 linelength = t - ptr - endlinelength;
1352 }
1353
1354 /*** NOTE: Use only fwrite() to output the data line, so that binary
1355 zeroes are treated as just another data character. */
1356
1357 /* This extra option, for Jeffrey Friedl's debugging requirements,
1358 replaces the matched string, or a specific captured string if it exists,
1359 with X. When this happens, colouring is ignored. */
1360
1361 #ifdef JFRIEDL_DEBUG
1362 if (S_arg >= 0 && S_arg < mrc)
1363 {
1364 int first = S_arg * 2;
1365 int last = first + 1;
1366 FWRITE(ptr, 1, offsets[first], stdout);
1367 fprintf(stdout, "X");
1368 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1369 }
1370 else
1371 #endif
1372
1373 /* We have to split the line(s) up if colouring, and search for further
1374 matches, but not of course if the line is a non-match. */
1375
1376 if (do_colour && !invert)
1377 {
1378 int plength;
1379 FWRITE(ptr, 1, offsets[0], stdout);
1380 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1381 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1382 fprintf(stdout, "%c[00m", 0x1b);
1383 for (;;)
1384 {
1385 startoffset = offsets[1];
1386 if (startoffset >= linelength + endlinelength ||
1387 !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1388 break;
1389 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1390 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1391 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1392 fprintf(stdout, "%c[00m", 0x1b);
1393 }
1394
1395 /* In multiline mode, we may have already printed the complete line
1396 and its line-ending characters (if they matched the pattern), so there
1397 may be no more to print. */
1398
1399 plength = (linelength + endlinelength) - startoffset;
1400 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1401 }
1402
1403 /* Not colouring; no need to search for further matches */
1404
1405 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1406 }
1407
1408 /* End of doing what has to be done for a match. If --line-buffered was
1409 given, flush the output. */
1410
1411 if (line_buffered) fflush(stdout);
1412 rc = 0; /* Had some success */
1413
1414 /* Remember where the last match happened for after_context. We remember
1415 where we are about to restart, and that line's number. */
1416
1417 lastmatchrestart = ptr + linelength + endlinelength;
1418 lastmatchnumber = linenumber + 1;
1419 }
1420
1421 /* For a match in multiline inverted mode (which of course did not cause
1422 anything to be printed), we have to move on to the end of the match before
1423 proceeding. */
1424
1425 if (multiline && invert && match)
1426 {
1427 int ellength;
1428 char *endmatch = ptr + offsets[1];
1429 t = ptr;
1430 while (t < endmatch)
1431 {
1432 t = end_of_line(t, endptr, &ellength);
1433 if (t <= endmatch) linenumber++; else break;
1434 }
1435 endmatch = end_of_line(endmatch, endptr, &ellength);
1436 linelength = endmatch - ptr - ellength;
1437 }
1438
1439 /* Advance to after the newline and increment the line number. The file
1440 offset to the current line is maintained in filepos. */
1441
1442 ptr += linelength + endlinelength;
1443 filepos += (int)(linelength + endlinelength);
1444 linenumber++;
1445
1446 /* If input is line buffered, and the buffer is not yet full, read another
1447 line and add it into the buffer. */
1448
1449 if (input_line_buffered && bufflength < bufsize)
1450 {
1451 int add = read_one_line(ptr, bufsize - (ptr - main_buffer), in);
1452 bufflength += add;
1453 endptr += add;
1454 }
1455
1456 /* If we haven't yet reached the end of the file (the buffer is full), and
1457 the current point is in the top 1/3 of the buffer, slide the buffer down by
1458 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1459 about to be lost, print them. */
1460
1461 if (bufflength >= bufsize && ptr > main_buffer + 2*bufthird)
1462 {
1463 if (after_context > 0 &&
1464 lastmatchnumber > 0 &&
1465 lastmatchrestart < main_buffer + bufthird)
1466 {
1467 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1468 lastmatchnumber = 0;
1469 }
1470
1471 /* Now do the shuffle */
1472
1473 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1474 ptr -= bufthird;
1475
1476 #ifdef SUPPORT_LIBZ
1477 if (frtype == FR_LIBZ)
1478 bufflength = 2*bufthird +
1479 gzread (ingz, main_buffer + 2*bufthird, bufthird);
1480 else
1481 #endif
1482
1483 #ifdef SUPPORT_LIBBZ2
1484 if (frtype == FR_LIBBZ2)
1485 bufflength = 2*bufthird +
1486 BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1487 else
1488 #endif
1489
1490 bufflength = 2*bufthird +
1491 (input_line_buffered?
1492 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1493 fread(main_buffer + 2*bufthird, 1, bufthird, in));
1494 endptr = main_buffer + bufflength;
1495
1496 /* Adjust any last match point */
1497
1498 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1499 }
1500 } /* Loop through the whole file */
1501
1502 /* End of file; print final "after" lines if wanted; do_after_lines sets
1503 hyphenpending if it prints something. */
1504
1505 if (only_matching < 0 && !count_only)
1506 {
1507 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1508 hyphenpending |= endhyphenpending;
1509 }
1510
1511 /* Print the file name if we are looking for those without matches and there
1512 were none. If we found a match, we won't have got this far. */
1513
1514 if (filenames == FN_NOMATCH_ONLY)
1515 {
1516 fprintf(stdout, "%s\n", printname);
1517 return 0;
1518 }
1519
1520 /* Print the match count if wanted */
1521
1522 if (count_only)
1523 {
1524 if (count > 0 || !omit_zero_count)
1525 {
1526 if (printname != NULL && filenames != FN_NONE)
1527 fprintf(stdout, "%s:", printname);
1528 fprintf(stdout, "%d\n", count);
1529 }
1530 }
1531
1532 return rc;
1533 }
1534
1535
1536
1537 /*************************************************
1538 * Grep a file or recurse into a directory *
1539 *************************************************/
1540
1541 /* Given a path name, if it's a directory, scan all the files if we are
1542 recursing; if it's a file, grep it.
1543
1544 Arguments:
1545 pathname the path to investigate
1546 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1547 only_one_at_top TRUE if the path is the only one at toplevel
1548
1549 Returns: 0 if there was at least one match
1550 1 if there were no matches
1551 2 there was some kind of error
1552
1553 However, file opening failures are suppressed if "silent" is set.
1554 */
1555
1556 static int
1557 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1558 {
1559 int rc = 1;
1560 int sep;
1561 int frtype;
1562 int pathlen;
1563 void *handle;
1564 FILE *in = NULL; /* Ensure initialized */
1565
1566 #ifdef SUPPORT_LIBZ
1567 gzFile ingz = NULL;
1568 #endif
1569
1570 #ifdef SUPPORT_LIBBZ2
1571 BZFILE *inbz2 = NULL;
1572 #endif
1573
1574 /* If the file name is "-" we scan stdin */
1575
1576 if (strcmp(pathname, "-") == 0)
1577 {
1578 return pcregrep(stdin, FR_PLAIN, stdin_name,
1579 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1580 stdin_name : NULL);
1581 }
1582
1583 /* If the file is a directory, skip if skipping or if we are recursing, scan
1584 each file and directory within it, subject to any include or exclude patterns
1585 that were set. The scanning code is localized so it can be made
1586 system-specific. */
1587
1588 if ((sep = isdirectory(pathname)) != 0)
1589 {
1590 if (dee_action == dee_SKIP) return 1;
1591 if (dee_action == dee_RECURSE)
1592 {
1593 char buffer[1024];
1594 char *nextfile;
1595 directory_type *dir = opendirectory(pathname);
1596
1597 if (dir == NULL)
1598 {
1599 if (!silent)
1600 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1601 strerror(errno));
1602 return 2;
1603 }
1604
1605 while ((nextfile = readdirectory(dir)) != NULL)
1606 {
1607 int frc, nflen;
1608 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1609 nflen = (int)(strlen(nextfile));
1610
1611 if (isdirectory(buffer))
1612 {
1613 if (exclude_dir_compiled != NULL &&
1614 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1615 continue;
1616
1617 if (include_dir_compiled != NULL &&
1618 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1619 continue;
1620 }
1621 else
1622 {
1623 if (exclude_compiled != NULL &&
1624 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1625 continue;
1626
1627 if (include_compiled != NULL &&
1628 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1629 continue;
1630 }
1631
1632 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1633 if (frc > 1) rc = frc;
1634 else if (frc == 0 && rc == 1) rc = 0;
1635 }
1636
1637 closedirectory(dir);
1638 return rc;
1639 }
1640 }
1641
1642 /* If the file is not a directory and not a regular file, skip it if that's
1643 been requested. */
1644
1645 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1646
1647 /* Control reaches here if we have a regular file, or if we have a directory
1648 and recursion or skipping was not requested, or if we have anything else and
1649 skipping was not requested. The scan proceeds. If this is the first and only
1650 argument at top level, we don't show the file name, unless we are only showing
1651 the file name, or the filename was forced (-H). */
1652
1653 pathlen = (int)(strlen(pathname));
1654
1655 /* Open using zlib if it is supported and the file name ends with .gz. */
1656
1657 #ifdef SUPPORT_LIBZ
1658 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1659 {
1660 ingz = gzopen(pathname, "rb");
1661 if (ingz == NULL)
1662 {
1663 if (!silent)
1664 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1665 strerror(errno));
1666 return 2;
1667 }
1668 handle = (void *)ingz;
1669 frtype = FR_LIBZ;
1670 }
1671 else
1672 #endif
1673
1674 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1675
1676 #ifdef SUPPORT_LIBBZ2
1677 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1678 {
1679 inbz2 = BZ2_bzopen(pathname, "rb");
1680 handle = (void *)inbz2;
1681 frtype = FR_LIBBZ2;
1682 }
1683 else
1684 #endif
1685
1686 /* Otherwise use plain fopen(). The label is so that we can come back here if
1687 an attempt to read a .bz2 file indicates that it really is a plain file. */
1688
1689 #ifdef SUPPORT_LIBBZ2
1690 PLAIN_FILE:
1691 #endif
1692 {
1693 in = fopen(pathname, "rb");
1694 handle = (void *)in;
1695 frtype = FR_PLAIN;
1696 }
1697
1698 /* All the opening methods return errno when they fail. */
1699
1700 if (handle == NULL)
1701 {
1702 if (!silent)
1703 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1704 strerror(errno));
1705 return 2;
1706 }
1707
1708 /* Now grep the file */
1709
1710 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1711 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1712
1713 /* Close in an appropriate manner. */
1714
1715 #ifdef SUPPORT_LIBZ
1716 if (frtype == FR_LIBZ)
1717 gzclose(ingz);
1718 else
1719 #endif
1720
1721 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1722 read failed. If the error indicates that the file isn't in fact bzipped, try
1723 again as a normal file. */
1724
1725 #ifdef SUPPORT_LIBBZ2
1726 if (frtype == FR_LIBBZ2)
1727 {
1728 if (rc == 3)
1729 {
1730 int errnum;
1731 const char *err = BZ2_bzerror(inbz2, &errnum);
1732 if (errnum == BZ_DATA_ERROR_MAGIC)
1733 {
1734 BZ2_bzclose(inbz2);
1735 goto PLAIN_FILE;
1736 }
1737 else if (!silent)
1738 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1739 pathname, err);
1740 rc = 2; /* The normal "something went wrong" code */
1741 }
1742 BZ2_bzclose(inbz2);
1743 }
1744 else
1745 #endif
1746
1747 /* Normal file close */
1748
1749 fclose(in);
1750
1751 /* Pass back the yield from pcregrep(). */
1752
1753 return rc;
1754 }
1755
1756
1757
1758
1759 /*************************************************
1760 * Usage function *
1761 *************************************************/
1762
1763 static int
1764 usage(int rc)
1765 {
1766 option_item *op;
1767 fprintf(stderr, "Usage: pcregrep [-");
1768 for (op = optionlist; op->one_char != 0; op++)
1769 {
1770 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1771 }
1772 fprintf(stderr, "] [long options] [pattern] [files]\n");
1773 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1774 "options.\n");
1775 return rc;
1776 }
1777
1778
1779
1780
1781 /*************************************************
1782 * Help function *
1783 *************************************************/
1784
1785 static void
1786 help(void)
1787 {
1788 option_item *op;
1789
1790 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1791 printf("Search for PATTERN in each FILE or standard input.\n");
1792 printf("PATTERN must be present if neither -e nor -f is used.\n");
1793 printf("\"-\" can be used as a file name to mean STDIN.\n");
1794
1795 #ifdef SUPPORT_LIBZ
1796 printf("Files whose names end in .gz are read using zlib.\n");
1797 #endif
1798
1799 #ifdef SUPPORT_LIBBZ2
1800 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1801 #endif
1802
1803 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1804 printf("Other files and the standard input are read as plain files.\n\n");
1805 #else
1806 printf("All files are read as plain files, without any interpretation.\n\n");
1807 #endif
1808
1809 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1810 printf("Options:\n");
1811
1812 for (op = optionlist; op->one_char != 0; op++)
1813 {
1814 int n;
1815 char s[4];
1816
1817 /* Two options were accidentally implemented and documented with underscores
1818 instead of hyphens in their names, something that was not noticed for quite a
1819 few releases. When fixing this, I left the underscored versions in the list
1820 in case people were using them. However, we don't want to display them in the
1821 help data. There are no other options that contain underscores, and we do not
1822 expect ever to implement such options. Therefore, just omit any option that
1823 contains an underscore. */
1824
1825 if (strchr(op->long_name, '_') != NULL) continue;
1826
1827 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1828 n = 31 - printf(" %s --%s", s, op->long_name);
1829 if (n < 1) n = 1;
1830 printf("%.*s%s\n", n, " ", op->help_text);
1831 }
1832
1833 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1834 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1835 printf("When reading patterns from a file instead of using a command line option,\n");
1836 printf("trailing white space is removed and blank lines are ignored.\n");
1837 printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1838 MAX_PATTERN_COUNT, PATBUFSIZE);
1839
1840 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1841 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1842 }
1843
1844
1845
1846
1847 /*************************************************
1848 * Handle a single-letter, no data option *
1849 *************************************************/
1850
1851 static int
1852 handle_option(int letter, int options)
1853 {
1854 switch(letter)
1855 {
1856 case N_FOFFSETS: file_offsets = TRUE; break;
1857 case N_HELP: help(); pcregrep_exit(0);
1858 case N_LOFFSETS: line_offsets = number = TRUE; break;
1859 case N_LBUFFER: line_buffered = TRUE; break;
1860 case 'c': count_only = TRUE; break;
1861 case 'F': process_options |= PO_FIXED_STRINGS; break;
1862 case 'H': filenames = FN_FORCE; break;
1863 case 'h': filenames = FN_NONE; break;
1864 case 'i': options |= PCRE_CASELESS; break;
1865 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1866 case 'L': filenames = FN_NOMATCH_ONLY; break;
1867 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1868 case 'n': number = TRUE; break;
1869 case 'o': only_matching = 0; break;
1870 case 'q': quiet = TRUE; break;
1871 case 'r': dee_action = dee_RECURSE; break;
1872 case 's': silent = TRUE; break;
1873 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1874 case 'v': invert = TRUE; break;
1875 case 'w': process_options |= PO_WORD_MATCH; break;
1876 case 'x': process_options |= PO_LINE_MATCH; break;
1877
1878 case 'V':
1879 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1880 pcregrep_exit(0);
1881 break;
1882
1883 default:
1884 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1885 pcregrep_exit(usage(2));
1886 }
1887
1888 return options;
1889 }
1890
1891
1892
1893
1894 /*************************************************
1895 * Construct printed ordinal *
1896 *************************************************/
1897
1898 /* This turns a number into "1st", "3rd", etc. */
1899
/* Turn a number into its English ordinal: "1st", "2nd", "3rd", "4th", ...

Argument:   n   the number
Returns:    pointer to a static buffer holding the text; it is overwritten by
            the next call, so the result must be used or copied before then
*/

static char *
ordin(int n)
{
/* Room for any int: at most 3 decimal digits per byte, plus a sign, the
two-letter suffix and the terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *suffix = "th";
int lasttwo = n % 100;

/* 11, 12 and 13 (and 111, 212, ...) take "th", not "st"/"nd"/"rd". For
negative n both remainders are non-positive, so "th" is kept as well. */

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, suffix);
return buffer;
}
1916
1917
1918
1919 /*************************************************
1920 * Compile a single pattern *
1921 *************************************************/
1922
1923 /* When the -F option has been used, this is called for each substring.
1924 Otherwise it's called for each supplied pattern.
1925
1926 Arguments:
1927 pattern the pattern string
1928 options the PCRE options
1929 filename the file name, or NULL for a command-line pattern
1930 count 0 if this is the only command line pattern, or
1931 number of the command line pattern, or
1932 linenumber for a pattern from a file
1933
1934 Returns: TRUE on success, FALSE after an error
1935 */
1936
1937 static BOOL
1938 compile_single_pattern(char *pattern, int options, char *filename, int count)
1939 {
1940 char buffer[PATBUFSIZE];
1941 const char *error;
1942 int errptr;
1943
1944 if (pattern_count >= MAX_PATTERN_COUNT)
1945 {
1946 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1947 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1948 return FALSE;
1949 }
1950
1951 sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
1952 suffix[process_options]);
1953 pattern_list[pattern_count] =
1954 pcre_compile(buffer, options, &error, &errptr, pcretables);
1955 if (pattern_list[pattern_count] != NULL)
1956 {
1957 pattern_count++;
1958 return TRUE;
1959 }
1960
1961 /* Handle compile errors */
1962
1963 errptr -= (int)strlen(prefix[process_options]);
1964 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1965
1966 if (filename == NULL)
1967 {
1968 if (count == 0)
1969 fprintf(stderr, "pcregrep: Error in command-line regex "
1970 "at offset %d: %s\n", errptr, error);
1971 else
1972 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1973 "at offset %d: %s\n", ordin(count), errptr, error);
1974 }
1975 else
1976 {
1977 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1978 "at offset %d: %s\n", count, filename, errptr, error);
1979 }
1980
1981 return FALSE;
1982 }
1983
1984
1985
1986 /*************************************************
1987 * Compile one supplied pattern *
1988 *************************************************/
1989
1990 /* When the -F option has been used, each string may be a list of strings,
1991 separated by line breaks. They will be matched literally.
1992
1993 Arguments:
1994 pattern the pattern string
1995 options the PCRE options
1996 filename the file name, or NULL for a command-line pattern
1997 count 0 if this is the only command line pattern, or
1998 number of the command line pattern, or
1999 linenumber for a pattern from a file
2000
2001 Returns: TRUE on success, FALSE after an error
2002 */
2003
2004 static BOOL
2005 compile_pattern(char *pattern, int options, char *filename, int count)
2006 {
2007 if ((process_options & PO_FIXED_STRINGS) != 0)
2008 {
2009 char *eop = pattern + strlen(pattern);
2010 char buffer[PATBUFSIZE];
2011 for(;;)
2012 {
2013 int ellength;
2014 char *p = end_of_line(pattern, eop, &ellength);
2015 if (ellength == 0)
2016 return compile_single_pattern(pattern, options, filename, count);
2017 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
2018 pattern = p;
2019 if (!compile_single_pattern(buffer, options, filename, count))
2020 return FALSE;
2021 }
2022 }
2023 else return compile_single_pattern(pattern, options, filename, count);
2024 }
2025
2026
2027
2028 /*************************************************
2029 * Main program *
2030 *************************************************/
2031
2032 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2033
2034 int
2035 main(int argc, char **argv)
2036 {
2037 int i, j;
2038 int rc = 1;
2039 int pcre_options = 0;
2040 int cmd_pattern_count = 0;
2041 int hint_count = 0;
2042 int errptr;
2043 BOOL only_one_at_top;
2044 char *patterns[MAX_PATTERN_COUNT];
2045 const char *locale_from = "--locale";
2046 const char *error;
2047
2048 /* Set the default line ending value from the default in the PCRE library;
2049 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2050 Note that the return values from pcre_config(), though derived from the ASCII
2051 codes, are the same in EBCDIC environments, so we must use the actual values
2052 rather than escapes such as '\r'. */
2053
2054 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2055 switch(i)
2056 {
2057 default: newline = (char *)"lf"; break;
2058 case 13: newline = (char *)"cr"; break;
2059 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2060 case -1: newline = (char *)"any"; break;
2061 case -2: newline = (char *)"anycrlf"; break;
2062 }
2063
2064 /* Process the options */
2065
2066 for (i = 1; i < argc; i++)
2067 {
2068 option_item *op = NULL;
2069 char *option_data = (char *)""; /* default to keep compiler happy */
2070 BOOL longop;
2071 BOOL longopwasequals = FALSE;
2072
2073 if (argv[i][0] != '-') break;
2074
2075 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2076 but only if we have previously had -e or -f to define the patterns. */
2077
2078 if (argv[i][1] == 0)
2079 {
2080 if (pattern_filename != NULL || pattern_count > 0) break;
2081 else pcregrep_exit(usage(2));
2082 }
2083
2084 /* Handle a long name option, or -- to terminate the options */
2085
2086 if (argv[i][1] == '-')
2087 {
2088 char *arg = argv[i] + 2;
2089 char *argequals = strchr(arg, '=');
2090
2091 if (*arg == 0) /* -- terminates options */
2092 {
2093 i++;
2094 break; /* out of the options-handling loop */
2095 }
2096
2097 longop = TRUE;
2098
2099 /* Some long options have data that follows after =, for example file=name.
2100 Some options have variations in the long name spelling: specifically, we
2101 allow "regexp" because GNU grep allows it, though I personally go along
2102 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2103 These options are entered in the table as "regex(p)". Options can be in
2104 both these categories. */
2105
2106 for (op = optionlist; op->one_char != 0; op++)
2107 {
2108 char *opbra = strchr(op->long_name, '(');
2109 char *equals = strchr(op->long_name, '=');
2110
2111 /* Handle options with only one spelling of the name */
2112
2113 if (opbra == NULL) /* Does not contain '(' */
2114 {
2115 if (equals == NULL) /* Not thing=data case */
2116 {
2117 if (strcmp(arg, op->long_name) == 0) break;
2118 }
2119 else /* Special case xxx=data */
2120 {
2121 int oplen = (int)(equals - op->long_name);
2122 int arglen = (argequals == NULL)?
2123 (int)strlen(arg) : (int)(argequals - arg);
2124 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2125 {
2126 option_data = arg + arglen;
2127 if (*option_data == '=')
2128 {
2129 option_data++;
2130 longopwasequals = TRUE;
2131 }
2132 break;
2133 }
2134 }
2135 }
2136
2137 /* Handle options with an alternate spelling of the name */
2138
2139 else
2140 {
2141 char buff1[24];
2142 char buff2[24];
2143
2144 int baselen = (int)(opbra - op->long_name);
2145 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2146 int arglen = (argequals == NULL || equals == NULL)?
2147 (int)strlen(arg) : (int)(argequals - arg);
2148
2149 sprintf(buff1, "%.*s", baselen, op->long_name);
2150 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2151
2152 if (strncmp(arg, buff1, arglen) == 0 ||
2153 strncmp(arg, buff2, arglen) == 0)
2154 {
2155 if (equals != NULL && argequals != NULL)
2156 {
2157 option_data = argequals;
2158 if (*option_data == '=')
2159 {
2160 option_data++;
2161 longopwasequals = TRUE;
2162 }
2163 }
2164 break;
2165 }
2166 }
2167 }
2168
2169 if (op->one_char == 0)
2170 {
2171 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2172 pcregrep_exit(usage(2));
2173 }
2174 }
2175
2176 /* Jeffrey Friedl's debugging harness uses these additional options which
2177 are not in the right form for putting in the option table because they use
2178 only one hyphen, yet are more than one character long. By putting them
2179 separately here, they will not get displayed as part of the help() output,
2180 but I don't think Jeffrey will care about that. */
2181
2182 #ifdef JFRIEDL_DEBUG
2183 else if (strcmp(argv[i], "-pre") == 0) {
2184 jfriedl_prefix = argv[++i];
2185 continue;
2186 } else if (strcmp(argv[i], "-post") == 0) {
2187 jfriedl_postfix = argv[++i];
2188 continue;
2189 } else if (strcmp(argv[i], "-XT") == 0) {
2190 sscanf(argv[++i], "%d", &jfriedl_XT);
2191 continue;
2192 } else if (strcmp(argv[i], "-XR") == 0) {
2193 sscanf(argv[++i], "%d", &jfriedl_XR);
2194 continue;
2195 }
2196 #endif
2197
2198
2199 /* One-char options; many that have no data may be in a single argument; we
2200 continue till we hit the last one or one that needs data. */
2201
2202 else
2203 {
2204 char *s = argv[i] + 1;
2205 longop = FALSE;
2206 while (*s != 0)
2207 {
2208 for (op = optionlist; op->one_char != 0; op++)
2209 {
2210 if (*s == op->one_char) break;
2211 }
2212 if (op->one_char == 0)
2213 {
2214 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2215 *s, argv[i]);
2216 pcregrep_exit(usage(2));
2217 }
2218
2219 /* Check for a single-character option that has data: OP_OP_NUMBER
2220 is used for one that either has a numerical number or defaults, i.e. the
2221 data is optional. If a digit follows, there is data; if not, carry on
2222 with other single-character options in the same string. */
2223
2224 option_data = s+1;
2225 if (op->type == OP_OP_NUMBER)
2226 {
2227 if (isdigit((unsigned char)s[1])) break;
2228 }
2229 else /* Check for end or a dataless option */
2230 {
2231 if (op->type != OP_NODATA || s[1] == 0) break;
2232 }
2233
2234 /* Handle a single-character option with no data, then loop for the
2235 next character in the string. */
2236
2237 pcre_options = handle_option(*s++, pcre_options);
2238 }
2239 }
2240
2241 /* At this point we should have op pointing to a matched option. If the type
2242 is NO_DATA, it means that there is no data, and the option might set
2243 something in the PCRE options. */
2244
2245 if (op->type == OP_NODATA)
2246 {
2247 pcre_options = handle_option(op->one_char, pcre_options);
2248 continue;
2249 }
2250
2251 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2252 either has a value or defaults to something. It cannot have data in a
2253 separate item. At the moment, the only such options are "colo(u)r",
2254 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2255
2256 if (*option_data == 0 &&
2257 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2258 {
2259 switch (op->one_char)
2260 {
2261 case N_COLOUR:
2262 colour_option = (char *)"auto";
2263 break;
2264
2265 case 'o':
2266 only_matching = 0;
2267 break;
2268
2269 #ifdef JFRIEDL_DEBUG
2270 case 'S':
2271 S_arg = 0;
2272 break;
2273 #endif
2274 }
2275 continue;
2276 }
2277
2278 /* Otherwise, find the data string for the option. */
2279
2280 if (*option_data == 0)
2281 {
2282 if (i >= argc - 1 || longopwasequals)
2283 {
2284 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2285 pcregrep_exit(usage(2));
2286 }
2287 option_data = argv[++i];
2288 }
2289
2290 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2291 multiple times to create a list of patterns. */
2292
2293 if (op->type == OP_PATLIST)
2294 {
2295 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2296 {
2297 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2298 MAX_PATTERN_COUNT);
2299 return 2;
2300 }
2301 patterns[cmd_pattern_count++] = option_data;
2302 }
2303
2304 /* Otherwise, deal with single string or numeric data values. */
2305
2306 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2307 op->type != OP_OP_NUMBER)
2308 {
2309 *((char **)op->dataptr) = option_data;
2310 }
2311
2312 /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2313 only for unpicking arguments, so just keep it simple. */
2314
2315 else
2316 {
2317 unsigned long int n = 0;
2318 char *endptr = option_data;
2319 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2320 while (isdigit((unsigned char)(*endptr)))
2321 n = n * 10 + (int)(*endptr++ - '0');
2322 if (toupper(*endptr) == 'K')
2323 {
2324 n *= 1024;
2325 endptr++;
2326 }
2327 else if (toupper(*endptr) == 'M')
2328 {
2329 n *= 1024*1024;
2330 endptr++;
2331 }
2332 if (*endptr != 0)
2333 {
2334 if (longop)
2335 {
2336 char *equals = strchr(op->long_name, '=');
2337 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2338 (int)(equals - op->long_name);
2339 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2340 option_data, nlen, op->long_name);
2341 }
2342 else
2343 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2344 option_data, op->one_char);
2345 pcregrep_exit(usage(2));
2346 }
2347 if (op->type == OP_LONGNUMBER)
2348 *((unsigned long int *)op->dataptr) = n;
2349 else
2350 *((int *)op->dataptr) = n;
2351 }
2352 }
2353
2354 /* Options have been decoded. If -C was used, its value is used as a default
2355 for -A and -B. */
2356
2357 if (both_context > 0)
2358 {
2359 if (after_context == 0) after_context = both_context;
2360 if (before_context == 0) before_context = both_context;
2361 }
2362
2363 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2364 However, the latter two set only_matching. */
2365
2366 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2367 (file_offsets && line_offsets))
2368 {
2369 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2370 "and/or --line-offsets\n");
2371 pcregrep_exit(usage(2));
2372 }
2373
2374 if (file_offsets || line_offsets) only_matching = 0;
2375
2376 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2377 LC_ALL environment variable is set, and if so, use it. */
2378
2379 if (locale == NULL)
2380 {
2381 locale = getenv("LC_ALL");
2382 locale_from = "LCC_ALL";
2383 }
2384
2385 if (locale == NULL)
2386 {
2387 locale = getenv("LC_CTYPE");
2388 locale_from = "LC_CTYPE";
2389 }
2390
2391 /* If a locale has been provided, set it, and generate the tables the PCRE
2392 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2393
2394 if (locale != NULL)
2395 {
2396 if (setlocale(LC_CTYPE, locale) == NULL)
2397 {
2398 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2399 locale, locale_from);
2400 return 2;
2401 }
2402 pcretables = pcre_maketables();
2403 }
2404
2405 /* Sort out colouring */
2406
2407 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2408 {
2409 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2410 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2411 else
2412 {
2413 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2414 colour_option);
2415 return 2;
2416 }
2417 if (do_colour)
2418 {
2419 char *cs = getenv("PCREGREP_COLOUR");
2420 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2421 if (cs != NULL) colour_string = cs;
2422 }
2423 }
2424
2425 /* Interpret the newline type; the default settings are Unix-like. */
2426
2427 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2428 {
2429 pcre_options |= PCRE_NEWLINE_CR;
2430 endlinetype = EL_CR;
2431 }
2432 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2433 {
2434 pcre_options |= PCRE_NEWLINE_LF;
2435 endlinetype = EL_LF;
2436 }
2437 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2438 {
2439 pcre_options |= PCRE_NEWLINE_CRLF;
2440 endlinetype = EL_CRLF;
2441 }
2442 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2443 {
2444 pcre_options |= PCRE_NEWLINE_ANY;
2445 endlinetype = EL_ANY;
2446 }
2447 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2448 {
2449 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2450 endlinetype = EL_ANYCRLF;
2451 }
2452 else
2453 {
2454 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2455 return 2;
2456 }
2457
2458 /* Interpret the text values for -d and -D */
2459
2460 if (dee_option != NULL)
2461 {
2462 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2463 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2464 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2465 else
2466 {
2467 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2468 return 2;
2469 }
2470 }
2471
2472 if (DEE_option != NULL)
2473 {
2474 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2475 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2476 else
2477 {
2478 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2479 return 2;
2480 }
2481 }
2482
2483 /* Check the values for Jeffrey Friedl's debugging options. */
2484
2485 #ifdef JFRIEDL_DEBUG
2486 if (S_arg > 9)
2487 {
2488 fprintf(stderr, "pcregrep: bad value for -S option\n");
2489 return 2;
2490 }
2491 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2492 {
2493 if (jfriedl_XT == 0) jfriedl_XT = 1;
2494 if (jfriedl_XR == 0) jfriedl_XR = 1;
2495 }
2496 #endif
2497
2498 /* Get memory for the main buffer, and to store the pattern and hints lists. */
2499
2500 bufsize = 3*bufthird;
2501 main_buffer = (char *)malloc(bufsize);
2502 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2503 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2504
2505 if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2506 {
2507 fprintf(stderr, "pcregrep: malloc failed\n");
2508 goto EXIT2;
2509 }
2510
2511 /* If no patterns were provided by -e, and there is no file provided by -f,
2512 the first argument is the one and only pattern, and it must exist. */
2513
2514 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2515 {
2516 if (i >= argc) return usage(2);
2517 patterns[cmd_pattern_count++] = argv[i++];
2518 }
2519
2520 /* Compile the patterns that were provided on the command line, either by
2521 multiple uses of -e or as a single unkeyed pattern. */
2522
2523 for (j = 0; j < cmd_pattern_count; j++)
2524 {
2525 if (!compile_pattern(patterns[j], pcre_options, NULL,
2526 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2527 goto EXIT2;
2528 }
2529
2530 /* Compile the regular expressions that are provided in a file. */
2531
2532 if (pattern_filename != NULL)
2533 {
2534 int linenumber = 0;
2535 FILE *f;
2536 char *filename;
2537 char buffer[PATBUFSIZE];
2538
2539 if (strcmp(pattern_filename, "-") == 0)
2540 {
2541 f = stdin;
2542 filename = stdin_name;
2543 }
2544 else
2545 {
2546 f = fopen(pattern_filename, "r");
2547 if (f == NULL)
2548 {
2549 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2550 strerror(errno));
2551 goto EXIT2;
2552 }
2553 filename = pattern_filename;
2554 }
2555
2556 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2557 {
2558 char *s = buffer + (int)strlen(buffer);
2559 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2560 *s = 0;
2561 linenumber++;
2562 if (buffer[0] == 0) continue; /* Skip blank lines */
2563 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2564 goto EXIT2;
2565 }
2566
2567 if (f != stdin) fclose(f);
2568 }
2569
2570 /* Study the regular expressions, as we will be running them many times */
2571
2572 for (j = 0; j < pattern_count; j++)
2573 {
2574 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2575 if (error != NULL)
2576 {
2577 char s[16];
2578 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2579 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2580 goto EXIT2;
2581 }
2582 hint_count++;
2583 }
2584
2585 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2586 pcre_extra block for each pattern. */
2587
2588 if (match_limit > 0 || match_limit_recursion > 0)
2589 {
2590 for (j = 0; j < pattern_count; j++)
2591 {
2592 if (hints_list[j] == NULL)
2593 {
2594 hints_list[j] = malloc(sizeof(pcre_extra));
2595 if (hints_list[j] == NULL)
2596 {
2597 fprintf(stderr, "pcregrep: malloc failed\n");
2598 pcregrep_exit(2);
2599 }
2600 }
2601 if (match_limit > 0)
2602 {
2603 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2604 hints_list[j]->match_limit = match_limit;
2605 }
2606 if (match_limit_recursion > 0)
2607 {
2608 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2609 hints_list[j]->match_limit_recursion = match_limit_recursion;
2610 }
2611 }
2612 }
2613
2614 /* If there are include or exclude patterns, compile them. */
2615
2616 if (exclude_pattern != NULL)
2617 {
2618 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2619 pcretables);
2620 if (exclude_compiled == NULL)
2621 {
2622 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2623 errptr, error);
2624 goto EXIT2;
2625 }
2626 }
2627
2628 if (include_pattern != NULL)
2629 {
2630 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2631 pcretables);
2632 if (include_compiled == NULL)
2633 {
2634 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2635 errptr, error);
2636 goto EXIT2;
2637 }
2638 }
2639
2640 if (exclude_dir_pattern != NULL)
2641 {
2642 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2643 pcretables);
2644 if (exclude_dir_compiled == NULL)
2645 {
2646 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2647 errptr, error);
2648 goto EXIT2;
2649 }
2650 }
2651
2652 if (include_dir_pattern != NULL)
2653 {
2654 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2655 pcretables);
2656 if (include_dir_compiled == NULL)
2657 {
2658 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2659 errptr, error);
2660 goto EXIT2;
2661 }
2662 }
2663
2664 /* If there are no further arguments, do the business on stdin and exit. */
2665
2666 if (i >= argc)
2667 {
2668 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2669 (filenames > FN_DEFAULT)? stdin_name : NULL);
2670 goto EXIT;
2671 }
2672
2673 /* Otherwise, work through the remaining arguments as files or directories.
2674 Pass in the fact that there is only one argument at top level - this suppresses
2675 the file name if the argument is not a directory and filenames are not
2676 otherwise forced. */
2677
2678 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2679
2680 for (; i < argc; i++)
2681 {
2682 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2683 only_one_at_top);
2684 if (frc > 1) rc = frc;
2685 else if (frc == 0 && rc == 1) rc = 0;
2686 }
2687
2688 EXIT:
2689 if (main_buffer != NULL) free(main_buffer);
2690 if (pattern_list != NULL)
2691 {
2692 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2693 free(pattern_list);
2694 }
2695 if (hints_list != NULL)
2696 {
2697 for (i = 0; i < hint_count; i++)
2698 {
2699 if (hints_list[i] != NULL) free(hints_list[i]);
2700 }
2701 free(hints_list);
2702 }
2703 pcregrep_exit(rc);
2704
2705 EXIT2:
2706 rc = 2;
2707 goto EXIT;
2708 }
2709
2710 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5