/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 561 - (show annotations)
Sat Oct 30 18:37:47 2010 UTC (8 years, 10 months ago) by ph10
File MIME type: text/plain
File size: 74526 byte(s)
Error occurred while calculating annotation data.
Added --match-limit and --recursion-limit to pcregrep; tidied some error 
messages.
1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2010 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
68 #define FALSE 0
69 #define TRUE 1
70
71 typedef int BOOL;
72
73 #define MAX_PATTERN_COUNT 100
74 #define OFFSET_SIZE 99
75
76 #if BUFSIZ > 8192
77 #define MBUFTHIRD BUFSIZ
78 #else
79 #define MBUFTHIRD 8192
80 #endif
81
82 /* Values for the "filenames" variable, which specifies options for file name
83 output. The order is important; it is assumed that a file name is wanted for
84 all values greater than FN_DEFAULT. */
85
86 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87
88 /* File reading styles */
89
90 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91
92 /* Actions for the -d and -D options */
93
94 enum { dee_READ, dee_SKIP, dee_RECURSE };
95 enum { DEE_READ, DEE_SKIP };
96
97 /* Actions for special processing options (flag bits) */
98
99 #define PO_WORD_MATCH 0x0001
100 #define PO_LINE_MATCH 0x0002
101 #define PO_FIXED_STRINGS 0x0004
102
103 /* Line ending types */
104
105 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf().

The fudge is wrapped in do { } while (0) so that "FWRITE(...);" is exactly one
statement. A bare "if (...) {}" followed by ";" would break a following "else"
when the macro is used as the unbraced body of an if statement. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114
115
116
117 /*************************************************
118 * Global variables *
119 *************************************************/
120
121 /* Jeffrey Friedl has some debugging requirements that are not part of the
122 regular code. */
123
124 #ifdef JFRIEDL_DEBUG
125 static int S_arg = -1;
126 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128 static const char *jfriedl_prefix = "";
129 static const char *jfriedl_postfix = "";
130 #endif
131
132 static int endlinetype;
133
134 static char *colour_string = (char *)"1;31";
135 static char *colour_option = NULL;
136 static char *dee_option = NULL;
137 static char *DEE_option = NULL;
138 static char *newline = NULL;
139 static char *pattern_filename = NULL;
140 static char *stdin_name = (char *)"(standard input)";
141 static char *locale = NULL;
142
143 static const unsigned char *pcretables = NULL;
144
145 static int pattern_count = 0;
146 static pcre **pattern_list = NULL;
147 static pcre_extra **hints_list = NULL;
148
149 static char *include_pattern = NULL;
150 static char *exclude_pattern = NULL;
151 static char *include_dir_pattern = NULL;
152 static char *exclude_dir_pattern = NULL;
153
154 static pcre *include_compiled = NULL;
155 static pcre *exclude_compiled = NULL;
156 static pcre *include_dir_compiled = NULL;
157 static pcre *exclude_dir_compiled = NULL;
158
159 static int after_context = 0;
160 static int before_context = 0;
161 static int both_context = 0;
162 static int dee_action = dee_READ;
163 static int DEE_action = DEE_READ;
164 static int error_count = 0;
165 static int filenames = FN_DEFAULT;
166 static int process_options = 0;
167
168 static unsigned long int match_limit = 0;
169 static unsigned long int match_limit_recursion = 0;
170
171 static BOOL count_only = FALSE;
172 static BOOL do_colour = FALSE;
173 static BOOL file_offsets = FALSE;
174 static BOOL hyphenpending = FALSE;
175 static BOOL invert = FALSE;
176 static BOOL line_buffered = FALSE;
177 static BOOL line_offsets = FALSE;
178 static BOOL multiline = FALSE;
179 static BOOL number = FALSE;
180 static BOOL omit_zero_count = FALSE;
181 static BOOL only_matching = FALSE;
182 static BOOL resource_error = FALSE;
183 static BOOL quiet = FALSE;
184 static BOOL silent = FALSE;
185 static BOOL utf8 = FALSE;
186
187 /* Structure for options and list of them */
188
189 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
190 OP_PATLIST };
191
192 typedef struct option_item {
193 int type;
194 int one_char;
195 void *dataptr;
196 const char *long_name;
197 const char *help_text;
198 } option_item;
199
200 /* Options without a single-letter equivalent get a negative value. This can be
201 used to identify them. */
202
203 #define N_COLOUR (-1)
204 #define N_EXCLUDE (-2)
205 #define N_EXCLUDE_DIR (-3)
206 #define N_HELP (-4)
207 #define N_INCLUDE (-5)
208 #define N_INCLUDE_DIR (-6)
209 #define N_LABEL (-7)
210 #define N_LOCALE (-8)
211 #define N_NULL (-9)
212 #define N_LOFFSETS (-10)
213 #define N_FOFFSETS (-11)
214 #define N_LBUFFER (-12)
215 #define N_M_LIMIT (-13)
216 #define N_M_LIMIT_REC (-14)
217
218 static option_item optionlist[] = {
219 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
220 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
221 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
222 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
223 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
224 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
225 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
226 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
227 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
228 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
229 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
230 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
231 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
232 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
233 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
234 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
235 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
236 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
237 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
238 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
239 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
240 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
241 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
242 { OP_NUMBER, N_M_LIMIT,&match_limit, "match-limit=number", "set PCRE match limit option" },
243 { OP_NUMBER, N_M_LIMIT_REC,&match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
245 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
247 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
248 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
249 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
250 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
251 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
252 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
253 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
254 #ifdef JFRIEDL_DEBUG
255 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
256 #endif
257 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
258 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
259 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
260 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
261 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
262 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
263 { OP_NODATA, 0, NULL, NULL, NULL }
264 };
265
266 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
267 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
268 that the combination of -w and -x has the same effect as -x on its own, so we
269 can treat them as the same. */
270
271 static const char *prefix[] = {
272 "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
273
274 static const char *suffix[] = {
275 "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
276
/* UTF-8 tables - used when decoding multi-byte characters while scanning for
line endings with the newline setting "any" or "anycrlf". utf8_table3 gives the
mask for the data bits of a first byte, indexed by the number of additional
bytes. utf8_table4 gives the number of additional bytes, indexed by the low six
bits of a first byte whose value is 0xc0 or greater. Both tables are private to
this file, so they are static like the other file-scope data. */

static const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

static const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
286
287
288
289 /*************************************************
290 * OS-specific functions *
291 *************************************************/
292
293 /* These functions are defined so that they can be made system specific,
294 although at present the only ones are for Unix, Win32, and for "no support". */
295
296
297 /************* Directory scanning in Unix ***********/
298
299 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
300 #include <sys/types.h>
301 #include <sys/stat.h>
302 #include <dirent.h>
303
typedef DIR directory_type;

/* Test whether a path names a directory.

Argument:  the path to test
Returns:   '/' if stat() succeeds and the path is a directory
           0 otherwise (including when stat() fails)

S_ISDIR() is used rather than masking with S_IFMT because the S_ISxxx macros
are the base POSIX interface; the S_IFxxx constants are an extension that some
strict compilation environments hide. */

static int
isdirectory(char *filename)
{
  struct stat statbuf;
  if (stat(filename, &statbuf) < 0)
    return 0;        /* In the expectation that opening as a file will fail */
  return S_ISDIR(statbuf.st_mode)? '/' : 0;
}

/* Open a directory for scanning.

Argument:  the directory path
Returns:   the result of opendir(): a directory handle, or NULL on failure
*/

static directory_type *
opendirectory(char *filename)
{
  return opendir(filename);
}

/* Read the next entry name from an open directory, skipping "." and "..".

Argument:  a handle returned by opendirectory()
Returns:   the entry name (the d_name field of the entry returned by
           readdir()), or NULL when readdir() returns NULL
*/

static char *
readdirectory(directory_type *dir)
{
  for (;;)
    {
    struct dirent *dent = readdir(dir);
    if (dent == NULL) return NULL;
    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
      return dent->d_name;
    }
  /* Control never reaches here */
}

/* Close a directory opened by opendirectory().

Argument:  the directory handle
Returns:   nothing
*/

static void
closedirectory(directory_type *dir)
{
  closedir(dir);
}
339
340
341 /************* Test for regular file in Unix **********/
342
/* Test whether a path names a regular file.

Argument:  the path to test
Returns:   1 if the path is a regular file, or if stat() fails
           0 if stat() succeeds and the path is not a regular file

S_ISREG() is the base POSIX test; the result is normalized so that the function
still yields exactly 0 or 1. */

static int
isregfile(char *filename)
{
  struct stat statbuf;
  if (stat(filename, &statbuf) < 0)
    return 1;        /* In the expectation that opening as a file will fail */
  return S_ISREG(statbuf.st_mode) != 0;
}
351
352
353 /************* Test for a terminal in Unix **********/
354
355 static BOOL
356 is_stdout_tty(void)
357 {
358 return isatty(fileno(stdout));
359 }
360
361 static BOOL
362 is_file_tty(FILE *f)
363 {
364 return isatty(fileno(f));
365 }
366
367
368 /************* Directory scanning in Win32 ***********/
369
370 /* I (Philip Hazel) have no means of testing this code. It was contributed by
371 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
372 when it did not exist. David Byron added a patch that moved the #include of
373 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
374 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
375 undefined when it is indeed undefined. */
376
377 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
378
379 #ifndef STRICT
380 # define STRICT
381 #endif
382 #ifndef WIN32_LEAN_AND_MEAN
383 # define WIN32_LEAN_AND_MEAN
384 #endif
385
386 #include <windows.h>
387
388 #ifndef INVALID_FILE_ATTRIBUTES
389 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
390 #endif
391
392 typedef struct directory_type
393 {
394 HANDLE handle;
395 BOOL first;
396 WIN32_FIND_DATA data;
397 } directory_type;
398
399 int
400 isdirectory(char *filename)
401 {
402 DWORD attr = GetFileAttributes(filename);
403 if (attr == INVALID_FILE_ATTRIBUTES)
404 return 0;
405 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
406 }
407
408 directory_type *
409 opendirectory(char *filename)
410 {
411 size_t len;
412 char *pattern;
413 directory_type *dir;
414 DWORD err;
415 len = strlen(filename);
416 pattern = (char *) malloc(len + 3);
417 dir = (directory_type *) malloc(sizeof(*dir));
418 if ((pattern == NULL) || (dir == NULL))
419 {
420 fprintf(stderr, "pcregrep: malloc failed\n");
421 pcregrep_exit(2);
422 }
423 memcpy(pattern, filename, len);
424 memcpy(&(pattern[len]), "\\*", 3);
425 dir->handle = FindFirstFile(pattern, &(dir->data));
426 if (dir->handle != INVALID_HANDLE_VALUE)
427 {
428 free(pattern);
429 dir->first = TRUE;
430 return dir;
431 }
432 err = GetLastError();
433 free(pattern);
434 free(dir);
435 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
436 return NULL;
437 }
438
439 char *
440 readdirectory(directory_type *dir)
441 {
442 for (;;)
443 {
444 if (!dir->first)
445 {
446 if (!FindNextFile(dir->handle, &(dir->data)))
447 return NULL;
448 }
449 else
450 {
451 dir->first = FALSE;
452 }
453 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
454 return dir->data.cFileName;
455 }
456 #ifndef _MSC_VER
457 return NULL; /* Keep compiler happy; never executed */
458 #endif
459 }
460
461 void
462 closedirectory(directory_type *dir)
463 {
464 FindClose(dir->handle);
465 free(dir);
466 }
467
468
469 /************* Test for regular file in Win32 **********/
470
471 /* I don't know how to do this, or if it can be done; assume all paths are
472 regular if they are not directories. */
473
/* Returns 1 when isdirectory() reports that the path is not a directory, and
0 when it reports that it is. */

int
isregfile(char *filename)
{
  return isdirectory(filename) == 0;
}
478
479
480 /************* Test for a terminal in Win32 **********/
481
482 /* I don't know how to do this; assume never */
483
484 static BOOL
485 is_stdout_tty(void)
486 {
487 return FALSE;
488 }
489
490 static BOOL
491 is_file_tty(FILE *f)
492 {
493 return FALSE;
494 }
495
496
497 /************* Directory scanning when we can't do it ***********/
498
499 /* The type is void, and apart from isdirectory(), the functions do nothing. */
500
501 #else
502
typedef void directory_type;

/* Never reports a directory; the argument is ignored. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* Never opens anything; always yields a null handle. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return NULL;
}

/* Never yields an entry name. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return NULL;
}

/* Nothing to close. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
509
510
511 /************* Test for regular when we can't do it **********/
512
513 /* Assume all files are regular. */
514
/* Every path is reported as a regular file; the argument is ignored. */

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
516
517
518 /************* Test for a terminal when we can't do it **********/
519
520 static BOOL
521 is_stdout_tty(void)
522 {
523 return FALSE;
524 }
525
526 static BOOL
527 is_file_tty(FILE *f)
528 {
529 return FALSE;
530 }
531
532 #endif
533
534
535
536 #ifndef HAVE_STRERROR
537 /*************************************************
538 * Provide strerror() for non-ANSI libraries *
539 *************************************************/
540
541 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
542 in their libraries, but can provide the same facility by this simple
543 alternative function. */
544
extern int sys_nerr;
extern char *sys_errlist[];

/* Look an error number up in the system's error list.

Argument:  the error number
Returns:   the sys_errlist entry for the number, or a fixed "unknown" text when
           the number is negative or not less than sys_nerr
*/

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
554 #endif /* HAVE_STRERROR */
555
556
557
558 /*************************************************
559 * Exit from the program *
560 *************************************************/
561
562 /* If there has been a resource error, give a suitable message.
563
564 Argument: the return code
565 Returns: does not return
566 */
567
568 static void
569 pcregrep_exit(int rc)
570 {
571 if (resource_error)
572 {
573 fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
574 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
575 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
576 }
577
578 exit(rc);
579 }
580
581
582
583 /*************************************************
584 * Read one line of input *
585 *************************************************/
586
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a '\n' has been stored, when "length" characters have been
stored, or at end of file, whichever comes first. No terminating zero is added.
If "length" is not positive, nothing is read and nothing is stored.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
  int c;
  int yield = 0;

  if (length <= 0) return 0;      /* No room: do not touch buffer[0] */

  while ((c = fgetc(f)) != EOF)
    {
    buffer[yield++] = c;
    if (c == '\n' || yield >= length) break;
    }
  return yield;
}
614
615
616
617 /*************************************************
618 * Find end of line *
619 *************************************************/
620
621 /* The length of the endline sequence that is found is set via lenptr. This may
622 be zero at the very end of the file if there is no line-ending sequence there.
623
624 Arguments:
625 p current position in line
626 endptr end of available data
627 lenptr where to put the length of the eol sequence
628
629 Returns: pointer to the last byte of the line
630 */
631
632 static char *
633 end_of_line(char *p, char *endptr, int *lenptr)
634 {
635 switch(endlinetype)
636 {
637 default: /* Just in case */
638 case EL_LF:
639 while (p < endptr && *p != '\n') p++;
640 if (p < endptr)
641 {
642 *lenptr = 1;
643 return p + 1;
644 }
645 *lenptr = 0;
646 return endptr;
647
648 case EL_CR:
649 while (p < endptr && *p != '\r') p++;
650 if (p < endptr)
651 {
652 *lenptr = 1;
653 return p + 1;
654 }
655 *lenptr = 0;
656 return endptr;
657
658 case EL_CRLF:
659 for (;;)
660 {
661 while (p < endptr && *p != '\r') p++;
662 if (++p >= endptr)
663 {
664 *lenptr = 0;
665 return endptr;
666 }
667 if (*p == '\n')
668 {
669 *lenptr = 2;
670 return p + 1;
671 }
672 }
673 break;
674
675 case EL_ANYCRLF:
676 while (p < endptr)
677 {
678 int extra = 0;
679 register int c = *((unsigned char *)p);
680
681 if (utf8 && c >= 0xc0)
682 {
683 int gcii, gcss;
684 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
685 gcss = 6*extra;
686 c = (c & utf8_table3[extra]) << gcss;
687 for (gcii = 1; gcii <= extra; gcii++)
688 {
689 gcss -= 6;
690 c |= (p[gcii] & 0x3f) << gcss;
691 }
692 }
693
694 p += 1 + extra;
695
696 switch (c)
697 {
698 case 0x0a: /* LF */
699 *lenptr = 1;
700 return p;
701
702 case 0x0d: /* CR */
703 if (p < endptr && *p == 0x0a)
704 {
705 *lenptr = 2;
706 p++;
707 }
708 else *lenptr = 1;
709 return p;
710
711 default:
712 break;
713 }
714 } /* End of loop for ANYCRLF case */
715
716 *lenptr = 0; /* Must have hit the end */
717 return endptr;
718
719 case EL_ANY:
720 while (p < endptr)
721 {
722 int extra = 0;
723 register int c = *((unsigned char *)p);
724
725 if (utf8 && c >= 0xc0)
726 {
727 int gcii, gcss;
728 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
729 gcss = 6*extra;
730 c = (c & utf8_table3[extra]) << gcss;
731 for (gcii = 1; gcii <= extra; gcii++)
732 {
733 gcss -= 6;
734 c |= (p[gcii] & 0x3f) << gcss;
735 }
736 }
737
738 p += 1 + extra;
739
740 switch (c)
741 {
742 case 0x0a: /* LF */
743 case 0x0b: /* VT */
744 case 0x0c: /* FF */
745 *lenptr = 1;
746 return p;
747
748 case 0x0d: /* CR */
749 if (p < endptr && *p == 0x0a)
750 {
751 *lenptr = 2;
752 p++;
753 }
754 else *lenptr = 1;
755 return p;
756
757 case 0x85: /* NEL */
758 *lenptr = utf8? 2 : 1;
759 return p;
760
761 case 0x2028: /* LS */
762 case 0x2029: /* PS */
763 *lenptr = 3;
764 return p;
765
766 default:
767 break;
768 }
769 } /* End of loop for ANY case */
770
771 *lenptr = 0; /* Must have hit the end */
772 return endptr;
773 } /* End of overall switch */
774 }
775
776
777
778 /*************************************************
779 * Find start of previous line *
780 *************************************************/
781
782 /* This is called when looking back for before lines to print.
783
784 Arguments:
785 p start of the subsequent line
786 startptr start of available data
787
788 Returns: pointer to the start of the previous line
789 */
790
791 static char *
792 previous_line(char *p, char *startptr)
793 {
794 switch(endlinetype)
795 {
796 default: /* Just in case */
797 case EL_LF:
798 p--;
799 while (p > startptr && p[-1] != '\n') p--;
800 return p;
801
802 case EL_CR:
803 p--;
804 while (p > startptr && p[-1] != '\n') p--;
805 return p;
806
807 case EL_CRLF:
808 for (;;)
809 {
810 p -= 2;
811 while (p > startptr && p[-1] != '\n') p--;
812 if (p <= startptr + 1 || p[-2] == '\r') return p;
813 }
814 return p; /* But control should never get here */
815
816 case EL_ANY:
817 case EL_ANYCRLF:
818 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
819 if (utf8) while ((*p & 0xc0) == 0x80) p--;
820
821 while (p > startptr)
822 {
823 register int c;
824 char *pp = p - 1;
825
826 if (utf8)
827 {
828 int extra = 0;
829 while ((*pp & 0xc0) == 0x80) pp--;
830 c = *((unsigned char *)pp);
831 if (c >= 0xc0)
832 {
833 int gcii, gcss;
834 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
835 gcss = 6*extra;
836 c = (c & utf8_table3[extra]) << gcss;
837 for (gcii = 1; gcii <= extra; gcii++)
838 {
839 gcss -= 6;
840 c |= (pp[gcii] & 0x3f) << gcss;
841 }
842 }
843 }
844 else c = *((unsigned char *)pp);
845
846 if (endlinetype == EL_ANYCRLF) switch (c)
847 {
848 case 0x0a: /* LF */
849 case 0x0d: /* CR */
850 return p;
851
852 default:
853 break;
854 }
855
856 else switch (c)
857 {
858 case 0x0a: /* LF */
859 case 0x0b: /* VT */
860 case 0x0c: /* FF */
861 case 0x0d: /* CR */
862 case 0x85: /* NEL */
863 case 0x2028: /* LS */
864 case 0x2029: /* PS */
865 return p;
866
867 default:
868 break;
869 }
870
871 p = pp; /* Back one character */
872 } /* End of loop for ANY case */
873
874 return startptr; /* Hit start of data */
875 } /* End of overall switch */
876 }
877
878
879
880
881
882 /*************************************************
883 * Print the previous "after" lines *
884 *************************************************/
885
886 /* This is called if we are about to lose said lines because of buffer filling,
887 and at the end of the file. The data in the line is written using fwrite() so
888 that a binary zero does not terminate it.
889
890 Arguments:
891 lastmatchnumber the number of the last matching line, plus one
892 lastmatchrestart where we restarted after the last match
893 endptr end of available data
894 printname filename for printing
895
896 Returns: nothing
897 */
898
899 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
900 char *endptr, char *printname)
901 {
902 if (after_context > 0 && lastmatchnumber > 0)
903 {
904 int count = 0;
905 while (lastmatchrestart < endptr && count++ < after_context)
906 {
907 int ellength;
908 char *pp = lastmatchrestart;
909 if (printname != NULL) fprintf(stdout, "%s-", printname);
910 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
911 pp = end_of_line(pp, endptr, &ellength);
912 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
913 lastmatchrestart = pp;
914 }
915 hyphenpending = TRUE;
916 }
917 }
918
919
920
921 /*************************************************
922 * Apply patterns to subject till one matches *
923 *************************************************/
924
925 /* This function is called to run through all patterns, looking for a match. It
926 is used multiple times for the same subject when colouring is enabled, in order
927 to find all possible matches.
928
929 Arguments:
930 matchptr the start of the subject
931 length the length of the subject to match
932 offsets the offets vector to fill in
933 mrc address of where to put the result of pcre_exec()
934
935 Returns: TRUE if there was a match
936 FALSE if there was no match
937 invert if there was a non-fatal error
938 */
939
940 static BOOL
941 match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
942 {
943 int i;
944 size_t slen = length;
945 const char *msg = "this text:\n\n";
946 if (slen > 200)
947 {
948 slen = 200;
949 msg = "text that starts:\n\n";
950 }
951 for (i = 0; i < pattern_count; i++)
952 {
953 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
954 PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
955 if (*mrc >= 0) return TRUE;
956 if (*mrc == PCRE_ERROR_NOMATCH) continue;
957 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
958 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
959 fprintf(stderr, "%s", msg);
960 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
961 fprintf(stderr, "\n\n");
962 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
963 resource_error = TRUE;
964 if (error_count++ > 20)
965 {
966 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
967 pcregrep_exit(2);
968 }
969 return invert; /* No more matching; don't show the line again */
970 }
971
972 return FALSE; /* No match, no errors */
973 }
974
975
976
977 /*************************************************
978 * Grep an individual file *
979 *************************************************/
980
981 /* This is called from grep_or_recurse() below. It uses a buffer that is three
982 times the value of MBUFTHIRD. The matching point is never allowed to stray into
983 the top third of the buffer, thus keeping more of the file available for
984 context printing or for multiline scanning. For large files, the pointer will
985 be in the middle third most of the time, so the bottom third is available for
986 "before" context printing.
987
988 Arguments:
989 handle the fopened FILE stream for a normal file
990 the gzFile pointer when reading is via libz
991 the BZFILE pointer when reading is via libbz2
992 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
993 printname the file name if it is to be printed for each match
994 or NULL if the file name is not to be printed
995 it cannot be NULL if filenames[_nomatch]_only is set
996
997 Returns: 0 if there was at least one match
998 1 otherwise (no matches)
999 2 if there is a read error on a .bz2 file
1000 */
1001
1002 static int
1003 pcregrep(void *handle, int frtype, char *printname)
1004 {
1005 int rc = 1;
1006 int linenumber = 1;
1007 int lastmatchnumber = 0;
1008 int count = 0;
1009 int filepos = 0;
1010 int offsets[OFFSET_SIZE];
1011 char *lastmatchrestart = NULL;
1012 char buffer[3*MBUFTHIRD];
1013 char *ptr = buffer;
1014 char *endptr;
1015 size_t bufflength;
1016 BOOL endhyphenpending = FALSE;
1017 BOOL input_line_buffered = line_buffered;
1018 FILE *in = NULL; /* Ensure initialized */
1019
1020 #ifdef SUPPORT_LIBZ
1021 gzFile ingz = NULL;
1022 #endif
1023
1024 #ifdef SUPPORT_LIBBZ2
1025 BZFILE *inbz2 = NULL;
1026 #endif
1027
1028
1029 /* Do the first read into the start of the buffer and set up the pointer to end
1030 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1031 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1032 fail. */
1033
1034 #ifdef SUPPORT_LIBZ
1035 if (frtype == FR_LIBZ)
1036 {
1037 ingz = (gzFile)handle;
1038 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1039 }
1040 else
1041 #endif
1042
1043 #ifdef SUPPORT_LIBBZ2
1044 if (frtype == FR_LIBBZ2)
1045 {
1046 inbz2 = (BZFILE *)handle;
1047 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1048 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1049 } /* without the cast it is unsigned. */
1050 else
1051 #endif
1052
1053 {
1054 in = (FILE *)handle;
1055 if (is_file_tty(in)) input_line_buffered = TRUE;
1056 bufflength = input_line_buffered?
1057 read_one_line(buffer, 3*MBUFTHIRD, in) :
1058 fread(buffer, 1, 3*MBUFTHIRD, in);
1059 }
1060
1061 endptr = buffer + bufflength;
1062
1063 /* Loop while the current pointer is not at the end of the file. For large
1064 files, endptr will be at the end of the buffer when we are in the middle of the
1065 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1066 way, the buffer is shifted left and re-filled. */
1067
1068 while (ptr < endptr)
1069 {
1070 int endlinelength;
1071 int mrc = 0;
1072 BOOL match;
1073 char *matchptr = ptr;
1074 char *t = ptr;
1075 size_t length, linelength;
1076
1077 /* At this point, ptr is at the start of a line. We need to find the length
1078 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1079 length of the remainder of the data in the buffer. Otherwise, it is the length of
1080 the next line, excluding the terminating newline. After matching, we always
1081 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1082 option is used for compiling, so that any match is constrained to be in the
1083 first line. */
1084
1085 t = end_of_line(t, endptr, &endlinelength);
1086 linelength = t - ptr - endlinelength;
1087 length = multiline? (size_t)(endptr - ptr) : linelength;
1088
1089 /* Extra processing for Jeffrey Friedl's debugging. */
1090
1091 #ifdef JFRIEDL_DEBUG
1092 if (jfriedl_XT || jfriedl_XR)
1093 {
1094 #include <sys/time.h>
1095 #include <time.h>
1096 struct timeval start_time, end_time;
1097 struct timezone dummy;
1098 int i;
1099
1100 if (jfriedl_XT)
1101 {
1102 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1103 const char *orig = ptr;
1104 ptr = malloc(newlen + 1);
1105 if (!ptr) {
1106 printf("out of memory");
1107 pcregrep_exit(2);
1108 }
1109 endptr = ptr;
1110 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1111 for (i = 0; i < jfriedl_XT; i++) {
1112 strncpy(endptr, orig, length);
1113 endptr += length;
1114 }
1115 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1116 length = newlen;
1117 }
1118
1119 if (gettimeofday(&start_time, &dummy) != 0)
1120 perror("bad gettimeofday");
1121
1122
1123 for (i = 0; i < jfriedl_XR; i++)
1124 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1125 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1126
1127 if (gettimeofday(&end_time, &dummy) != 0)
1128 perror("bad gettimeofday");
1129
1130 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1131 -
1132 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1133
1134 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1135 return 0;
1136 }
1137 #endif
1138
1139 /* We come back here after a match when the -o option (only_matching) is set,
1140 in order to find any further matches in the same line. */
1141
1142 ONLY_MATCHING_RESTART:
1143
1144 /* Run through all the patterns until one matches or there is an error other
1145 than NOMATCH. This code is in a subroutine so that it can be re-used for
1146 finding subsequent matches when colouring matched lines. */
1147
1148 match = match_patterns(matchptr, length, offsets, &mrc);
1149
1150 /* If it's a match or a not-match (as required), do what's wanted. */
1151
1152 if (match != invert)
1153 {
1154 BOOL hyphenprinted = FALSE;
1155
1156 /* We've failed if we want a file that doesn't have any matches. */
1157
1158 if (filenames == FN_NOMATCH_ONLY) return 1;
1159
1160 /* Just count if just counting is wanted. */
1161
1162 if (count_only) count++;
1163
1164 /* If all we want is a file name, there is no need to scan any more lines
1165 in the file. */
1166
1167 else if (filenames == FN_MATCH_ONLY)
1168 {
1169 fprintf(stdout, "%s\n", printname);
1170 return 0;
1171 }
1172
1173 /* Likewise, if all we want is a yes/no answer. */
1174
1175 else if (quiet) return 0;
1176
1177 /* The --only-matching option prints just the substring that matched, and
1178 the --file-offsets and --line-offsets options output offsets for the
1179 matching substring (they both force --only-matching). None of these options
1180 prints any context. Afterwards, adjust the start and length, and then jump
1181 back to look for further matches in the same line. If we are in invert
1182 mode, however, nothing is printed - this could be still useful because the
1183 return code is set. */
1184
1185 else if (only_matching)
1186 {
1187 if (!invert)
1188 {
1189 if (printname != NULL) fprintf(stdout, "%s:", printname);
1190 if (number) fprintf(stdout, "%d:", linenumber);
1191 if (line_offsets)
1192 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1193 offsets[1] - offsets[0]);
1194 else if (file_offsets)
1195 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1196 offsets[1] - offsets[0]);
1197 else
1198 {
1199 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1200 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1201 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1202 }
1203 fprintf(stdout, "\n");
1204 matchptr += offsets[1];
1205 length -= offsets[1];
1206 match = FALSE;
1207 goto ONLY_MATCHING_RESTART;
1208 }
1209 }
1210
1211 /* This is the default case when none of the above options is set. We print
1212 the matching line(s), possibly preceded and/or followed by other lines of
1213 context. */
1214
1215 else
1216 {
1217 /* See if there is a requirement to print some "after" lines from a
1218 previous match. We never print any overlaps. */
1219
1220 if (after_context > 0 && lastmatchnumber > 0)
1221 {
1222 int ellength;
1223 int linecount = 0;
1224 char *p = lastmatchrestart;
1225
1226 while (p < ptr && linecount < after_context)
1227 {
1228 p = end_of_line(p, ptr, &ellength);
1229 linecount++;
1230 }
1231
1232 /* It is important to advance lastmatchrestart during this printing so
1233 that it interacts correctly with any "before" printing below. Print
1234 each line's data using fwrite() in case there are binary zeroes. */
1235
1236 while (lastmatchrestart < p)
1237 {
1238 char *pp = lastmatchrestart;
1239 if (printname != NULL) fprintf(stdout, "%s-", printname);
1240 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1241 pp = end_of_line(pp, endptr, &ellength);
1242 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1243 lastmatchrestart = pp;
1244 }
1245 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1246 }
1247
1248 /* If there were non-contiguous lines printed above, insert hyphens. */
1249
1250 if (hyphenpending)
1251 {
1252 fprintf(stdout, "--\n");
1253 hyphenpending = FALSE;
1254 hyphenprinted = TRUE;
1255 }
1256
1257 /* See if there is a requirement to print some "before" lines for this
1258 match. Again, don't print overlaps. */
1259
1260 if (before_context > 0)
1261 {
1262 int linecount = 0;
1263 char *p = ptr;
1264
1265 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1266 linecount < before_context)
1267 {
1268 linecount++;
1269 p = previous_line(p, buffer);
1270 }
1271
1272 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1273 fprintf(stdout, "--\n");
1274
1275 while (p < ptr)
1276 {
1277 int ellength;
1278 char *pp = p;
1279 if (printname != NULL) fprintf(stdout, "%s-", printname);
1280 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1281 pp = end_of_line(pp, endptr, &ellength);
1282 FWRITE(p, 1, pp - p, stdout);
1283 p = pp;
1284 }
1285 }
1286
1287 /* Now print the matching line(s); ensure we set hyphenpending at the end
1288 of the file if any context lines are being output. */
1289
1290 if (after_context > 0 || before_context > 0)
1291 endhyphenpending = TRUE;
1292
1293 if (printname != NULL) fprintf(stdout, "%s:", printname);
1294 if (number) fprintf(stdout, "%d:", linenumber);
1295
1296 /* In multiline mode, we want to print to the end of the line in which
1297 the end of the matched string is found, so we adjust linelength and the
1298 line number appropriately, but only when there actually was a match
1299 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1300 the match will always be before the first newline sequence. */
1301
1302 if (multiline)
1303 {
1304 int ellength;
1305 char *endmatch = ptr;
1306 if (!invert)
1307 {
1308 endmatch += offsets[1];
1309 t = ptr;
1310 while (t < endmatch)
1311 {
1312 t = end_of_line(t, endptr, &ellength);
1313 if (t <= endmatch) linenumber++; else break;
1314 }
1315 }
1316 endmatch = end_of_line(endmatch, endptr, &ellength);
1317 linelength = endmatch - ptr - ellength;
1318 }
1319
1320 /*** NOTE: Use only fwrite() to output the data line, so that binary
1321 zeroes are treated as just another data character. */
1322
1323 /* This extra option, for Jeffrey Friedl's debugging requirements,
1324 replaces the matched string, or a specific captured string if it exists,
1325 with X. When this happens, colouring is ignored. */
1326
1327 #ifdef JFRIEDL_DEBUG
1328 if (S_arg >= 0 && S_arg < mrc)
1329 {
1330 int first = S_arg * 2;
1331 int last = first + 1;
1332 FWRITE(ptr, 1, offsets[first], stdout);
1333 fprintf(stdout, "X");
1334 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1335 }
1336 else
1337 #endif
1338
1339 /* We have to split the line(s) up if colouring, and search for further
1340 matches. */
1341
1342 if (do_colour)
1343 {
1344 int last_offset = 0;
1345 FWRITE(ptr, 1, offsets[0], stdout);
1346 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1347 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1348 fprintf(stdout, "%c[00m", 0x1b);
1349 for (;;)
1350 {
1351 last_offset += offsets[1];
1352 matchptr += offsets[1];
1353 length -= offsets[1];
1354 if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1355 FWRITE(matchptr, 1, offsets[0], stdout);
1356 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1357 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1358 fprintf(stdout, "%c[00m", 0x1b);
1359 }
1360 FWRITE(ptr + last_offset, 1,
1361 (linelength + endlinelength) - last_offset, stdout);
1362 }
1363
1364 /* Not colouring; no need to search for further matches */
1365
1366 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1367 }
1368
1369 /* End of doing what has to be done for a match. If --line-buffered was
1370 given, flush the output. */
1371
1372 if (line_buffered) fflush(stdout);
1373 rc = 0; /* Had some success */
1374
1375 /* Remember where the last match happened for after_context. We remember
1376 where we are about to restart, and that line's number. */
1377
1378 lastmatchrestart = ptr + linelength + endlinelength;
1379 lastmatchnumber = linenumber + 1;
1380 }
1381
1382 /* For a match in multiline inverted mode (which of course did not cause
1383 anything to be printed), we have to move on to the end of the match before
1384 proceeding. */
1385
1386 if (multiline && invert && match)
1387 {
1388 int ellength;
1389 char *endmatch = ptr + offsets[1];
1390 t = ptr;
1391 while (t < endmatch)
1392 {
1393 t = end_of_line(t, endptr, &ellength);
1394 if (t <= endmatch) linenumber++; else break;
1395 }
1396 endmatch = end_of_line(endmatch, endptr, &ellength);
1397 linelength = endmatch - ptr - ellength;
1398 }
1399
1400 /* Advance to after the newline and increment the line number. The file
1401 offset to the current line is maintained in filepos. */
1402
1403 ptr += linelength + endlinelength;
1404 filepos += (int)(linelength + endlinelength);
1405 linenumber++;
1406
1407 /* If input is line buffered, and the buffer is not yet full, read another
1408 line and add it into the buffer. */
1409
1410 if (input_line_buffered && bufflength < sizeof(buffer))
1411 {
1412 int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1413 bufflength += add;
1414 endptr += add;
1415 }
1416
1417 /* If we haven't yet reached the end of the file (the buffer is full), and
1418 the current point is in the top 1/3 of the buffer, slide the buffer down by
1419 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1420 about to be lost, print them. */
1421
1422 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1423 {
1424 if (after_context > 0 &&
1425 lastmatchnumber > 0 &&
1426 lastmatchrestart < buffer + MBUFTHIRD)
1427 {
1428 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1429 lastmatchnumber = 0;
1430 }
1431
1432 /* Now do the shuffle */
1433
1434 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1435 ptr -= MBUFTHIRD;
1436
1437 #ifdef SUPPORT_LIBZ
1438 if (frtype == FR_LIBZ)
1439 bufflength = 2*MBUFTHIRD +
1440 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1441 else
1442 #endif
1443
1444 #ifdef SUPPORT_LIBBZ2
1445 if (frtype == FR_LIBBZ2)
1446 bufflength = 2*MBUFTHIRD +
1447 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1448 else
1449 #endif
1450
1451 bufflength = 2*MBUFTHIRD +
1452 (input_line_buffered?
1453 read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1454 fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1455 endptr = buffer + bufflength;
1456
1457 /* Adjust any last match point */
1458
1459 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1460 }
1461 } /* Loop through the whole file */
1462
1463 /* End of file; print final "after" lines if wanted; do_after_lines sets
1464 hyphenpending if it prints something. */
1465
1466 if (!only_matching && !count_only)
1467 {
1468 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1469 hyphenpending |= endhyphenpending;
1470 }
1471
1472 /* Print the file name if we are looking for those without matches and there
1473 were none. If we found a match, we won't have got this far. */
1474
1475 if (filenames == FN_NOMATCH_ONLY)
1476 {
1477 fprintf(stdout, "%s\n", printname);
1478 return 0;
1479 }
1480
1481 /* Print the match count if wanted */
1482
1483 if (count_only)
1484 {
1485 if (count > 0 || !omit_zero_count)
1486 {
1487 if (printname != NULL && filenames != FN_NONE)
1488 fprintf(stdout, "%s:", printname);
1489 fprintf(stdout, "%d\n", count);
1490 }
1491 }
1492
1493 return rc;
1494 }
1495
1496
1497
1498 /*************************************************
1499 * Grep a file or recurse into a directory *
1500 *************************************************/
1501
1502 /* Given a path name, if it's a directory, scan all the files if we are
1503 recursing; if it's a file, grep it.
1504
1505 Arguments:
1506 pathname the path to investigate
1507 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1508 only_one_at_top TRUE if the path is the only one at toplevel
1509
1510 Returns: 0 if there was at least one match
1511 1 if there were no matches
1512 2 there was some kind of error
1513
1514 However, file opening failures are suppressed if "silent" is set.
1515 */
1516
1517 static int
1518 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1519 {
1520 int rc = 1;
1521 int sep;
1522 int frtype;
1523 int pathlen;
1524 void *handle;
1525 FILE *in = NULL; /* Ensure initialized */
1526
1527 #ifdef SUPPORT_LIBZ
1528 gzFile ingz = NULL;
1529 #endif
1530
1531 #ifdef SUPPORT_LIBBZ2
1532 BZFILE *inbz2 = NULL;
1533 #endif
1534
1535 /* If the file name is "-" we scan stdin */
1536
1537 if (strcmp(pathname, "-") == 0)
1538 {
1539 return pcregrep(stdin, FR_PLAIN,
1540 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1541 stdin_name : NULL);
1542 }
1543
1544 /* If the file is a directory, skip if skipping or if we are recursing, scan
1545 each file and directory within it, subject to any include or exclude patterns
1546 that were set. The scanning code is localized so it can be made
1547 system-specific. */
1548
1549 if ((sep = isdirectory(pathname)) != 0)
1550 {
1551 if (dee_action == dee_SKIP) return 1;
1552 if (dee_action == dee_RECURSE)
1553 {
1554 char buffer[1024];
1555 char *nextfile;
1556 directory_type *dir = opendirectory(pathname);
1557
1558 if (dir == NULL)
1559 {
1560 if (!silent)
1561 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1562 strerror(errno));
1563 return 2;
1564 }
1565
1566 while ((nextfile = readdirectory(dir)) != NULL)
1567 {
1568 int frc, nflen;
1569 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1570 nflen = (int)(strlen(nextfile));
1571
1572 if (isdirectory(buffer))
1573 {
1574 if (exclude_dir_compiled != NULL &&
1575 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1576 continue;
1577
1578 if (include_dir_compiled != NULL &&
1579 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1580 continue;
1581 }
1582 else
1583 {
1584 if (exclude_compiled != NULL &&
1585 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1586 continue;
1587
1588 if (include_compiled != NULL &&
1589 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1590 continue;
1591 }
1592
1593 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1594 if (frc > 1) rc = frc;
1595 else if (frc == 0 && rc == 1) rc = 0;
1596 }
1597
1598 closedirectory(dir);
1599 return rc;
1600 }
1601 }
1602
1603 /* If the file is not a directory and not a regular file, skip it if that's
1604 been requested. */
1605
1606 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1607
1608 /* Control reaches here if we have a regular file, or if we have a directory
1609 and recursion or skipping was not requested, or if we have anything else and
1610 skipping was not requested. The scan proceeds. If this is the first and only
1611 argument at top level, we don't show the file name, unless we are only showing
1612 the file name, or the filename was forced (-H). */
1613
1614 pathlen = (int)(strlen(pathname));
1615
1616 /* Open using zlib if it is supported and the file name ends with .gz. */
1617
1618 #ifdef SUPPORT_LIBZ
1619 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1620 {
1621 ingz = gzopen(pathname, "rb");
1622 if (ingz == NULL)
1623 {
1624 if (!silent)
1625 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1626 strerror(errno));
1627 return 2;
1628 }
1629 handle = (void *)ingz;
1630 frtype = FR_LIBZ;
1631 }
1632 else
1633 #endif
1634
1635 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1636
1637 #ifdef SUPPORT_LIBBZ2
1638 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1639 {
1640 inbz2 = BZ2_bzopen(pathname, "rb");
1641 handle = (void *)inbz2;
1642 frtype = FR_LIBBZ2;
1643 }
1644 else
1645 #endif
1646
1647 /* Otherwise use plain fopen(). The label is so that we can come back here if
1648 an attempt to read a .bz2 file indicates that it really is a plain file. */
1649
1650 #ifdef SUPPORT_LIBBZ2
1651 PLAIN_FILE:
1652 #endif
1653 {
1654 in = fopen(pathname, "rb");
1655 handle = (void *)in;
1656 frtype = FR_PLAIN;
1657 }
1658
1659 /* All the opening methods return errno when they fail. */
1660
1661 if (handle == NULL)
1662 {
1663 if (!silent)
1664 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1665 strerror(errno));
1666 return 2;
1667 }
1668
1669 /* Now grep the file */
1670
1671 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1672 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1673
1674 /* Close in an appropriate manner. */
1675
1676 #ifdef SUPPORT_LIBZ
1677 if (frtype == FR_LIBZ)
1678 gzclose(ingz);
1679 else
1680 #endif
1681
1682 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1683 read failed. If the error indicates that the file isn't in fact bzipped, try
1684 again as a normal file. */
1685
1686 #ifdef SUPPORT_LIBBZ2
1687 if (frtype == FR_LIBBZ2)
1688 {
1689 if (rc == 2)
1690 {
1691 int errnum;
1692 const char *err = BZ2_bzerror(inbz2, &errnum);
1693 if (errnum == BZ_DATA_ERROR_MAGIC)
1694 {
1695 BZ2_bzclose(inbz2);
1696 goto PLAIN_FILE;
1697 }
1698 else if (!silent)
1699 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1700 pathname, err);
1701 }
1702 BZ2_bzclose(inbz2);
1703 }
1704 else
1705 #endif
1706
1707 /* Normal file close */
1708
1709 fclose(in);
1710
1711 /* Pass back the yield from pcregrep(). */
1712
1713 return rc;
1714 }
1715
1716
1717
1718
1719 /*************************************************
1720 * Usage function *
1721 *************************************************/
1722
1723 static int
1724 usage(int rc)
1725 {
1726 option_item *op;
1727 fprintf(stderr, "Usage: pcregrep [-");
1728 for (op = optionlist; op->one_char != 0; op++)
1729 {
1730 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1731 }
1732 fprintf(stderr, "] [long options] [pattern] [files]\n");
1733 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1734 "options.\n");
1735 return rc;
1736 }
1737
1738
1739
1740
1741 /*************************************************
1742 * Help function *
1743 *************************************************/
1744
1745 static void
1746 help(void)
1747 {
1748 option_item *op;
1749
1750 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1751 printf("Search for PATTERN in each FILE or standard input.\n");
1752 printf("PATTERN must be present if neither -e nor -f is used.\n");
1753 printf("\"-\" can be used as a file name to mean STDIN.\n");
1754
1755 #ifdef SUPPORT_LIBZ
1756 printf("Files whose names end in .gz are read using zlib.\n");
1757 #endif
1758
1759 #ifdef SUPPORT_LIBBZ2
1760 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1761 #endif
1762
1763 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1764 printf("Other files and the standard input are read as plain files.\n\n");
1765 #else
1766 printf("All files are read as plain files, without any interpretation.\n\n");
1767 #endif
1768
1769 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1770 printf("Options:\n");
1771
1772 for (op = optionlist; op->one_char != 0; op++)
1773 {
1774 int n;
1775 char s[4];
1776 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1777 n = 30 - printf(" %s --%s", s, op->long_name);
1778 if (n < 1) n = 1;
1779 printf("%.*s%s\n", n, " ", op->help_text);
1780 }
1781
1782 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1783 printf("trailing white space is removed and blank lines are ignored.\n");
1784 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1785
1786 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1787 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1788 }
1789
1790
1791
1792
1793 /*************************************************
1794 * Handle a single-letter, no data option *
1795 *************************************************/
1796
1797 static int
1798 handle_option(int letter, int options)
1799 {
1800 switch(letter)
1801 {
1802 case N_FOFFSETS: file_offsets = TRUE; break;
1803 case N_HELP: help(); pcregrep_exit(0);
1804 case N_LOFFSETS: line_offsets = number = TRUE; break;
1805 case N_LBUFFER: line_buffered = TRUE; break;
1806 case 'c': count_only = TRUE; break;
1807 case 'F': process_options |= PO_FIXED_STRINGS; break;
1808 case 'H': filenames = FN_FORCE; break;
1809 case 'h': filenames = FN_NONE; break;
1810 case 'i': options |= PCRE_CASELESS; break;
1811 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1812 case 'L': filenames = FN_NOMATCH_ONLY; break;
1813 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1814 case 'n': number = TRUE; break;
1815 case 'o': only_matching = TRUE; break;
1816 case 'q': quiet = TRUE; break;
1817 case 'r': dee_action = dee_RECURSE; break;
1818 case 's': silent = TRUE; break;
1819 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1820 case 'v': invert = TRUE; break;
1821 case 'w': process_options |= PO_WORD_MATCH; break;
1822 case 'x': process_options |= PO_LINE_MATCH; break;
1823
1824 case 'V':
1825 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1826 pcregrep_exit(0);
1827 break;
1828
1829 default:
1830 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1831 pcregrep_exit(usage(2));
1832 }
1833
1834 return options;
1835 }
1836
1837
1838
1839
1840 /*************************************************
1841 * Construct printed ordinal *
1842 *************************************************/
1843
/* This turns a number into "1st", "3rd", "11th", etc.

Argument:   n    the number
Returns:    a pointer to a static buffer holding the ordinal; the contents are
            overwritten by the next call
*/

static char *
ordin(int n)
{
  /* Big enough for the longest decimal rendering of an int on any common
  platform, plus the two-letter suffix and the terminating zero. */

  static char buffer[32];
  const char *suffix = "th";
  int last_two = n % 100;

  /* Numbers ending in 11, 12 or 13 take "th" (11th, 112th); only outside that
  range does the final digit choose st/nd/rd. For a negative n both remainders
  are zero or negative, so the suffix stays "th". */

  if (last_two < 11 || last_two > 13) {
    switch (n % 10) {
      case 1: suffix = "st"; break;
      case 2: suffix = "nd"; break;
      case 3: suffix = "rd"; break;
      default: break;
    }
  }

  sprintf(buffer, "%d%s", n, suffix);
  return buffer;
}
1862
1863
1864
1865 /*************************************************
1866 * Compile a single pattern *
1867 *************************************************/
1868
1869 /* When the -F option has been used, this is called for each substring.
1870 Otherwise it's called for each supplied pattern.
1871
1872 Arguments:
1873 pattern the pattern string
1874 options the PCRE options
1875 filename the file name, or NULL for a command-line pattern
1876 count 0 if this is the only command line pattern, or
1877 number of the command line pattern, or
1878 linenumber for a pattern from a file
1879
1880 Returns: TRUE on success, FALSE after an error
1881 */
1882
1883 static BOOL
1884 compile_single_pattern(char *pattern, int options, char *filename, int count)
1885 {
1886 char buffer[MBUFTHIRD + 16];
1887 const char *error;
1888 int errptr;
1889
1890 if (pattern_count >= MAX_PATTERN_COUNT)
1891 {
1892 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1893 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1894 return FALSE;
1895 }
1896
1897 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1898 suffix[process_options]);
1899 pattern_list[pattern_count] =
1900 pcre_compile(buffer, options, &error, &errptr, pcretables);
1901 if (pattern_list[pattern_count] != NULL)
1902 {
1903 pattern_count++;
1904 return TRUE;
1905 }
1906
1907 /* Handle compile errors */
1908
1909 errptr -= (int)strlen(prefix[process_options]);
1910 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1911
1912 if (filename == NULL)
1913 {
1914 if (count == 0)
1915 fprintf(stderr, "pcregrep: Error in command-line regex "
1916 "at offset %d: %s\n", errptr, error);
1917 else
1918 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1919 "at offset %d: %s\n", ordin(count), errptr, error);
1920 }
1921 else
1922 {
1923 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1924 "at offset %d: %s\n", count, filename, errptr, error);
1925 }
1926
1927 return FALSE;
1928 }
1929
1930
1931
1932 /*************************************************
1933 * Compile one supplied pattern *
1934 *************************************************/
1935
1936 /* When the -F option has been used, each string may be a list of strings,
1937 separated by line breaks. They will be matched literally.
1938
1939 Arguments:
1940 pattern the pattern string
1941 options the PCRE options
1942 filename the file name, or NULL for a command-line pattern
1943 count 0 if this is the only command line pattern, or
1944 number of the command line pattern, or
1945 linenumber for a pattern from a file
1946
1947 Returns: TRUE on success, FALSE after an error
1948 */
1949
1950 static BOOL
1951 compile_pattern(char *pattern, int options, char *filename, int count)
1952 {
1953 if ((process_options & PO_FIXED_STRINGS) != 0)
1954 {
1955 char *eop = pattern + strlen(pattern);
1956 char buffer[MBUFTHIRD];
1957 for(;;)
1958 {
1959 int ellength;
1960 char *p = end_of_line(pattern, eop, &ellength);
1961 if (ellength == 0)
1962 return compile_single_pattern(pattern, options, filename, count);
1963 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1964 pattern = p;
1965 if (!compile_single_pattern(buffer, options, filename, count))
1966 return FALSE;
1967 }
1968 }
1969 else return compile_single_pattern(pattern, options, filename, count);
1970 }
1971
1972
1973
1974 /*************************************************
1975 * Main program *
1976 *************************************************/
1977
1978 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1979
1980 int
1981 main(int argc, char **argv)
1982 {
1983 int i, j;
1984 int rc = 1;
1985 int pcre_options = 0;
1986 int cmd_pattern_count = 0;
1987 int hint_count = 0;
1988 int errptr;
1989 BOOL only_one_at_top;
1990 char *patterns[MAX_PATTERN_COUNT];
1991 const char *locale_from = "--locale";
1992 const char *error;
1993
1994 /* Set the default line ending value from the default in the PCRE library;
1995 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1996 Note that the return values from pcre_config(), though derived from the ASCII
1997 codes, are the same in EBCDIC environments, so we must use the actual values
1998 rather than escapes such as '\r'. */
1999
2000 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2001 switch(i)
2002 {
2003 default: newline = (char *)"lf"; break;
2004 case 13: newline = (char *)"cr"; break;
2005 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2006 case -1: newline = (char *)"any"; break;
2007 case -2: newline = (char *)"anycrlf"; break;
2008 }
2009
2010 /* Process the options */
2011
2012 for (i = 1; i < argc; i++)
2013 {
2014 option_item *op = NULL;
2015 char *option_data = (char *)""; /* default to keep compiler happy */
2016 BOOL longop;
2017 BOOL longopwasequals = FALSE;
2018
2019 if (argv[i][0] != '-') break;
2020
2021 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2022 but only if we have previously had -e or -f to define the patterns. */
2023
2024 if (argv[i][1] == 0)
2025 {
2026 if (pattern_filename != NULL || pattern_count > 0) break;
2027 else pcregrep_exit(usage(2));
2028 }
2029
2030 /* Handle a long name option, or -- to terminate the options */
2031
2032 if (argv[i][1] == '-')
2033 {
2034 char *arg = argv[i] + 2;
2035 char *argequals = strchr(arg, '=');
2036
2037 if (*arg == 0) /* -- terminates options */
2038 {
2039 i++;
2040 break; /* out of the options-handling loop */
2041 }
2042
2043 longop = TRUE;
2044
2045 /* Some long options have data that follows after =, for example file=name.
2046 Some options have variations in the long name spelling: specifically, we
2047 allow "regexp" because GNU grep allows it, though I personally go along
2048 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2049 These options are entered in the table as "regex(p)". Options can be in
2050 both these categories. */
2051
2052 for (op = optionlist; op->one_char != 0; op++)
2053 {
2054 char *opbra = strchr(op->long_name, '(');
2055 char *equals = strchr(op->long_name, '=');
2056
2057 /* Handle options with only one spelling of the name */
2058
2059 if (opbra == NULL) /* Does not contain '(' */
2060 {
2061 if (equals == NULL) /* Not thing=data case */
2062 {
2063 if (strcmp(arg, op->long_name) == 0) break;
2064 }
2065 else /* Special case xxx=data */
2066 {
2067 int oplen = (int)(equals - op->long_name);
2068 int arglen = (argequals == NULL)?
2069 (int)strlen(arg) : (int)(argequals - arg);
2070 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2071 {
2072 option_data = arg + arglen;
2073 if (*option_data == '=')
2074 {
2075 option_data++;
2076 longopwasequals = TRUE;
2077 }
2078 break;
2079 }
2080 }
2081 }
2082
2083 /* Handle options with an alternate spelling of the name */
2084
2085 else
2086 {
2087 char buff1[24];
2088 char buff2[24];
2089
2090 int baselen = (int)(opbra - op->long_name);
2091 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2092 int arglen = (argequals == NULL || equals == NULL)?
2093 (int)strlen(arg) : (int)(argequals - arg);
2094
2095 sprintf(buff1, "%.*s", baselen, op->long_name);
2096 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2097
2098 if (strncmp(arg, buff1, arglen) == 0 ||
2099 strncmp(arg, buff2, arglen) == 0)
2100 {
2101 if (equals != NULL && argequals != NULL)
2102 {
2103 option_data = argequals;
2104 if (*option_data == '=')
2105 {
2106 option_data++;
2107 longopwasequals = TRUE;
2108 }
2109 }
2110 break;
2111 }
2112 }
2113 }
2114
2115 if (op->one_char == 0)
2116 {
2117 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2118 pcregrep_exit(usage(2));
2119 }
2120 }
2121
2122 /* Jeffrey Friedl's debugging harness uses these additional options which
2123 are not in the right form for putting in the option table because they use
2124 only one hyphen, yet are more than one character long. By putting them
2125 separately here, they will not get displayed as part of the help() output,
2126 but I don't think Jeffrey will care about that. */
2127
2128 #ifdef JFRIEDL_DEBUG
2129 else if (strcmp(argv[i], "-pre") == 0) {
2130 jfriedl_prefix = argv[++i];
2131 continue;
2132 } else if (strcmp(argv[i], "-post") == 0) {
2133 jfriedl_postfix = argv[++i];
2134 continue;
2135 } else if (strcmp(argv[i], "-XT") == 0) {
2136 sscanf(argv[++i], "%d", &jfriedl_XT);
2137 continue;
2138 } else if (strcmp(argv[i], "-XR") == 0) {
2139 sscanf(argv[++i], "%d", &jfriedl_XR);
2140 continue;
2141 }
2142 #endif
2143
2144
2145 /* One-char options; many that have no data may be in a single argument; we
2146 continue till we hit the last one or one that needs data. */
2147
2148 else
2149 {
2150 char *s = argv[i] + 1;
2151 longop = FALSE;
2152 while (*s != 0)
2153 {
2154 for (op = optionlist; op->one_char != 0; op++)
2155 { if (*s == op->one_char) break; }
2156 if (op->one_char == 0)
2157 {
2158 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2159 *s, argv[i]);
2160 pcregrep_exit(usage(2));
2161 }
2162 if (op->type != OP_NODATA || s[1] == 0)
2163 {
2164 option_data = s+1;
2165 break;
2166 }
2167 pcre_options = handle_option(*s++, pcre_options);
2168 }
2169 }
2170
2171 /* At this point we should have op pointing to a matched option. If the type
2172 is OP_NODATA, it means that there is no data, and the option might set
2173 something in the PCRE options. */
2174
2175 if (op->type == OP_NODATA)
2176 {
2177 pcre_options = handle_option(op->one_char, pcre_options);
2178 continue;
2179 }
2180
2181 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2182 either has a value or defaults to something. It cannot have data in a
2183 separate item. At the moment, the only such options are "colo(u)r" and
2184 Jeffrey Friedl's special -S debugging option. */
2185
2186 if (*option_data == 0 &&
2187 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2188 {
2189 switch (op->one_char)
2190 {
2191 case N_COLOUR:
2192 colour_option = (char *)"auto";
2193 break;
2194 #ifdef JFRIEDL_DEBUG
2195 case 'S':
2196 S_arg = 0;
2197 break;
2198 #endif
2199 }
2200 continue;
2201 }
2202
2203 /* Otherwise, find the data string for the option. */
2204
2205 if (*option_data == 0)
2206 {
2207 if (i >= argc - 1 || longopwasequals)
2208 {
2209 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2210 pcregrep_exit(usage(2));
2211 }
2212 option_data = argv[++i];
2213 }
2214
2215 /* If the option type is OP_PATLIST, it's the -e option, which can be given
2216 multiple times to create a list of patterns. */
2217
2218 if (op->type == OP_PATLIST)
2219 {
2220 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2221 {
2222 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2223 MAX_PATTERN_COUNT);
2224 return 2;
2225 }
2226 patterns[cmd_pattern_count++] = option_data;
2227 }
2228
2229 /* Otherwise, deal with single string or numeric data values. */
2230
2231 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2232 {
2233 *((char **)op->dataptr) = option_data;
2234 }
2235
2236 /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2237 only for unpicking arguments, so just keep it simple. */
2238
2239 else
2240 {
2241 unsigned long int n = 0;
2242 char *endptr = option_data;
2243 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2244 while (isdigit((unsigned char)(*endptr)))
2245 n = n * 10 + (int)(*endptr++ - '0');
2246 if (*endptr != 0)
2247 {
2248 if (longop)
2249 {
2250 char *equals = strchr(op->long_name, '=');
2251 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2252 (int)(equals - op->long_name);
2253 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2254 option_data, nlen, op->long_name);
2255 }
2256 else
2257 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2258 option_data, op->one_char);
2259 pcregrep_exit(usage(2));
2260 }
2261 *((int *)op->dataptr) = n;
2262 }
2263 }
2264
2265 /* Options have been decoded. If -C was used, its value is used as a default
2266 for -A and -B. */
2267
2268 if (both_context > 0)
2269 {
2270 if (after_context == 0) after_context = both_context;
2271 if (before_context == 0) before_context = both_context;
2272 }
2273
2274 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2275 However, the latter two set the only_matching flag. */
2276
2277 if ((only_matching && (file_offsets || line_offsets)) ||
2278 (file_offsets && line_offsets))
2279 {
2280 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2281 "and/or --line-offsets\n");
2282 pcregrep_exit(usage(2));
2283 }
2284
2285 if (file_offsets || line_offsets) only_matching = TRUE;
2286
2287 /* If a locale has not been provided as an option, see if the LC_ALL or
2288 LC_CTYPE environment variable is set (checked in that order), and if so, use it. */
2289
2290 if (locale == NULL)
2291 {
2292 locale = getenv("LC_ALL");
2293 locale_from = "LCC_ALL";
2294 }
2295
2296 if (locale == NULL)
2297 {
2298 locale = getenv("LC_CTYPE");
2299 locale_from = "LC_CTYPE";
2300 }
2301
2302 /* If a locale has been provided, set it, and generate the tables that PCRE
2303 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2304
2305 if (locale != NULL)
2306 {
2307 if (setlocale(LC_CTYPE, locale) == NULL)
2308 {
2309 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2310 locale, locale_from);
2311 return 2;
2312 }
2313 pcretables = pcre_maketables();
2314 }
2315
2316 /* Sort out colouring */
2317
2318 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2319 {
2320 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2321 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2322 else
2323 {
2324 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2325 colour_option);
2326 return 2;
2327 }
2328 if (do_colour)
2329 {
2330 char *cs = getenv("PCREGREP_COLOUR");
2331 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2332 if (cs != NULL) colour_string = cs;
2333 }
2334 }
2335
2336 /* Interpret the newline type; the default settings are Unix-like. */
2337
2338 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2339 {
2340 pcre_options |= PCRE_NEWLINE_CR;
2341 endlinetype = EL_CR;
2342 }
2343 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2344 {
2345 pcre_options |= PCRE_NEWLINE_LF;
2346 endlinetype = EL_LF;
2347 }
2348 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2349 {
2350 pcre_options |= PCRE_NEWLINE_CRLF;
2351 endlinetype = EL_CRLF;
2352 }
2353 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2354 {
2355 pcre_options |= PCRE_NEWLINE_ANY;
2356 endlinetype = EL_ANY;
2357 }
2358 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2359 {
2360 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2361 endlinetype = EL_ANYCRLF;
2362 }
2363 else
2364 {
2365 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2366 return 2;
2367 }
2368
2369 /* Interpret the text values for -d and -D */
2370
2371 if (dee_option != NULL)
2372 {
2373 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2374 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2375 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2376 else
2377 {
2378 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2379 return 2;
2380 }
2381 }
2382
2383 if (DEE_option != NULL)
2384 {
2385 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2386 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2387 else
2388 {
2389 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2390 return 2;
2391 }
2392 }
2393
2394 /* Check the values for Jeffrey Friedl's debugging options. */
2395
2396 #ifdef JFRIEDL_DEBUG
2397 if (S_arg > 9)
2398 {
2399 fprintf(stderr, "pcregrep: bad value for -S option\n");
2400 return 2;
2401 }
2402 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2403 {
2404 if (jfriedl_XT == 0) jfriedl_XT = 1;
2405 if (jfriedl_XR == 0) jfriedl_XR = 1;
2406 }
2407 #endif
2408
2409 /* Get memory to store the pattern and hints lists. */
2410
2411 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2412 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2413
2414 if (pattern_list == NULL || hints_list == NULL)
2415 {
2416 fprintf(stderr, "pcregrep: malloc failed\n");
2417 goto EXIT2;
2418 }
2419
2420 /* If no patterns were provided by -e, and there is no file provided by -f,
2421 the first argument is the one and only pattern, and it must exist. */
2422
2423 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2424 {
2425 if (i >= argc) return usage(2);
2426 patterns[cmd_pattern_count++] = argv[i++];
2427 }
2428
2429 /* Compile the patterns that were provided on the command line, either by
2430 multiple uses of -e or as a single unkeyed pattern. */
2431
2432 for (j = 0; j < cmd_pattern_count; j++)
2433 {
2434 if (!compile_pattern(patterns[j], pcre_options, NULL,
2435 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2436 goto EXIT2;
2437 }
2438
2439 /* Compile the regular expressions that are provided in a file. */
2440
2441 if (pattern_filename != NULL)
2442 {
2443 int linenumber = 0;
2444 FILE *f;
2445 char *filename;
2446 char buffer[MBUFTHIRD];
2447
2448 if (strcmp(pattern_filename, "-") == 0)
2449 {
2450 f = stdin;
2451 filename = stdin_name;
2452 }
2453 else
2454 {
2455 f = fopen(pattern_filename, "r");
2456 if (f == NULL)
2457 {
2458 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2459 strerror(errno));
2460 goto EXIT2;
2461 }
2462 filename = pattern_filename;
2463 }
2464
2465 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2466 {
2467 char *s = buffer + (int)strlen(buffer);
2468 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2469 *s = 0;
2470 linenumber++;
2471 if (buffer[0] == 0) continue; /* Skip blank lines */
2472 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2473 goto EXIT2;
2474 }
2475
2476 if (f != stdin) fclose(f);
2477 }
2478
2479 /* Study the regular expressions, as we will be running them many times */
2480
2481 for (j = 0; j < pattern_count; j++)
2482 {
2483 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2484 if (error != NULL)
2485 {
2486 char s[16];
2487 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2488 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2489 goto EXIT2;
2490 }
2491 hint_count++;
2492 }
2493
2494 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2495 pcre_extra block for each pattern. */
2496
2497 if (match_limit > 0 || match_limit_recursion > 0)
2498 {
2499 for (j = 0; j < pattern_count; j++)
2500 {
2501 if (hints_list[j] == NULL)
2502 {
2503 hints_list[j] = malloc(sizeof(pcre_extra));
2504 if (hints_list[j] == NULL)
2505 {
2506 fprintf(stderr, "pcregrep: malloc failed\n");
2507 pcregrep_exit(2);
2508 }
2509 }
2510 if (match_limit > 0)
2511 {
2512 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2513 hints_list[j]->match_limit = match_limit;
2514 }
2515 if (match_limit_recursion > 0)
2516 {
2517 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2518 hints_list[j]->match_limit_recursion = match_limit_recursion;
2519 }
2520 }
2521 }
2522
2523 /* If there are include or exclude patterns, compile them. */
2524
2525 if (exclude_pattern != NULL)
2526 {
2527 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2528 pcretables);
2529 if (exclude_compiled == NULL)
2530 {
2531 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2532 errptr, error);
2533 goto EXIT2;
2534 }
2535 }
2536
2537 if (include_pattern != NULL)
2538 {
2539 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2540 pcretables);
2541 if (include_compiled == NULL)
2542 {
2543 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2544 errptr, error);
2545 goto EXIT2;
2546 }
2547 }
2548
2549 if (exclude_dir_pattern != NULL)
2550 {
2551 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2552 pcretables);
2553 if (exclude_dir_compiled == NULL)
2554 {
2555 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2556 errptr, error);
2557 goto EXIT2;
2558 }
2559 }
2560
2561 if (include_dir_pattern != NULL)
2562 {
2563 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2564 pcretables);
2565 if (include_dir_compiled == NULL)
2566 {
2567 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2568 errptr, error);
2569 goto EXIT2;
2570 }
2571 }
2572
2573 /* If there are no further arguments, do the business on stdin and exit. */
2574
2575 if (i >= argc)
2576 {
2577 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2578 goto EXIT;
2579 }
2580
2581 /* Otherwise, work through the remaining arguments as files or directories.
2582 Pass in the fact that there is only one argument at top level - this suppresses
2583 the file name if the argument is not a directory and filenames are not
2584 otherwise forced. */
2585
2586 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2587
2588 for (; i < argc; i++)
2589 {
2590 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2591 only_one_at_top);
2592 if (frc > 1) rc = frc;
2593 else if (frc == 0 && rc == 1) rc = 0;
2594 }
2595
2596 EXIT:
2597 if (pattern_list != NULL)
2598 {
2599 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2600 free(pattern_list);
2601 }
2602 if (hints_list != NULL)
2603 {
2604 for (i = 0; i < hint_count; i++)
2605 {
2606 if (hints_list[i] != NULL) free(hints_list[i]);
2607 }
2608 free(hints_list);
2609 }
2610 pcregrep_exit(rc);
2611
2612 EXIT2:
2613 rc = 2;
2614 goto EXIT;
2615 }
2616
2617 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5