/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 587 - (show annotations)
Fri Jan 14 19:01:25 2011 UTC (8 years, 10 months ago) by ph10
File MIME type: text/plain
File size: 77110 byte(s)
Fix -M bugs in pcregrep
1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2011 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
/* Home-grown Boolean type and its two values. */

typedef int BOOL;

#define FALSE 0
#define TRUE 1

/* Fixed sizes: the maximum number of patterns, and the size of the offsets
vector handed to pcre_exec(). */

#define MAX_PATTERN_COUNT 100
#define OFFSET_SIZE 99

/* One third of the main input buffer: BUFSIZ, but never less than 8192. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif

/* Values for the "filenames" variable, which specifies options for file name
output. The order matters: a file name is wanted for every value greater than
FN_DEFAULT. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_MATCH_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* File reading styles */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};

/* Actions for the -d and -D options */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004

/* Line ending types */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};
106
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf().

The fudge is wrapped in do { } while (0) so that the macro behaves as a single
statement: a bare "if (...) {}" would break (or capture the "else") when
FWRITE(...); is used as the body of an if/else. Always follow the macro with a
semicolon. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114
115
116
117 /*************************************************
118 * Global variables *
119 *************************************************/
120
121 /* Jeffrey Friedl has some debugging requirements that are not part of the
122 regular code. */
123
124 #ifdef JFRIEDL_DEBUG
125 static int S_arg = -1;
126 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128 static const char *jfriedl_prefix = "";
129 static const char *jfriedl_postfix = "";
130 #endif
131
132 static int endlinetype;
133
134 static char *colour_string = (char *)"1;31";
135 static char *colour_option = NULL;
136 static char *dee_option = NULL;
137 static char *DEE_option = NULL;
138 static char *newline = NULL;
139 static char *pattern_filename = NULL;
140 static char *stdin_name = (char *)"(standard input)";
141 static char *locale = NULL;
142
143 static const unsigned char *pcretables = NULL;
144
145 static int pattern_count = 0;
146 static pcre **pattern_list = NULL;
147 static pcre_extra **hints_list = NULL;
148
149 static char *include_pattern = NULL;
150 static char *exclude_pattern = NULL;
151 static char *include_dir_pattern = NULL;
152 static char *exclude_dir_pattern = NULL;
153
154 static pcre *include_compiled = NULL;
155 static pcre *exclude_compiled = NULL;
156 static pcre *include_dir_compiled = NULL;
157 static pcre *exclude_dir_compiled = NULL;
158
159 static int after_context = 0;
160 static int before_context = 0;
161 static int both_context = 0;
162 static int dee_action = dee_READ;
163 static int DEE_action = DEE_READ;
164 static int error_count = 0;
165 static int filenames = FN_DEFAULT;
166 static int only_matching = -1;
167 static int process_options = 0;
168
169 static unsigned long int match_limit = 0;
170 static unsigned long int match_limit_recursion = 0;
171
172 static BOOL count_only = FALSE;
173 static BOOL do_colour = FALSE;
174 static BOOL file_offsets = FALSE;
175 static BOOL hyphenpending = FALSE;
176 static BOOL invert = FALSE;
177 static BOOL line_buffered = FALSE;
178 static BOOL line_offsets = FALSE;
179 static BOOL multiline = FALSE;
180 static BOOL number = FALSE;
181 static BOOL omit_zero_count = FALSE;
182 static BOOL resource_error = FALSE;
183 static BOOL quiet = FALSE;
184 static BOOL silent = FALSE;
185 static BOOL utf8 = FALSE;
186
/* The kinds of data an option can take; stored in the "type" field of an
option_item. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_LONGNUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

/* One entry in the table of options. */

typedef struct option_item {
  int type;                /* one of the OP_xxx values */
  int one_char;            /* single-letter form, or a negative N_xxx code */
  void *dataptr;           /* where the option's value is stored, or NULL */
  const char *long_name;   /* long option name */
  const char *help_text;   /* text for the help output */
} option_item;

/* Options without a single-letter equivalent get a negative value. This can be
used to identify them. */

#define N_COLOUR (-1)
#define N_EXCLUDE (-2)
#define N_EXCLUDE_DIR (-3)
#define N_HELP (-4)
#define N_INCLUDE (-5)
#define N_INCLUDE_DIR (-6)
#define N_LABEL (-7)
#define N_LOCALE (-8)
#define N_NULL (-9)
#define N_LOFFSETS (-10)
#define N_FOFFSETS (-11)
#define N_LBUFFER (-12)
#define N_M_LIMIT (-13)
#define N_M_LIMIT_REC (-14)
217
218 static option_item optionlist[] = {
219 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
220 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
221 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
222 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
223 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
224 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
225 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
226 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
227 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
228 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
229 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
230 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
231 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
232 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
233 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
234 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
235 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
236 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
237 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
238 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
239 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
240 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
241 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
242 { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
243 { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
245 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
247 { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
248 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
249 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
250 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
251 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
252 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
253 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
254
255 /* These two were accidentally implemented with underscores instead of
256 hyphens in the option names. As this was not discovered for several releases,
257 the incorrect versions are left in the table for compatibility. However, the
258 --help function misses out any option that has an underscore in its name. */
259
260 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
261 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
262
263 #ifdef JFRIEDL_DEBUG
264 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
265 #endif
266 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
267 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
268 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
269 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
270 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
271 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
272 { OP_NODATA, 0, NULL, NULL, NULL }
273 };
274
/* Strings placed before and after each pattern, selected by the -w, -x, and -F
options, which set the 1, 2, and 4 bits in process_options, respectively. The
tables are indexed by that bit combination. Because -w together with -x has the
same effect as -x alone, entries 3 and 7 duplicate entries 2 and 6. */

static const char *prefix[] = {
  "",            /* 0: no options */
  "\\b",         /* 1: -w */
  "^(?:",        /* 2: -x */
  "^(?:",        /* 3: -w -x */
  "\\Q",         /* 4: -F */
  "\\b\\Q",      /* 5: -F -w */
  "^(?:\\Q",     /* 6: -F -x */
  "^(?:\\Q"      /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",            /* 0: no options */
  "\\b",         /* 1: -w */
  ")$",          /* 2: -x */
  ")$",          /* 3: -w -x */
  "\\E",         /* 4: -F */
  "\\E\\b",      /* 5: -F -w */
  "\\E)$",       /* 6: -F -x */
  "\\E)$"        /* 7: -F -w -x */
};
285
/* UTF-8 tables - used only when the newline setting is "any".

utf8_table3 is indexed by the number of additional bytes in a character and
gives the mask that extracts the data bits from the first byte. utf8_table4 is
indexed by the bottom six bits of a first byte that is 0xc0 or greater, and
gives the number of additional bytes that follow it. */

const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};

const char utf8_table4[] = {
  /* 0x00 - 0x0f */  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x10 - 0x1f */  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x20 - 0x2f */  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  /* 0x30 - 0x3f */  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
295
296
297
298 /*************************************************
299 * Exit from the program *
300 *************************************************/
301
302 /* If there has been a resource error, give a suitable message.
303
304 Argument: the return code
305 Returns: does not return
306 */
307
308 static void
309 pcregrep_exit(int rc)
310 {
311 if (resource_error)
312 {
313 fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
314 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
315 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
316 }
317
318 exit(rc);
319 }
320
321
322 /*************************************************
323 * OS-specific functions *
324 *************************************************/
325
326 /* These functions are defined so that they can be made system specific,
327 although at present the only ones are for Unix, Win32, and for "no support". */
328
329
330 /************* Directory scanning in Unix ***********/
331
332 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
333 #include <sys/types.h>
334 #include <sys/stat.h>
335 #include <dirent.h>
336
337 typedef DIR directory_type;
338
339 static int
340 isdirectory(char *filename)
341 {
342 struct stat statbuf;
343 if (stat(filename, &statbuf) < 0)
344 return 0; /* In the expectation that opening as a file will fail */
345 return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
346 }
347
348 static directory_type *
349 opendirectory(char *filename)
350 {
351 return opendir(filename);
352 }
353
354 static char *
355 readdirectory(directory_type *dir)
356 {
357 for (;;)
358 {
359 struct dirent *dent = readdir(dir);
360 if (dent == NULL) return NULL;
361 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
362 return dent->d_name;
363 }
364 /* Control never reaches here */
365 }
366
367 static void
368 closedirectory(directory_type *dir)
369 {
370 closedir(dir);
371 }
372
373
374 /************* Test for regular file in Unix **********/
375
376 static int
377 isregfile(char *filename)
378 {
379 struct stat statbuf;
380 if (stat(filename, &statbuf) < 0)
381 return 1; /* In the expectation that opening as a file will fail */
382 return (statbuf.st_mode & S_IFMT) == S_IFREG;
383 }
384
385
386 /************* Test for a terminal in Unix **********/
387
388 static BOOL
389 is_stdout_tty(void)
390 {
391 return isatty(fileno(stdout));
392 }
393
394 static BOOL
395 is_file_tty(FILE *f)
396 {
397 return isatty(fileno(f));
398 }
399
400
401 /************* Directory scanning in Win32 ***********/
402
403 /* I (Philip Hazel) have no means of testing this code. It was contributed by
404 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
405 when it did not exist. David Byron added a patch that moved the #include of
406 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
407 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
408 undefined when it is indeed undefined. */
409
410 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
411
412 #ifndef STRICT
413 # define STRICT
414 #endif
415 #ifndef WIN32_LEAN_AND_MEAN
416 # define WIN32_LEAN_AND_MEAN
417 #endif
418
419 #include <windows.h>
420
421 #ifndef INVALID_FILE_ATTRIBUTES
422 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
423 #endif
424
425 typedef struct directory_type
426 {
427 HANDLE handle;
428 BOOL first;
429 WIN32_FIND_DATA data;
430 } directory_type;
431
432 int
433 isdirectory(char *filename)
434 {
435 DWORD attr = GetFileAttributes(filename);
436 if (attr == INVALID_FILE_ATTRIBUTES)
437 return 0;
438 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
439 }
440
441 directory_type *
442 opendirectory(char *filename)
443 {
444 size_t len;
445 char *pattern;
446 directory_type *dir;
447 DWORD err;
448 len = strlen(filename);
449 pattern = (char *) malloc(len + 3);
450 dir = (directory_type *) malloc(sizeof(*dir));
451 if ((pattern == NULL) || (dir == NULL))
452 {
453 fprintf(stderr, "pcregrep: malloc failed\n");
454 pcregrep_exit(2);
455 }
456 memcpy(pattern, filename, len);
457 memcpy(&(pattern[len]), "\\*", 3);
458 dir->handle = FindFirstFile(pattern, &(dir->data));
459 if (dir->handle != INVALID_HANDLE_VALUE)
460 {
461 free(pattern);
462 dir->first = TRUE;
463 return dir;
464 }
465 err = GetLastError();
466 free(pattern);
467 free(dir);
468 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
469 return NULL;
470 }
471
472 char *
473 readdirectory(directory_type *dir)
474 {
475 for (;;)
476 {
477 if (!dir->first)
478 {
479 if (!FindNextFile(dir->handle, &(dir->data)))
480 return NULL;
481 }
482 else
483 {
484 dir->first = FALSE;
485 }
486 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
487 return dir->data.cFileName;
488 }
489 #ifndef _MSC_VER
490 return NULL; /* Keep compiler happy; never executed */
491 #endif
492 }
493
494 void
495 closedirectory(directory_type *dir)
496 {
497 FindClose(dir->handle);
498 free(dir);
499 }
500
501
502 /************* Test for regular file in Win32 **********/
503
/* I don't know how to do this, or if it can be done; every path that is not a
directory is assumed to be a regular file. */

int isregfile(char *filename)
{
  if (isdirectory(filename)) return 0;
  return 1;
}
511
512
513 /************* Test for a terminal in Win32 **********/
514
515 /* I don't know how to do this; assume never */
516
517 static BOOL
518 is_stdout_tty(void)
519 {
520 return FALSE;
521 }
522
523 static BOOL
524 is_file_tty(FILE *f)
525 {
526 return FALSE;
527 }
528
529
530 /************* Directory scanning when we can't do it ***********/
531
532 /* The type is void, and apart from isdirectory(), the functions do nothing. */
533
534 #else
535
/* Stand-ins used when directory scanning is not supported. Nothing is ever
reported as a directory, no directory can be opened, and there are never any
entries to read. */

typedef void directory_type;

int
isdirectory(char *filename)
{
  return 0;
}

directory_type *
opendirectory(char *filename)
{
  return (directory_type*)0;
}

char *
readdirectory(directory_type *dir)
{
  return (char*)0;
}

void
closedirectory(directory_type *dir)
{
}
542
543
544 /************* Test for regular when we can't do it **********/
545
/* With no means of testing, every path is taken to be a regular file. */

int
isregfile(char *filename)
{
  return 1;
}
549
550
551 /************* Test for a terminal when we can't do it **********/
552
553 static BOOL
554 is_stdout_tty(void)
555 {
556 return FALSE;
557 }
558
559 static BOOL
560 is_file_tty(FILE *f)
561 {
562 return FALSE;
563 }
564
565 #endif
566
567
568
569 #ifndef HAVE_STRERROR
570 /*************************************************
571 * Provide strerror() for non-ANSI libraries *
572 *************************************************/
573
/* Some old-fashioned systems still around (e.g. SunOS4) lack strerror() in
their libraries, but the same facility can be provided by looking the error
number up in sys_errlist. Numbers outside the range 0 to sys_nerr-1 yield a
fixed "unknown" message. */

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];

  return "unknown error number";
}
587 #endif /* HAVE_STRERROR */
588
589
590
591 /*************************************************
592 * Read one line of input *
593 *************************************************/
594
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a '\n' has been stored, when "length" bytes have been
stored, or at end of file, whichever comes first. No terminating zero is added.
If "length" is not positive, nothing is read and nothing is stored; without
this check one byte would be written to the buffer before the limit was
tested.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
  int c;
  int yield = 0;

  if (length <= 0) return 0;

  while ((c = fgetc(f)) != EOF)
    {
    buffer[yield++] = c;
    if (c == '\n' || yield >= length) break;
    }

  return yield;
}
622
623
624
625 /*************************************************
626 * Find end of line *
627 *************************************************/
628
629 /* The length of the endline sequence that is found is set via lenptr. This may
630 be zero at the very end of the file if there is no line-ending sequence there.
631
632 Arguments:
633 p current position in line
634 endptr end of available data
635 lenptr where to put the length of the eol sequence
636
637 Returns: pointer to the last byte of the line, including the newline byte(s)
638 */
639
640 static char *
641 end_of_line(char *p, char *endptr, int *lenptr)
642 {
643 switch(endlinetype)
644 {
645 default: /* Just in case */
646 case EL_LF:
647 while (p < endptr && *p != '\n') p++;
648 if (p < endptr)
649 {
650 *lenptr = 1;
651 return p + 1;
652 }
653 *lenptr = 0;
654 return endptr;
655
656 case EL_CR:
657 while (p < endptr && *p != '\r') p++;
658 if (p < endptr)
659 {
660 *lenptr = 1;
661 return p + 1;
662 }
663 *lenptr = 0;
664 return endptr;
665
666 case EL_CRLF:
667 for (;;)
668 {
669 while (p < endptr && *p != '\r') p++;
670 if (++p >= endptr)
671 {
672 *lenptr = 0;
673 return endptr;
674 }
675 if (*p == '\n')
676 {
677 *lenptr = 2;
678 return p + 1;
679 }
680 }
681 break;
682
683 case EL_ANYCRLF:
684 while (p < endptr)
685 {
686 int extra = 0;
687 register int c = *((unsigned char *)p);
688
689 if (utf8 && c >= 0xc0)
690 {
691 int gcii, gcss;
692 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
693 gcss = 6*extra;
694 c = (c & utf8_table3[extra]) << gcss;
695 for (gcii = 1; gcii <= extra; gcii++)
696 {
697 gcss -= 6;
698 c |= (p[gcii] & 0x3f) << gcss;
699 }
700 }
701
702 p += 1 + extra;
703
704 switch (c)
705 {
706 case 0x0a: /* LF */
707 *lenptr = 1;
708 return p;
709
710 case 0x0d: /* CR */
711 if (p < endptr && *p == 0x0a)
712 {
713 *lenptr = 2;
714 p++;
715 }
716 else *lenptr = 1;
717 return p;
718
719 default:
720 break;
721 }
722 } /* End of loop for ANYCRLF case */
723
724 *lenptr = 0; /* Must have hit the end */
725 return endptr;
726
727 case EL_ANY:
728 while (p < endptr)
729 {
730 int extra = 0;
731 register int c = *((unsigned char *)p);
732
733 if (utf8 && c >= 0xc0)
734 {
735 int gcii, gcss;
736 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
737 gcss = 6*extra;
738 c = (c & utf8_table3[extra]) << gcss;
739 for (gcii = 1; gcii <= extra; gcii++)
740 {
741 gcss -= 6;
742 c |= (p[gcii] & 0x3f) << gcss;
743 }
744 }
745
746 p += 1 + extra;
747
748 switch (c)
749 {
750 case 0x0a: /* LF */
751 case 0x0b: /* VT */
752 case 0x0c: /* FF */
753 *lenptr = 1;
754 return p;
755
756 case 0x0d: /* CR */
757 if (p < endptr && *p == 0x0a)
758 {
759 *lenptr = 2;
760 p++;
761 }
762 else *lenptr = 1;
763 return p;
764
765 case 0x85: /* NEL */
766 *lenptr = utf8? 2 : 1;
767 return p;
768
769 case 0x2028: /* LS */
770 case 0x2029: /* PS */
771 *lenptr = 3;
772 return p;
773
774 default:
775 break;
776 }
777 } /* End of loop for ANY case */
778
779 *lenptr = 0; /* Must have hit the end */
780 return endptr;
781 } /* End of overall switch */
782 }
783
784
785
786 /*************************************************
787 * Find start of previous line *
788 *************************************************/
789
790 /* This is called when looking back for before lines to print.
791
792 Arguments:
793 p start of the subsequent line
794 startptr start of available data
795
796 Returns: pointer to the start of the previous line
797 */
798
799 static char *
800 previous_line(char *p, char *startptr)
801 {
802 switch(endlinetype)
803 {
804 default: /* Just in case */
805 case EL_LF:
806 p--;
807 while (p > startptr && p[-1] != '\n') p--;
808 return p;
809
810 case EL_CR:
811 p--;
812 while (p > startptr && p[-1] != '\n') p--;
813 return p;
814
815 case EL_CRLF:
816 for (;;)
817 {
818 p -= 2;
819 while (p > startptr && p[-1] != '\n') p--;
820 if (p <= startptr + 1 || p[-2] == '\r') return p;
821 }
822 return p; /* But control should never get here */
823
824 case EL_ANY:
825 case EL_ANYCRLF:
826 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
827 if (utf8) while ((*p & 0xc0) == 0x80) p--;
828
829 while (p > startptr)
830 {
831 register int c;
832 char *pp = p - 1;
833
834 if (utf8)
835 {
836 int extra = 0;
837 while ((*pp & 0xc0) == 0x80) pp--;
838 c = *((unsigned char *)pp);
839 if (c >= 0xc0)
840 {
841 int gcii, gcss;
842 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
843 gcss = 6*extra;
844 c = (c & utf8_table3[extra]) << gcss;
845 for (gcii = 1; gcii <= extra; gcii++)
846 {
847 gcss -= 6;
848 c |= (pp[gcii] & 0x3f) << gcss;
849 }
850 }
851 }
852 else c = *((unsigned char *)pp);
853
854 if (endlinetype == EL_ANYCRLF) switch (c)
855 {
856 case 0x0a: /* LF */
857 case 0x0d: /* CR */
858 return p;
859
860 default:
861 break;
862 }
863
864 else switch (c)
865 {
866 case 0x0a: /* LF */
867 case 0x0b: /* VT */
868 case 0x0c: /* FF */
869 case 0x0d: /* CR */
870 case 0x85: /* NEL */
871 case 0x2028: /* LS */
872 case 0x2029: /* PS */
873 return p;
874
875 default:
876 break;
877 }
878
879 p = pp; /* Back one character */
880 } /* End of loop for ANY case */
881
882 return startptr; /* Hit start of data */
883 } /* End of overall switch */
884 }
885
886
887
888
889
890 /*************************************************
891 * Print the previous "after" lines *
892 *************************************************/
893
894 /* This is called if we are about to lose said lines because of buffer filling,
895 and at the end of the file. The data in the line is written using fwrite() so
896 that a binary zero does not terminate it.
897
898 Arguments:
899 lastmatchnumber the number of the last matching line, plus one
900 lastmatchrestart where we restarted after the last match
901 endptr end of available data
902 printname filename for printing
903
904 Returns: nothing
905 */
906
907 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
908 char *endptr, char *printname)
909 {
910 if (after_context > 0 && lastmatchnumber > 0)
911 {
912 int count = 0;
913 while (lastmatchrestart < endptr && count++ < after_context)
914 {
915 int ellength;
916 char *pp = lastmatchrestart;
917 if (printname != NULL) fprintf(stdout, "%s-", printname);
918 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
919 pp = end_of_line(pp, endptr, &ellength);
920 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
921 lastmatchrestart = pp;
922 }
923 hyphenpending = TRUE;
924 }
925 }
926
927
928
929 /*************************************************
930 * Apply patterns to subject till one matches *
931 *************************************************/
932
933 /* This function is called to run through all patterns, looking for a match. It
934 is used multiple times for the same subject when colouring is enabled, in order
935 to find all possible matches.
936
937 Arguments:
938 matchptr the start of the subject
939 length the length of the subject to match
940 offsets the offets vector to fill in
941 mrc address of where to put the result of pcre_exec()
942
943 Returns: TRUE if there was a match
944 FALSE if there was no match
945 invert if there was a non-fatal error
946 */
947
948 static BOOL
949 match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
950 {
951 int i;
952 size_t slen = length;
953 const char *msg = "this text:\n\n";
954 if (slen > 200)
955 {
956 slen = 200;
957 msg = "text that starts:\n\n";
958 }
959 for (i = 0; i < pattern_count; i++)
960 {
961 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
962 PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
963 if (*mrc >= 0) return TRUE;
964 if (*mrc == PCRE_ERROR_NOMATCH) continue;
965 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
966 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
967 fprintf(stderr, "%s", msg);
968 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
969 fprintf(stderr, "\n\n");
970 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
971 resource_error = TRUE;
972 if (error_count++ > 20)
973 {
974 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
975 pcregrep_exit(2);
976 }
977 return invert; /* No more matching; don't show the line again */
978 }
979
980 return FALSE; /* No match, no errors */
981 }
982
983
984
985 /*************************************************
986 * Grep an individual file *
987 *************************************************/
988
989 /* This is called from grep_or_recurse() below. It uses a buffer that is three
990 times the value of MBUFTHIRD. The matching point is never allowed to stray into
991 the top third of the buffer, thus keeping more of the file available for
992 context printing or for multiline scanning. For large files, the pointer will
993 be in the middle third most of the time, so the bottom third is available for
994 "before" context printing.
995
996 Arguments:
997 handle the fopened FILE stream for a normal file
998 the gzFile pointer when reading is via libz
999 the BZFILE pointer when reading is via libbz2
1000 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1001 printname the file name if it is to be printed for each match
1002 or NULL if the file name is not to be printed
1003 it cannot be NULL if filenames[_nomatch]_only is set
1004
1005 Returns: 0 if there was at least one match
1006 1 otherwise (no matches)
1007 2 if there is a read error on a .bz2 file
1008 */
1009
1010 static int
1011 pcregrep(void *handle, int frtype, char *printname)
1012 {
1013 int rc = 1;
1014 int linenumber = 1;
1015 int lastmatchnumber = 0;
1016 int count = 0;
1017 int filepos = 0;
1018 int offsets[OFFSET_SIZE];
1019 char *lastmatchrestart = NULL;
1020 char buffer[3*MBUFTHIRD];
1021 char *ptr = buffer;
1022 char *endptr;
1023 size_t bufflength;
1024 BOOL endhyphenpending = FALSE;
1025 BOOL input_line_buffered = line_buffered;
1026 FILE *in = NULL; /* Ensure initialized */
1027
1028 #ifdef SUPPORT_LIBZ
1029 gzFile ingz = NULL;
1030 #endif
1031
1032 #ifdef SUPPORT_LIBBZ2
1033 BZFILE *inbz2 = NULL;
1034 #endif
1035
1036
1037 /* Do the first read into the start of the buffer and set up the pointer to end
1038 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1039 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1040 fail. */
1041
1042 #ifdef SUPPORT_LIBZ
1043 if (frtype == FR_LIBZ)
1044 {
1045 ingz = (gzFile)handle;
1046 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1047 }
1048 else
1049 #endif
1050
1051 #ifdef SUPPORT_LIBBZ2
1052 if (frtype == FR_LIBBZ2)
1053 {
1054 inbz2 = (BZFILE *)handle;
1055 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1056 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1057 } /* without the cast it is unsigned. */
1058 else
1059 #endif
1060
1061 {
1062 in = (FILE *)handle;
1063 if (is_file_tty(in)) input_line_buffered = TRUE;
1064 bufflength = input_line_buffered?
1065 read_one_line(buffer, 3*MBUFTHIRD, in) :
1066 fread(buffer, 1, 3*MBUFTHIRD, in);
1067 }
1068
1069 endptr = buffer + bufflength;
1070
1071 /* Loop while the current pointer is not at the end of the file. For large
1072 files, endptr will be at the end of the buffer when we are in the middle of the
1073 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1074 way, the buffer is shifted left and re-filled. */
1075
1076 while (ptr < endptr)
1077 {
1078 int endlinelength;
1079 int mrc = 0;
1080 BOOL match;
1081 char *matchptr = ptr;
1082 char *t = ptr;
1083 size_t length, linelength;
1084
1085 /* At this point, ptr is at the start of a line. We need to find the length
1086 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1087 length remainder of the data in the buffer. Otherwise, it is the length of
1088 the next line, excluding the terminating newline. After matching, we always
1089 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1090 option is used for compiling, so that any match is constrained to be in the
1091 first line. */
1092
1093 t = end_of_line(t, endptr, &endlinelength);
1094 linelength = t - ptr - endlinelength;
1095 length = multiline? (size_t)(endptr - ptr) : linelength;
1096
1097 /* Extra processing for Jeffrey Friedl's debugging. */
1098
1099 #ifdef JFRIEDL_DEBUG
1100 if (jfriedl_XT || jfriedl_XR)
1101 {
1102 #include <sys/time.h>
1103 #include <time.h>
1104 struct timeval start_time, end_time;
1105 struct timezone dummy;
1106 int i;
1107
1108 if (jfriedl_XT)
1109 {
1110 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1111 const char *orig = ptr;
1112 ptr = malloc(newlen + 1);
1113 if (!ptr) {
1114 printf("out of memory");
1115 pcregrep_exit(2);
1116 }
1117 endptr = ptr;
1118 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1119 for (i = 0; i < jfriedl_XT; i++) {
1120 strncpy(endptr, orig, length);
1121 endptr += length;
1122 }
1123 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1124 length = newlen;
1125 }
1126
1127 if (gettimeofday(&start_time, &dummy) != 0)
1128 perror("bad gettimeofday");
1129
1130
1131 for (i = 0; i < jfriedl_XR; i++)
1132 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1133 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1134
1135 if (gettimeofday(&end_time, &dummy) != 0)
1136 perror("bad gettimeofday");
1137
1138 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1139 -
1140 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1141
1142 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1143 return 0;
1144 }
1145 #endif
1146
1147 /* We come back here after a match when the -o option (only_matching) is set,
1148 in order to find any further matches in the same line. */
1149
1150 ONLY_MATCHING_RESTART:
1151
1152 /* Run through all the patterns until one matches or there is an error other
1153 than NOMATCH. This code is in a subroutine so that it can be re-used for
1154 finding subsequent matches when colouring matched lines. */
1155
1156 match = match_patterns(matchptr, length, offsets, &mrc);
1157
1158 /* If it's a match or a not-match (as required), do what's wanted. */
1159
1160 if (match != invert)
1161 {
1162 BOOL hyphenprinted = FALSE;
1163
1164 /* We've failed if we want a file that doesn't have any matches. */
1165
1166 if (filenames == FN_NOMATCH_ONLY) return 1;
1167
1168 /* Just count if just counting is wanted. */
1169
1170 if (count_only) count++;
1171
1172 /* If all we want is a file name, there is no need to scan any more lines
1173 in the file. */
1174
1175 else if (filenames == FN_MATCH_ONLY)
1176 {
1177 fprintf(stdout, "%s\n", printname);
1178 return 0;
1179 }
1180
1181 /* Likewise, if all we want is a yes/no answer. */
1182
1183 else if (quiet) return 0;
1184
1185 /* The --only-matching option prints just the substring that matched, or a
1186 captured portion of it, as long as this string is not empty, and the
1187 --file-offsets and --line-offsets options output offsets for the matching
1188 substring (they both force --only-matching = 0). None of these options
1189 prints any context. Afterwards, adjust the start and length, and then jump
1190 back to look for further matches in the same line. If we are in invert
1191 mode, however, nothing is printed and we do not restart - this could still
1192 be useful because the return code is set. */
1193
1194 else if (only_matching >= 0)
1195 {
1196 if (!invert)
1197 {
1198 if (printname != NULL) fprintf(stdout, "%s:", printname);
1199 if (number) fprintf(stdout, "%d:", linenumber);
1200 if (line_offsets)
1201 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1202 offsets[1] - offsets[0]);
1203 else if (file_offsets)
1204 fprintf(stdout, "%d,%d\n",
1205 (int)(filepos + matchptr + offsets[0] - ptr),
1206 offsets[1] - offsets[0]);
1207 else if (only_matching < mrc)
1208 {
1209 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1210 if (plen > 0)
1211 {
1212 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1213 FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1214 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1215 fprintf(stdout, "\n");
1216 }
1217 }
1218 else if (printname != NULL || number) fprintf(stdout, "\n");
1219 matchptr += offsets[1];
1220 length -= offsets[1];
1221 match = FALSE;
1222 if (line_buffered) fflush(stdout);
1223 rc = 0; /* Had some success */
1224 goto ONLY_MATCHING_RESTART;
1225 }
1226 }
1227
1228 /* This is the default case when none of the above options is set. We print
1229 the matching lines(s), possibly preceded and/or followed by other lines of
1230 context. */
1231
1232 else
1233 {
1234 /* See if there is a requirement to print some "after" lines from a
1235 previous match. We never print any overlaps. */
1236
1237 if (after_context > 0 && lastmatchnumber > 0)
1238 {
1239 int ellength;
1240 int linecount = 0;
1241 char *p = lastmatchrestart;
1242
1243 while (p < ptr && linecount < after_context)
1244 {
1245 p = end_of_line(p, ptr, &ellength);
1246 linecount++;
1247 }
1248
1249 /* It is important to advance lastmatchrestart during this printing so
1250 that it interacts correctly with any "before" printing below. Print
1251 each line's data using fwrite() in case there are binary zeroes. */
1252
1253 while (lastmatchrestart < p)
1254 {
1255 char *pp = lastmatchrestart;
1256 if (printname != NULL) fprintf(stdout, "%s-", printname);
1257 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1258 pp = end_of_line(pp, endptr, &ellength);
1259 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1260 lastmatchrestart = pp;
1261 }
1262 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1263 }
1264
1265 /* If there were non-contiguous lines printed above, insert hyphens. */
1266
1267 if (hyphenpending)
1268 {
1269 fprintf(stdout, "--\n");
1270 hyphenpending = FALSE;
1271 hyphenprinted = TRUE;
1272 }
1273
1274 /* See if there is a requirement to print some "before" lines for this
1275 match. Again, don't print overlaps. */
1276
1277 if (before_context > 0)
1278 {
1279 int linecount = 0;
1280 char *p = ptr;
1281
1282 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1283 linecount < before_context)
1284 {
1285 linecount++;
1286 p = previous_line(p, buffer);
1287 }
1288
1289 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1290 fprintf(stdout, "--\n");
1291
1292 while (p < ptr)
1293 {
1294 int ellength;
1295 char *pp = p;
1296 if (printname != NULL) fprintf(stdout, "%s-", printname);
1297 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1298 pp = end_of_line(pp, endptr, &ellength);
1299 FWRITE(p, 1, pp - p, stdout);
1300 p = pp;
1301 }
1302 }
1303
1304 /* Now print the matching line(s); ensure we set hyphenpending at the end
1305 of the file if any context lines are being output. */
1306
1307 if (after_context > 0 || before_context > 0)
1308 endhyphenpending = TRUE;
1309
1310 if (printname != NULL) fprintf(stdout, "%s:", printname);
1311 if (number) fprintf(stdout, "%d:", linenumber);
1312
1313 /* In multiline mode, we want to print to the end of the line in which
1314 the end of the matched string is found, so we adjust linelength and the
1315 line number appropriately, but only when there actually was a match
1316 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1317 the match will always be before the first newline sequence. */
1318
1319 if (multiline & !invert)
1320 {
1321 char *endmatch = ptr + offsets[1];
1322 t = ptr;
1323 while (t < endmatch)
1324 {
1325 t = end_of_line(t, endptr, &endlinelength);
1326 if (t < endmatch) linenumber++; else break;
1327 }
1328 linelength = t - ptr - endlinelength;
1329 }
1330
1331 /*** NOTE: Use only fwrite() to output the data line, so that binary
1332 zeroes are treated as just another data character. */
1333
1334 /* This extra option, for Jeffrey Friedl's debugging requirements,
1335 replaces the matched string, or a specific captured string if it exists,
1336 with X. When this happens, colouring is ignored. */
1337
1338 #ifdef JFRIEDL_DEBUG
1339 if (S_arg >= 0 && S_arg < mrc)
1340 {
1341 int first = S_arg * 2;
1342 int last = first + 1;
1343 FWRITE(ptr, 1, offsets[first], stdout);
1344 fprintf(stdout, "X");
1345 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1346 }
1347 else
1348 #endif
1349
1350 /* We have to split the line(s) up if colouring, and search for further
1351 matches, but not of course if the line is a non-match. */
1352
1353 if (do_colour && !invert)
1354 {
1355 int plength;
1356 int last_offset = 0;
1357 FWRITE(ptr, 1, offsets[0], stdout);
1358 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1359 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1360 fprintf(stdout, "%c[00m", 0x1b);
1361 for (;;)
1362 {
1363 last_offset += offsets[1];
1364 matchptr += offsets[1];
1365 length -= offsets[1];
1366 if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1367 FWRITE(matchptr, 1, offsets[0], stdout);
1368 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1369 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1370 fprintf(stdout, "%c[00m", 0x1b);
1371 }
1372
1373 /* In multiline mode, we may have already printed the complete line
1374 and its line-ending characters (if they matched the pattern), so there
1375 may be no more to print. */
1376
1377 plength = (linelength + endlinelength) - last_offset;
1378 if (plength > 0)
1379 FWRITE(ptr + last_offset, 1, plength, stdout);
1380 }
1381
1382 /* Not colouring; no need to search for further matches */
1383
1384 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1385 }
1386
1387 /* End of doing what has to be done for a match. If --line-buffered was
1388 given, flush the output. */
1389
1390 if (line_buffered) fflush(stdout);
1391 rc = 0; /* Had some success */
1392
1393 /* Remember where the last match happened for after_context. We remember
1394 where we are about to restart, and that line's number. */
1395
1396 lastmatchrestart = ptr + linelength + endlinelength;
1397 lastmatchnumber = linenumber + 1;
1398 }
1399
1400 /* For a match in multiline inverted mode (which of course did not cause
1401 anything to be printed), we have to move on to the end of the match before
1402 proceeding. */
1403
1404 if (multiline && invert && match)
1405 {
1406 int ellength;
1407 char *endmatch = ptr + offsets[1];
1408 t = ptr;
1409 while (t < endmatch)
1410 {
1411 t = end_of_line(t, endptr, &ellength);
1412 if (t <= endmatch) linenumber++; else break;
1413 }
1414 endmatch = end_of_line(endmatch, endptr, &ellength);
1415 linelength = endmatch - ptr - ellength;
1416 }
1417
1418 /* Advance to after the newline and increment the line number. The file
1419 offset to the current line is maintained in filepos. */
1420
1421 ptr += linelength + endlinelength;
1422 filepos += (int)(linelength + endlinelength);
1423 linenumber++;
1424
1425 /* If input is line buffered, and the buffer is not yet full, read another
1426 line and add it into the buffer. */
1427
1428 if (input_line_buffered && bufflength < sizeof(buffer))
1429 {
1430 int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1431 bufflength += add;
1432 endptr += add;
1433 }
1434
1435 /* If we haven't yet reached the end of the file (the buffer is full), and
1436 the current point is in the top 1/3 of the buffer, slide the buffer down by
1437 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1438 about to be lost, print them. */
1439
1440 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1441 {
1442 if (after_context > 0 &&
1443 lastmatchnumber > 0 &&
1444 lastmatchrestart < buffer + MBUFTHIRD)
1445 {
1446 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1447 lastmatchnumber = 0;
1448 }
1449
1450 /* Now do the shuffle */
1451
1452 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1453 ptr -= MBUFTHIRD;
1454
1455 #ifdef SUPPORT_LIBZ
1456 if (frtype == FR_LIBZ)
1457 bufflength = 2*MBUFTHIRD +
1458 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1459 else
1460 #endif
1461
1462 #ifdef SUPPORT_LIBBZ2
1463 if (frtype == FR_LIBBZ2)
1464 bufflength = 2*MBUFTHIRD +
1465 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1466 else
1467 #endif
1468
1469 bufflength = 2*MBUFTHIRD +
1470 (input_line_buffered?
1471 read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1472 fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1473 endptr = buffer + bufflength;
1474
1475 /* Adjust any last match point */
1476
1477 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1478 }
1479 } /* Loop through the whole file */
1480
1481 /* End of file; print final "after" lines if wanted; do_after_lines sets
1482 hyphenpending if it prints something. */
1483
1484 if (only_matching < 0 && !count_only)
1485 {
1486 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1487 hyphenpending |= endhyphenpending;
1488 }
1489
1490 /* Print the file name if we are looking for those without matches and there
1491 were none. If we found a match, we won't have got this far. */
1492
1493 if (filenames == FN_NOMATCH_ONLY)
1494 {
1495 fprintf(stdout, "%s\n", printname);
1496 return 0;
1497 }
1498
1499 /* Print the match count if wanted */
1500
1501 if (count_only)
1502 {
1503 if (count > 0 || !omit_zero_count)
1504 {
1505 if (printname != NULL && filenames != FN_NONE)
1506 fprintf(stdout, "%s:", printname);
1507 fprintf(stdout, "%d\n", count);
1508 }
1509 }
1510
1511 return rc;
1512 }
1513
1514
1515
1516 /*************************************************
1517 * Grep a file or recurse into a directory *
1518 *************************************************/
1519
1520 /* Given a path name, if it's a directory, scan all the files if we are
1521 recursing; if it's a file, grep it.
1522
1523 Arguments:
1524 pathname the path to investigate
1525 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1526 only_one_at_top TRUE if the path is the only one at toplevel
1527
1528 Returns: 0 if there was at least one match
1529 1 if there were no matches
1530 2 there was some kind of error
1531
1532 However, file opening failures are suppressed if "silent" is set.
1533 */
1534
1535 static int
1536 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1537 {
1538 int rc = 1;
1539 int sep;
1540 int frtype;
1541 int pathlen;
1542 void *handle;
1543 FILE *in = NULL; /* Ensure initialized */
1544
1545 #ifdef SUPPORT_LIBZ
1546 gzFile ingz = NULL;
1547 #endif
1548
1549 #ifdef SUPPORT_LIBBZ2
1550 BZFILE *inbz2 = NULL;
1551 #endif
1552
1553 /* If the file name is "-" we scan stdin */
1554
1555 if (strcmp(pathname, "-") == 0)
1556 {
1557 return pcregrep(stdin, FR_PLAIN,
1558 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1559 stdin_name : NULL);
1560 }
1561
1562 /* If the file is a directory, skip if skipping or if we are recursing, scan
1563 each file and directory within it, subject to any include or exclude patterns
1564 that were set. The scanning code is localized so it can be made
1565 system-specific. */
1566
1567 if ((sep = isdirectory(pathname)) != 0)
1568 {
1569 if (dee_action == dee_SKIP) return 1;
1570 if (dee_action == dee_RECURSE)
1571 {
1572 char buffer[1024];
1573 char *nextfile;
1574 directory_type *dir = opendirectory(pathname);
1575
1576 if (dir == NULL)
1577 {
1578 if (!silent)
1579 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1580 strerror(errno));
1581 return 2;
1582 }
1583
1584 while ((nextfile = readdirectory(dir)) != NULL)
1585 {
1586 int frc, nflen;
1587 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1588 nflen = (int)(strlen(nextfile));
1589
1590 if (isdirectory(buffer))
1591 {
1592 if (exclude_dir_compiled != NULL &&
1593 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1594 continue;
1595
1596 if (include_dir_compiled != NULL &&
1597 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1598 continue;
1599 }
1600 else
1601 {
1602 if (exclude_compiled != NULL &&
1603 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1604 continue;
1605
1606 if (include_compiled != NULL &&
1607 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1608 continue;
1609 }
1610
1611 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1612 if (frc > 1) rc = frc;
1613 else if (frc == 0 && rc == 1) rc = 0;
1614 }
1615
1616 closedirectory(dir);
1617 return rc;
1618 }
1619 }
1620
1621 /* If the file is not a directory and not a regular file, skip it if that's
1622 been requested. */
1623
1624 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1625
1626 /* Control reaches here if we have a regular file, or if we have a directory
1627 and recursion or skipping was not requested, or if we have anything else and
1628 skipping was not requested. The scan proceeds. If this is the first and only
1629 argument at top level, we don't show the file name, unless we are only showing
1630 the file name, or the filename was forced (-H). */
1631
1632 pathlen = (int)(strlen(pathname));
1633
1634 /* Open using zlib if it is supported and the file name ends with .gz. */
1635
1636 #ifdef SUPPORT_LIBZ
1637 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1638 {
1639 ingz = gzopen(pathname, "rb");
1640 if (ingz == NULL)
1641 {
1642 if (!silent)
1643 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1644 strerror(errno));
1645 return 2;
1646 }
1647 handle = (void *)ingz;
1648 frtype = FR_LIBZ;
1649 }
1650 else
1651 #endif
1652
1653 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1654
1655 #ifdef SUPPORT_LIBBZ2
1656 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1657 {
1658 inbz2 = BZ2_bzopen(pathname, "rb");
1659 handle = (void *)inbz2;
1660 frtype = FR_LIBBZ2;
1661 }
1662 else
1663 #endif
1664
1665 /* Otherwise use plain fopen(). The label is so that we can come back here if
1666 an attempt to read a .bz2 file indicates that it really is a plain file. */
1667
1668 #ifdef SUPPORT_LIBBZ2
1669 PLAIN_FILE:
1670 #endif
1671 {
1672 in = fopen(pathname, "rb");
1673 handle = (void *)in;
1674 frtype = FR_PLAIN;
1675 }
1676
1677 /* All the opening methods return errno when they fail. */
1678
1679 if (handle == NULL)
1680 {
1681 if (!silent)
1682 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1683 strerror(errno));
1684 return 2;
1685 }
1686
1687 /* Now grep the file */
1688
1689 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1690 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1691
1692 /* Close in an appropriate manner. */
1693
1694 #ifdef SUPPORT_LIBZ
1695 if (frtype == FR_LIBZ)
1696 gzclose(ingz);
1697 else
1698 #endif
1699
1700 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1701 read failed. If the error indicates that the file isn't in fact bzipped, try
1702 again as a normal file. */
1703
1704 #ifdef SUPPORT_LIBBZ2
1705 if (frtype == FR_LIBBZ2)
1706 {
1707 if (rc == 2)
1708 {
1709 int errnum;
1710 const char *err = BZ2_bzerror(inbz2, &errnum);
1711 if (errnum == BZ_DATA_ERROR_MAGIC)
1712 {
1713 BZ2_bzclose(inbz2);
1714 goto PLAIN_FILE;
1715 }
1716 else if (!silent)
1717 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1718 pathname, err);
1719 }
1720 BZ2_bzclose(inbz2);
1721 }
1722 else
1723 #endif
1724
1725 /* Normal file close */
1726
1727 fclose(in);
1728
1729 /* Pass back the yield from pcregrep(). */
1730
1731 return rc;
1732 }
1733
1734
1735
1736
1737 /*************************************************
1738 * Usage function *
1739 *************************************************/
1740
1741 static int
1742 usage(int rc)
1743 {
1744 option_item *op;
1745 fprintf(stderr, "Usage: pcregrep [-");
1746 for (op = optionlist; op->one_char != 0; op++)
1747 {
1748 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1749 }
1750 fprintf(stderr, "] [long options] [pattern] [files]\n");
1751 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1752 "options.\n");
1753 return rc;
1754 }
1755
1756
1757
1758
1759 /*************************************************
1760 * Help function *
1761 *************************************************/
1762
1763 static void
1764 help(void)
1765 {
1766 option_item *op;
1767
1768 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1769 printf("Search for PATTERN in each FILE or standard input.\n");
1770 printf("PATTERN must be present if neither -e nor -f is used.\n");
1771 printf("\"-\" can be used as a file name to mean STDIN.\n");
1772
1773 #ifdef SUPPORT_LIBZ
1774 printf("Files whose names end in .gz are read using zlib.\n");
1775 #endif
1776
1777 #ifdef SUPPORT_LIBBZ2
1778 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1779 #endif
1780
1781 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1782 printf("Other files and the standard input are read as plain files.\n\n");
1783 #else
1784 printf("All files are read as plain files, without any interpretation.\n\n");
1785 #endif
1786
1787 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1788 printf("Options:\n");
1789
1790 for (op = optionlist; op->one_char != 0; op++)
1791 {
1792 int n;
1793 char s[4];
1794
1795 /* Two options were accidentally implemented and documented with underscores
1796 instead of hyphens in their names, something that was not noticed for quite a
1797 few releases. When fixing this, I left the underscored versions in the list
1798 in case people were using them. However, we don't want to display them in the
1799 help data. There are no other options that contain underscores, and we do not
1800 expect ever to implement such options. Therefore, just omit any option that
1801 contains an underscore. */
1802
1803 if (strchr(op->long_name, '_') != NULL) continue;
1804
1805 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1806 n = 31 - printf(" %s --%s", s, op->long_name);
1807 if (n < 1) n = 1;
1808 printf("%.*s%s\n", n, " ", op->help_text);
1809 }
1810
1811 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1812 printf("trailing white space is removed and blank lines are ignored.\n");
1813 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1814
1815 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1816 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1817 }
1818
1819
1820
1821
1822 /*************************************************
1823 * Handle a single-letter, no data option *
1824 *************************************************/
1825
1826 static int
1827 handle_option(int letter, int options)
1828 {
1829 switch(letter)
1830 {
1831 case N_FOFFSETS: file_offsets = TRUE; break;
1832 case N_HELP: help(); pcregrep_exit(0);
1833 case N_LOFFSETS: line_offsets = number = TRUE; break;
1834 case N_LBUFFER: line_buffered = TRUE; break;
1835 case 'c': count_only = TRUE; break;
1836 case 'F': process_options |= PO_FIXED_STRINGS; break;
1837 case 'H': filenames = FN_FORCE; break;
1838 case 'h': filenames = FN_NONE; break;
1839 case 'i': options |= PCRE_CASELESS; break;
1840 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1841 case 'L': filenames = FN_NOMATCH_ONLY; break;
1842 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1843 case 'n': number = TRUE; break;
1844 case 'o': only_matching = 0; break;
1845 case 'q': quiet = TRUE; break;
1846 case 'r': dee_action = dee_RECURSE; break;
1847 case 's': silent = TRUE; break;
1848 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1849 case 'v': invert = TRUE; break;
1850 case 'w': process_options |= PO_WORD_MATCH; break;
1851 case 'x': process_options |= PO_LINE_MATCH; break;
1852
1853 case 'V':
1854 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1855 pcregrep_exit(0);
1856 break;
1857
1858 default:
1859 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1860 pcregrep_exit(usage(2));
1861 }
1862
1863 return options;
1864 }
1865
1866
1867
1868
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", "11th", etc.

Argument:  n   the number
Returns:   pointer to a static buffer holding the ordinal text; the buffer is
           overwritten by the next call
*/

static char *
ordin(int n)
{
  /* Large enough for any int ("-2147483648" is 11 characters when int is 32
  bits), plus the two-letter suffix and the terminating zero. */

  static char buffer[16];
  const char *suffix = "th";
  char *p = buffer;

  /* sprintf() returns the number of characters written, which leaves p at
  the terminating zero, ready for the suffix. */

  p += sprintf(p, "%d", n);

  /* Numbers ending in 11, 12, or 13 take "th" even though their last digit
  is 1, 2, or 3. */

  if (n%100 < 11 || n%100 > 13) switch (n%10)
  {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
  }

  strcpy(p, suffix);
  return buffer;
}
1891
1892
1893
1894 /*************************************************
1895 * Compile a single pattern *
1896 *************************************************/
1897
1898 /* When the -F option has been used, this is called for each substring.
1899 Otherwise it's called for each supplied pattern.
1900
1901 Arguments:
1902 pattern the pattern string
1903 options the PCRE options
1904 filename the file name, or NULL for a command-line pattern
1905 count 0 if this is the only command line pattern, or
1906 number of the command line pattern, or
1907 linenumber for a pattern from a file
1908
1909 Returns: TRUE on success, FALSE after an error
1910 */
1911
1912 static BOOL
1913 compile_single_pattern(char *pattern, int options, char *filename, int count)
1914 {
1915 char buffer[MBUFTHIRD + 16];
1916 const char *error;
1917 int errptr;
1918
1919 if (pattern_count >= MAX_PATTERN_COUNT)
1920 {
1921 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1922 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1923 return FALSE;
1924 }
1925
1926 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1927 suffix[process_options]);
1928 pattern_list[pattern_count] =
1929 pcre_compile(buffer, options, &error, &errptr, pcretables);
1930 if (pattern_list[pattern_count] != NULL)
1931 {
1932 pattern_count++;
1933 return TRUE;
1934 }
1935
1936 /* Handle compile errors */
1937
1938 errptr -= (int)strlen(prefix[process_options]);
1939 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1940
1941 if (filename == NULL)
1942 {
1943 if (count == 0)
1944 fprintf(stderr, "pcregrep: Error in command-line regex "
1945 "at offset %d: %s\n", errptr, error);
1946 else
1947 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1948 "at offset %d: %s\n", ordin(count), errptr, error);
1949 }
1950 else
1951 {
1952 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1953 "at offset %d: %s\n", count, filename, errptr, error);
1954 }
1955
1956 return FALSE;
1957 }
1958
1959
1960
1961 /*************************************************
1962 * Compile one supplied pattern *
1963 *************************************************/
1964
1965 /* When the -F option has been used, each string may be a list of strings,
1966 separated by line breaks. They will be matched literally.
1967
1968 Arguments:
1969 pattern the pattern string
1970 options the PCRE options
1971 filename the file name, or NULL for a command-line pattern
1972 count 0 if this is the only command line pattern, or
1973 number of the command line pattern, or
1974 linenumber for a pattern from a file
1975
1976 Returns: TRUE on success, FALSE after an error
1977 */
1978
1979 static BOOL
1980 compile_pattern(char *pattern, int options, char *filename, int count)
1981 {
1982 if ((process_options & PO_FIXED_STRINGS) != 0)
1983 {
1984 char *eop = pattern + strlen(pattern);
1985 char buffer[MBUFTHIRD];
1986 for(;;)
1987 {
1988 int ellength;
1989 char *p = end_of_line(pattern, eop, &ellength);
1990 if (ellength == 0)
1991 return compile_single_pattern(pattern, options, filename, count);
1992 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1993 pattern = p;
1994 if (!compile_single_pattern(buffer, options, filename, count))
1995 return FALSE;
1996 }
1997 }
1998 else return compile_single_pattern(pattern, options, filename, count);
1999 }
2000
2001
2002
2003 /*************************************************
2004 * Main program *
2005 *************************************************/
2006
2007 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2008
2009 int
2010 main(int argc, char **argv)
2011 {
2012 int i, j;
2013 int rc = 1;
2014 int pcre_options = 0;
2015 int cmd_pattern_count = 0;
2016 int hint_count = 0;
2017 int errptr;
2018 BOOL only_one_at_top;
2019 char *patterns[MAX_PATTERN_COUNT];
2020 const char *locale_from = "--locale";
2021 const char *error;
2022
2023 /* Set the default line ending value from the default in the PCRE library;
2024 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2025 Note that the return values from pcre_config(), though derived from the ASCII
2026 codes, are the same in EBCDIC environments, so we must use the actual values
2027 rather than escapes such as '\r'. */
2028
2029 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2030 switch(i)
2031 {
2032 default: newline = (char *)"lf"; break;
2033 case 13: newline = (char *)"cr"; break;
2034 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2035 case -1: newline = (char *)"any"; break;
2036 case -2: newline = (char *)"anycrlf"; break;
2037 }
2038
2039 /* Process the options */
2040
2041 for (i = 1; i < argc; i++)
2042 {
2043 option_item *op = NULL;
2044 char *option_data = (char *)""; /* default to keep compiler happy */
2045 BOOL longop;
2046 BOOL longopwasequals = FALSE;
2047
2048 if (argv[i][0] != '-') break;
2049
2050 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2051 but only if we have previously had -e or -f to define the patterns. */
2052
2053 if (argv[i][1] == 0)
2054 {
2055 if (pattern_filename != NULL || pattern_count > 0) break;
2056 else pcregrep_exit(usage(2));
2057 }
2058
2059 /* Handle a long name option, or -- to terminate the options */
2060
2061 if (argv[i][1] == '-')
2062 {
2063 char *arg = argv[i] + 2;
2064 char *argequals = strchr(arg, '=');
2065
2066 if (*arg == 0) /* -- terminates options */
2067 {
2068 i++;
2069 break; /* out of the options-handling loop */
2070 }
2071
2072 longop = TRUE;
2073
2074 /* Some long options have data that follows after =, for example file=name.
2075 Some options have variations in the long name spelling: specifically, we
2076 allow "regexp" because GNU grep allows it, though I personally go along
2077 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2078 These options are entered in the table as "regex(p)". Options can be in
2079 both these categories. */
2080
2081 for (op = optionlist; op->one_char != 0; op++)
2082 {
2083 char *opbra = strchr(op->long_name, '(');
2084 char *equals = strchr(op->long_name, '=');
2085
2086 /* Handle options with only one spelling of the name */
2087
2088 if (opbra == NULL) /* Does not contain '(' */
2089 {
2090 if (equals == NULL) /* Not thing=data case */
2091 {
2092 if (strcmp(arg, op->long_name) == 0) break;
2093 }
2094 else /* Special case xxx=data */
2095 {
2096 int oplen = (int)(equals - op->long_name);
2097 int arglen = (argequals == NULL)?
2098 (int)strlen(arg) : (int)(argequals - arg);
2099 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2100 {
2101 option_data = arg + arglen;
2102 if (*option_data == '=')
2103 {
2104 option_data++;
2105 longopwasequals = TRUE;
2106 }
2107 break;
2108 }
2109 }
2110 }
2111
2112 /* Handle options with an alternate spelling of the name */
2113
2114 else
2115 {
2116 char buff1[24];
2117 char buff2[24];
2118
2119 int baselen = (int)(opbra - op->long_name);
2120 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2121 int arglen = (argequals == NULL || equals == NULL)?
2122 (int)strlen(arg) : (int)(argequals - arg);
2123
2124 sprintf(buff1, "%.*s", baselen, op->long_name);
2125 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2126
2127 if (strncmp(arg, buff1, arglen) == 0 ||
2128 strncmp(arg, buff2, arglen) == 0)
2129 {
2130 if (equals != NULL && argequals != NULL)
2131 {
2132 option_data = argequals;
2133 if (*option_data == '=')
2134 {
2135 option_data++;
2136 longopwasequals = TRUE;
2137 }
2138 }
2139 break;
2140 }
2141 }
2142 }
2143
2144 if (op->one_char == 0)
2145 {
2146 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2147 pcregrep_exit(usage(2));
2148 }
2149 }
2150
2151 /* Jeffrey Friedl's debugging harness uses these additional options which
2152 are not in the right form for putting in the option table because they use
2153 only one hyphen, yet are more than one character long. By putting them
2154 separately here, they will not get displayed as part of the help() output,
2155 but I don't think Jeffrey will care about that. */
2156
2157 #ifdef JFRIEDL_DEBUG
2158 else if (strcmp(argv[i], "-pre") == 0) {
2159 jfriedl_prefix = argv[++i];
2160 continue;
2161 } else if (strcmp(argv[i], "-post") == 0) {
2162 jfriedl_postfix = argv[++i];
2163 continue;
2164 } else if (strcmp(argv[i], "-XT") == 0) {
2165 sscanf(argv[++i], "%d", &jfriedl_XT);
2166 continue;
2167 } else if (strcmp(argv[i], "-XR") == 0) {
2168 sscanf(argv[++i], "%d", &jfriedl_XR);
2169 continue;
2170 }
2171 #endif
2172
2173
2174 /* One-char options; many that have no data may be in a single argument; we
2175 continue till we hit the last one or one that needs data. */
2176
2177 else
2178 {
2179 char *s = argv[i] + 1;
2180 longop = FALSE;
2181 while (*s != 0)
2182 {
2183 for (op = optionlist; op->one_char != 0; op++)
2184 {
2185 if (*s == op->one_char) break;
2186 }
2187 if (op->one_char == 0)
2188 {
2189 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2190 *s, argv[i]);
2191 pcregrep_exit(usage(2));
2192 }
2193
2194 /* Check for a single-character option that has data: OP_OP_NUMBER
2195 is used for one that either has a numerical value or defaults, i.e. the
2196 data is optional. If a digit follows, there is data; if not, carry on
2197 with other single-character options in the same string. */
2198
2199 option_data = s+1;
2200 if (op->type == OP_OP_NUMBER)
2201 {
2202 if (isdigit((unsigned char)s[1])) break;
2203 }
2204 else /* Check for end or a dataless option */
2205 {
2206 if (op->type != OP_NODATA || s[1] == 0) break;
2207 }
2208
2209 /* Handle a single-character option with no data, then loop for the
2210 next character in the string. */
2211
2212 pcre_options = handle_option(*s++, pcre_options);
2213 }
2214 }
2215
2216 /* At this point we should have op pointing to a matched option. If the type
2217 is OP_NODATA, it means that there is no data, and the option might set
2218 something in the PCRE options. */
2219
2220 if (op->type == OP_NODATA)
2221 {
2222 pcre_options = handle_option(op->one_char, pcre_options);
2223 continue;
2224 }
2225
2226 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2227 either has a value or defaults to something. It cannot have data in a
2228 separate item. At the moment, the only such options are "colo(u)r",
2229 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2230
2231 if (*option_data == 0 &&
2232 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2233 {
2234 switch (op->one_char)
2235 {
2236 case N_COLOUR:
2237 colour_option = (char *)"auto";
2238 break;
2239
2240 case 'o':
2241 only_matching = 0;
2242 break;
2243
2244 #ifdef JFRIEDL_DEBUG
2245 case 'S':
2246 S_arg = 0;
2247 break;
2248 #endif
2249 }
2250 continue;
2251 }
2252
2253 /* Otherwise, find the data string for the option. */
2254
2255 if (*option_data == 0)
2256 {
2257 if (i >= argc - 1 || longopwasequals)
2258 {
2259 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2260 pcregrep_exit(usage(2));
2261 }
2262 option_data = argv[++i];
2263 }
2264
2265 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2266 multiple times to create a list of patterns. */
2267
2268 if (op->type == OP_PATLIST)
2269 {
2270 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2271 {
2272 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2273 MAX_PATTERN_COUNT);
2274 return 2;
2275 }
2276 patterns[cmd_pattern_count++] = option_data;
2277 }
2278
2279 /* Otherwise, deal with single string or numeric data values. */
2280
2281 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2282 op->type != OP_OP_NUMBER)
2283 {
2284 *((char **)op->dataptr) = option_data;
2285 }
2286
2287 /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2288 only for unpicking arguments, so just keep it simple. */
2289
2290 else
2291 {
2292 unsigned long int n = 0;
2293 char *endptr = option_data;
2294 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2295 while (isdigit((unsigned char)(*endptr)))
2296 n = n * 10 + (int)(*endptr++ - '0');
2297 if (*endptr != 0)
2298 {
2299 if (longop)
2300 {
2301 char *equals = strchr(op->long_name, '=');
2302 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2303 (int)(equals - op->long_name);
2304 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2305 option_data, nlen, op->long_name);
2306 }
2307 else
2308 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2309 option_data, op->one_char);
2310 pcregrep_exit(usage(2));
2311 }
2312 if (op->type == OP_LONGNUMBER)
2313 *((unsigned long int *)op->dataptr) = n;
2314 else
2315 *((int *)op->dataptr) = n;
2316 }
2317 }
2318
2319 /* Options have been decoded. If -C was used, its value is used as a default
2320 for -A and -B. */
2321
2322 if (both_context > 0)
2323 {
2324 if (after_context == 0) after_context = both_context;
2325 if (before_context == 0) before_context = both_context;
2326 }
2327
2328 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2329 However, the latter two set only_matching. */
2330
2331 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2332 (file_offsets && line_offsets))
2333 {
2334 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2335 "and/or --line-offsets\n");
2336 pcregrep_exit(usage(2));
2337 }
2338
2339 if (file_offsets || line_offsets) only_matching = 0;
2340
2341 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2342 LC_ALL environment variable is set, and if so, use it. */
2343
2344 if (locale == NULL)
2345 {
2346 locale = getenv("LC_ALL");
2347 locale_from = "LCC_ALL";
2348 }
2349
2350 if (locale == NULL)
2351 {
2352 locale = getenv("LC_CTYPE");
2353 locale_from = "LC_CTYPE";
2354 }
2355
2356 /* If a locale has been provided, set it, and generate the tables the PCRE
2357 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2358
2359 if (locale != NULL)
2360 {
2361 if (setlocale(LC_CTYPE, locale) == NULL)
2362 {
2363 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2364 locale, locale_from);
2365 return 2;
2366 }
2367 pcretables = pcre_maketables();
2368 }
2369
2370 /* Sort out colouring */
2371
2372 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2373 {
2374 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2375 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2376 else
2377 {
2378 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2379 colour_option);
2380 return 2;
2381 }
2382 if (do_colour)
2383 {
2384 char *cs = getenv("PCREGREP_COLOUR");
2385 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2386 if (cs != NULL) colour_string = cs;
2387 }
2388 }
2389
2390 /* Interpret the newline type; the default settings are Unix-like. */
2391
2392 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2393 {
2394 pcre_options |= PCRE_NEWLINE_CR;
2395 endlinetype = EL_CR;
2396 }
2397 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2398 {
2399 pcre_options |= PCRE_NEWLINE_LF;
2400 endlinetype = EL_LF;
2401 }
2402 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2403 {
2404 pcre_options |= PCRE_NEWLINE_CRLF;
2405 endlinetype = EL_CRLF;
2406 }
2407 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2408 {
2409 pcre_options |= PCRE_NEWLINE_ANY;
2410 endlinetype = EL_ANY;
2411 }
2412 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2413 {
2414 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2415 endlinetype = EL_ANYCRLF;
2416 }
2417 else
2418 {
2419 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2420 return 2;
2421 }
2422
2423 /* Interpret the text values for -d and -D */
2424
2425 if (dee_option != NULL)
2426 {
2427 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2428 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2429 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2430 else
2431 {
2432 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2433 return 2;
2434 }
2435 }
2436
2437 if (DEE_option != NULL)
2438 {
2439 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2440 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2441 else
2442 {
2443 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2444 return 2;
2445 }
2446 }
2447
2448 /* Check the values for Jeffrey Friedl's debugging options. */
2449
2450 #ifdef JFRIEDL_DEBUG
2451 if (S_arg > 9)
2452 {
2453 fprintf(stderr, "pcregrep: bad value for -S option\n");
2454 return 2;
2455 }
2456 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2457 {
2458 if (jfriedl_XT == 0) jfriedl_XT = 1;
2459 if (jfriedl_XR == 0) jfriedl_XR = 1;
2460 }
2461 #endif
2462
2463 /* Get memory to store the pattern and hints lists. */
2464
2465 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2466 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2467
2468 if (pattern_list == NULL || hints_list == NULL)
2469 {
2470 fprintf(stderr, "pcregrep: malloc failed\n");
2471 goto EXIT2;
2472 }
2473
2474 /* If no patterns were provided by -e, and there is no file provided by -f,
2475 the first argument is the one and only pattern, and it must exist. */
2476
2477 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2478 {
2479 if (i >= argc) return usage(2);
2480 patterns[cmd_pattern_count++] = argv[i++];
2481 }
2482
2483 /* Compile the patterns that were provided on the command line, either by
2484 multiple uses of -e or as a single unkeyed pattern. */
2485
2486 for (j = 0; j < cmd_pattern_count; j++)
2487 {
2488 if (!compile_pattern(patterns[j], pcre_options, NULL,
2489 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2490 goto EXIT2;
2491 }
2492
2493 /* Compile the regular expressions that are provided in a file. */
2494
2495 if (pattern_filename != NULL)
2496 {
2497 int linenumber = 0;
2498 FILE *f;
2499 char *filename;
2500 char buffer[MBUFTHIRD];
2501
2502 if (strcmp(pattern_filename, "-") == 0)
2503 {
2504 f = stdin;
2505 filename = stdin_name;
2506 }
2507 else
2508 {
2509 f = fopen(pattern_filename, "r");
2510 if (f == NULL)
2511 {
2512 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2513 strerror(errno));
2514 goto EXIT2;
2515 }
2516 filename = pattern_filename;
2517 }
2518
2519 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2520 {
2521 char *s = buffer + (int)strlen(buffer);
2522 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2523 *s = 0;
2524 linenumber++;
2525 if (buffer[0] == 0) continue; /* Skip blank lines */
2526 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2527 goto EXIT2;
2528 }
2529
2530 if (f != stdin) fclose(f);
2531 }
2532
2533 /* Study the regular expressions, as we will be running them many times */
2534
2535 for (j = 0; j < pattern_count; j++)
2536 {
2537 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2538 if (error != NULL)
2539 {
2540 char s[16];
2541 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2542 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2543 goto EXIT2;
2544 }
2545 hint_count++;
2546 }
2547
2548 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2549 pcre_extra block for each pattern. */
2550
2551 if (match_limit > 0 || match_limit_recursion > 0)
2552 {
2553 for (j = 0; j < pattern_count; j++)
2554 {
2555 if (hints_list[j] == NULL)
2556 {
2557 hints_list[j] = malloc(sizeof(pcre_extra));
2558 if (hints_list[j] == NULL)
2559 {
2560 fprintf(stderr, "pcregrep: malloc failed\n");
2561 pcregrep_exit(2);
2562 }
2563 }
2564 if (match_limit > 0)
2565 {
2566 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2567 hints_list[j]->match_limit = match_limit;
2568 }
2569 if (match_limit_recursion > 0)
2570 {
2571 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2572 hints_list[j]->match_limit_recursion = match_limit_recursion;
2573 }
2574 }
2575 }
2576
2577 /* If there are include or exclude patterns, compile them. */
2578
2579 if (exclude_pattern != NULL)
2580 {
2581 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2582 pcretables);
2583 if (exclude_compiled == NULL)
2584 {
2585 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2586 errptr, error);
2587 goto EXIT2;
2588 }
2589 }
2590
2591 if (include_pattern != NULL)
2592 {
2593 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2594 pcretables);
2595 if (include_compiled == NULL)
2596 {
2597 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2598 errptr, error);
2599 goto EXIT2;
2600 }
2601 }
2602
2603 if (exclude_dir_pattern != NULL)
2604 {
2605 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2606 pcretables);
2607 if (exclude_dir_compiled == NULL)
2608 {
2609 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2610 errptr, error);
2611 goto EXIT2;
2612 }
2613 }
2614
2615 if (include_dir_pattern != NULL)
2616 {
2617 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2618 pcretables);
2619 if (include_dir_compiled == NULL)
2620 {
2621 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2622 errptr, error);
2623 goto EXIT2;
2624 }
2625 }
2626
2627 /* If there are no further arguments, do the business on stdin and exit. */
2628
2629 if (i >= argc)
2630 {
2631 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2632 goto EXIT;
2633 }
2634
2635 /* Otherwise, work through the remaining arguments as files or directories.
2636 Pass in the fact that there is only one argument at top level - this suppresses
2637 the file name if the argument is not a directory and filenames are not
2638 otherwise forced. */
2639
2640 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2641
2642 for (; i < argc; i++)
2643 {
2644 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2645 only_one_at_top);
2646 if (frc > 1) rc = frc;
2647 else if (frc == 0 && rc == 1) rc = 0;
2648 }
2649
2650 EXIT:
2651 if (pattern_list != NULL)
2652 {
2653 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2654 free(pattern_list);
2655 }
2656 if (hints_list != NULL)
2657 {
2658 for (i = 0; i < hint_count; i++)
2659 {
2660 if (hints_list[i] != NULL) free(hints_list[i]);
2661 }
2662 free(hints_list);
2663 }
2664 pcregrep_exit(rc);
2665
2666 EXIT2:
2667 rc = 2;
2668 goto EXIT;
2669 }
2670
2671 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5