/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 571 - (show annotations)
Tue Nov 16 17:51:37 2010 UTC (4 years, 9 months ago) by ph10
File MIME type: text/plain
File size: 76791 byte(s)
Error occurred while calculating annotation data.
Rename --{in,ex}clude_dir with hyphen instead of underscore in pcregrep, but 
leave the old versions as undocumented synonyms. (In GNU grep, hyphens are 
used.)
1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2010 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
68 #define FALSE 0
69 #define TRUE 1
70
71 typedef int BOOL;
72
73 #define MAX_PATTERN_COUNT 100
74 #define OFFSET_SIZE 99
75
76 #if BUFSIZ > 8192
77 #define MBUFTHIRD BUFSIZ
78 #else
79 #define MBUFTHIRD 8192
80 #endif
81
82 /* Values for the "filenames" variable, which specifies options for file name
83 output. The order is important; it is assumed that a file name is wanted for
84 all values greater than FN_DEFAULT. */
85
86 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87
88 /* File reading styles */
89
90 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91
92 /* Actions for the -d and -D options */
93
94 enum { dee_READ, dee_SKIP, dee_RECURSE };
95 enum { DEE_READ, DEE_SKIP };
96
97 /* Actions for special processing options (flag bits) */
98
99 #define PO_WORD_MATCH 0x0001
100 #define PO_LINE_MATCH 0x0002
101 #define PO_FIXED_STRINGS 0x0004
102
103 /* Line ending types */
104
105 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that consumes the result in an empty "if". The expansion
is wrapped in do { } while (0) so that "FWRITE(...);" is exactly one statement
and can be used as the unbraced body of an if/else without capturing the
"else" or leaving a stray semicolon. Oddly, the warning does not also seem to
apply to fprintf(). */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114
115
116
117 /*************************************************
118 * Global variables *
119 *************************************************/
120
121 /* Jeffrey Friedl has some debugging requirements that are not part of the
122 regular code. */
123
124 #ifdef JFRIEDL_DEBUG
125 static int S_arg = -1;
126 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128 static const char *jfriedl_prefix = "";
129 static const char *jfriedl_postfix = "";
130 #endif
131
132 static int endlinetype;
133
134 static char *colour_string = (char *)"1;31";
135 static char *colour_option = NULL;
136 static char *dee_option = NULL;
137 static char *DEE_option = NULL;
138 static char *newline = NULL;
139 static char *pattern_filename = NULL;
140 static char *stdin_name = (char *)"(standard input)";
141 static char *locale = NULL;
142
143 static const unsigned char *pcretables = NULL;
144
145 static int pattern_count = 0;
146 static pcre **pattern_list = NULL;
147 static pcre_extra **hints_list = NULL;
148
149 static char *include_pattern = NULL;
150 static char *exclude_pattern = NULL;
151 static char *include_dir_pattern = NULL;
152 static char *exclude_dir_pattern = NULL;
153
154 static pcre *include_compiled = NULL;
155 static pcre *exclude_compiled = NULL;
156 static pcre *include_dir_compiled = NULL;
157 static pcre *exclude_dir_compiled = NULL;
158
159 static int after_context = 0;
160 static int before_context = 0;
161 static int both_context = 0;
162 static int dee_action = dee_READ;
163 static int DEE_action = DEE_READ;
164 static int error_count = 0;
165 static int filenames = FN_DEFAULT;
166 static int only_matching = -1;
167 static int process_options = 0;
168
169 static unsigned long int match_limit = 0;
170 static unsigned long int match_limit_recursion = 0;
171
172 static BOOL count_only = FALSE;
173 static BOOL do_colour = FALSE;
174 static BOOL file_offsets = FALSE;
175 static BOOL hyphenpending = FALSE;
176 static BOOL invert = FALSE;
177 static BOOL line_buffered = FALSE;
178 static BOOL line_offsets = FALSE;
179 static BOOL multiline = FALSE;
180 static BOOL number = FALSE;
181 static BOOL omit_zero_count = FALSE;
182 static BOOL resource_error = FALSE;
183 static BOOL quiet = FALSE;
184 static BOOL silent = FALSE;
185 static BOOL utf8 = FALSE;
186
187 /* Structure for options and list of them */
188
189 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
190 OP_PATLIST };
191
192 typedef struct option_item {
193 int type;
194 int one_char;
195 void *dataptr;
196 const char *long_name;
197 const char *help_text;
198 } option_item;
199
200 /* Options without a single-letter equivalent get a negative value. This can be
201 used to identify them. */
202
203 #define N_COLOUR (-1)
204 #define N_EXCLUDE (-2)
205 #define N_EXCLUDE_DIR (-3)
206 #define N_HELP (-4)
207 #define N_INCLUDE (-5)
208 #define N_INCLUDE_DIR (-6)
209 #define N_LABEL (-7)
210 #define N_LOCALE (-8)
211 #define N_NULL (-9)
212 #define N_LOFFSETS (-10)
213 #define N_FOFFSETS (-11)
214 #define N_LBUFFER (-12)
215 #define N_M_LIMIT (-13)
216 #define N_M_LIMIT_REC (-14)
217
218 static option_item optionlist[] = {
219 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
220 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
221 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
222 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
223 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
224 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
225 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
226 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
227 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
228 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
229 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
230 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
231 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
232 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
233 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
234 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
235 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
236 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
237 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
238 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
239 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
240 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
241 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
242 { OP_NUMBER, N_M_LIMIT,&match_limit, "match-limit=number", "set PCRE match limit option" },
243 { OP_NUMBER, N_M_LIMIT_REC,&match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
245 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
247 { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
248 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
249 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
250 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
251 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
252 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
253 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
254
255 /* These two were accidentally implemented with underscores instead of
256 hyphens in the option names. As this was not discovered for several releases,
257 the incorrect versions are left in the table for compatibility. However, the
258 --help function misses out any option that has an underscore in its name. */
259
260 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
261 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
262
263 #ifdef JFRIEDL_DEBUG
264 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
265 #endif
266 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
267 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
268 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
269 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
270 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
271 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
272 { OP_NODATA, 0, NULL, NULL, NULL }
273 };
274
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively (the
PO_WORD_MATCH, PO_LINE_MATCH and PO_FIXED_STRINGS flags), which is why each
table has eight entries: presumably the tables are indexed by the
process_options value - confirm against the pattern-compiling code. Entry n of
suffix[] closes whatever entry n of prefix[] opened. Note that the combination
of -w and -x has the same effect as -x on its own, so we can treat them as the
same (entries 3 and 7 repeat entries 2 and 6). */

static const char *prefix[] = {
"", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };

static const char *suffix[] = {
"", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
285
/* UTF-8 tables - used by end_of_line() and previous_line() when the newline
setting is "any" or "anycrlf" and UTF-8 mode is on.

utf8_table3 is indexed by the number of additional bytes in a character and
gives the mask that extracts the data bits from the first byte. */

const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

/* utf8_table4 is indexed by the bottom six bits of a first byte whose value is
0xc0 or greater, and gives the number of additional bytes in the character. */

const char utf8_table4[] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
295
296
297
298 /*************************************************
299 * OS-specific functions *
300 *************************************************/
301
302 /* These functions are defined so that they can be made system specific,
303 although at present the only ones are for Unix, Win32, and for "no support". */
304
305
306 /************* Directory scanning in Unix ***********/
307
308 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
309 #include <sys/types.h>
310 #include <sys/stat.h>
311 #include <dirent.h>
312
313 typedef DIR directory_type;
314
/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory, otherwise 0

A stat() failure yields 0 so that the caller goes on to try opening the path
as a file, in the expectation that the open will fail and be reported there.
The POSIX S_ISDIR() macro is used rather than the S_IFMT/S_IFDIR masks, which
are XSI extensions and are not visible under strict feature-test settings. */

static int
isdirectory(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 0;        /* In the expectation that opening as a file will fail */
return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
323
324 static directory_type *
325 opendirectory(char *filename)
326 {
327 return opendir(filename);
328 }
329
330 static char *
331 readdirectory(directory_type *dir)
332 {
333 for (;;)
334 {
335 struct dirent *dent = readdir(dir);
336 if (dent == NULL) return NULL;
337 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
338 return dent->d_name;
339 }
340 /* Control never reaches here */
341 }
342
343 static void
344 closedirectory(directory_type *dir)
345 {
346 closedir(dir);
347 }
348
349
350 /************* Test for regular file in Unix **********/
351
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    non-zero if stat() reports a regular file, or if stat() fails;
            zero for anything else

A stat() failure yields 1 so that the caller goes on to try opening the path,
in the expectation that the open will fail and be reported there. The POSIX
S_ISREG() macro is used rather than the S_IFMT/S_IFREG masks, which are XSI
extensions and are not visible under strict feature-test settings. */

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */
return S_ISREG(statbuf.st_mode);
}
360
361
362 /************* Test for a terminal in Unix **********/
363
364 static BOOL
365 is_stdout_tty(void)
366 {
367 return isatty(fileno(stdout));
368 }
369
370 static BOOL
371 is_file_tty(FILE *f)
372 {
373 return isatty(fileno(f));
374 }
375
376
377 /************* Directory scanning in Win32 ***********/
378
379 /* I (Philip Hazel) have no means of testing this code. It was contributed by
380 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
381 when it did not exist. David Byron added a patch that moved the #include of
382 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
383 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
384 undefined when it is indeed undefined. */
385
386 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
387
388 #ifndef STRICT
389 # define STRICT
390 #endif
391 #ifndef WIN32_LEAN_AND_MEAN
392 # define WIN32_LEAN_AND_MEAN
393 #endif
394
395 #include <windows.h>
396
397 #ifndef INVALID_FILE_ATTRIBUTES
398 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
399 #endif
400
/* State for one directory scan under Win32. FindFirstFile() both opens the
scan and delivers the first entry, so that entry is held here until
readdirectory() is first called. */

typedef struct directory_type
{
HANDLE handle;           /* search handle returned by FindFirstFile() */
BOOL first;              /* TRUE until the first entry has been handed out */
WIN32_FIND_DATA data;    /* the entry most recently found */
} directory_type;
407
408 int
409 isdirectory(char *filename)
410 {
411 DWORD attr = GetFileAttributes(filename);
412 if (attr == INVALID_FILE_ATTRIBUTES)
413 return 0;
414 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
415 }
416
417 directory_type *
418 opendirectory(char *filename)
419 {
420 size_t len;
421 char *pattern;
422 directory_type *dir;
423 DWORD err;
424 len = strlen(filename);
425 pattern = (char *) malloc(len + 3);
426 dir = (directory_type *) malloc(sizeof(*dir));
427 if ((pattern == NULL) || (dir == NULL))
428 {
429 fprintf(stderr, "pcregrep: malloc failed\n");
430 pcregrep_exit(2);
431 }
432 memcpy(pattern, filename, len);
433 memcpy(&(pattern[len]), "\\*", 3);
434 dir->handle = FindFirstFile(pattern, &(dir->data));
435 if (dir->handle != INVALID_HANDLE_VALUE)
436 {
437 free(pattern);
438 dir->first = TRUE;
439 return dir;
440 }
441 err = GetLastError();
442 free(pattern);
443 free(dir);
444 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
445 return NULL;
446 }
447
448 char *
449 readdirectory(directory_type *dir)
450 {
451 for (;;)
452 {
453 if (!dir->first)
454 {
455 if (!FindNextFile(dir->handle, &(dir->data)))
456 return NULL;
457 }
458 else
459 {
460 dir->first = FALSE;
461 }
462 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
463 return dir->data.cFileName;
464 }
465 #ifndef _MSC_VER
466 return NULL; /* Keep compiler happy; never executed */
467 #endif
468 }
469
470 void
471 closedirectory(directory_type *dir)
472 {
473 FindClose(dir->handle);
474 free(dir);
475 }
476
477
478 /************* Test for regular file in Win32 **********/
479
/* I don't know how to do this, or if it can be done; every path that
isdirectory() does not accept is assumed to be a regular file. */

int
isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
487
488
489 /************* Test for a terminal in Win32 **********/
490
491 /* I don't know how to do this; assume never */
492
493 static BOOL
494 is_stdout_tty(void)
495 {
496 return FALSE;
497 }
498
499 static BOOL
500 is_file_tty(FILE *f)
501 {
502 return FALSE;
503 }
504
505
506 /************* Directory scanning when we can't do it ***********/
507
508 /* The type is void, and apart from isdirectory(), the functions do nothing. */
509
510 #else
511
typedef void directory_type;

/* Nothing is ever reported as being a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* There is nothing that can be opened, so the result is always a null
pointer. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type*)0;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char*)0;
}

/* There is nothing to close. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
518
519
520 /************* Test for regular when we can't do it **********/
521
/* With no means of testing, every path is taken to be a regular file. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
525
526
527 /************* Test for a terminal when we can't do it **********/
528
529 static BOOL
530 is_stdout_tty(void)
531 {
532 return FALSE;
533 }
534
535 static BOOL
536 is_file_tty(FILE *f)
537 {
538 return FALSE;
539 }
540
541 #endif
542
543
544
545 #ifndef HAVE_STRERROR
546 /*************************************************
547 * Provide strerror() for non-ANSI libraries *
548 *************************************************/
549
/* Some old-fashioned systems still around (e.g. SunOS4) lack strerror() in
their libraries, but do provide the sys_errlist[] table of messages and its
size sys_nerr, from which this substitute is built. A number outside the table
gets a fixed "unknown" text. */

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
563 #endif /* HAVE_STRERROR */
564
565
566
567 /*************************************************
568 * Exit from the program *
569 *************************************************/
570
571 /* If there has been a resource error, give a suitable message.
572
573 Argument: the return code
574 Returns: does not return
575 */
576
577 static void
578 pcregrep_exit(int rc)
579 {
580 if (resource_error)
581 {
582 fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
583 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
584 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
585 }
586
587 exit(rc);
588 }
589
590
591
592 /*************************************************
593 * Read one line of input *
594 *************************************************/
595
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a '\n' has been stored, when "length" bytes have been
stored, or at end of file, whichever comes first. The space left is checked
before each byte is fetched, so a length that is zero or negative stores
nothing and consumes no input. No terminating zero is added.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;
while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
623
624
625
626 /*************************************************
627 * Find end of line *
628 *************************************************/
629
630 /* The length of the endline sequence that is found is set via lenptr. This may
631 be zero at the very end of the file if there is no line-ending sequence there.
632
633 Arguments:
634 p current position in line
635 endptr end of available data
636 lenptr where to put the length of the eol sequence
637
638 Returns: pointer to the last byte of the line
639 */
640
641 static char *
642 end_of_line(char *p, char *endptr, int *lenptr)
643 {
644 switch(endlinetype)
645 {
646 default: /* Just in case */
647 case EL_LF:
648 while (p < endptr && *p != '\n') p++;
649 if (p < endptr)
650 {
651 *lenptr = 1;
652 return p + 1;
653 }
654 *lenptr = 0;
655 return endptr;
656
657 case EL_CR:
658 while (p < endptr && *p != '\r') p++;
659 if (p < endptr)
660 {
661 *lenptr = 1;
662 return p + 1;
663 }
664 *lenptr = 0;
665 return endptr;
666
667 case EL_CRLF:
668 for (;;)
669 {
670 while (p < endptr && *p != '\r') p++;
671 if (++p >= endptr)
672 {
673 *lenptr = 0;
674 return endptr;
675 }
676 if (*p == '\n')
677 {
678 *lenptr = 2;
679 return p + 1;
680 }
681 }
682 break;
683
684 case EL_ANYCRLF:
685 while (p < endptr)
686 {
687 int extra = 0;
688 register int c = *((unsigned char *)p);
689
690 if (utf8 && c >= 0xc0)
691 {
692 int gcii, gcss;
693 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
694 gcss = 6*extra;
695 c = (c & utf8_table3[extra]) << gcss;
696 for (gcii = 1; gcii <= extra; gcii++)
697 {
698 gcss -= 6;
699 c |= (p[gcii] & 0x3f) << gcss;
700 }
701 }
702
703 p += 1 + extra;
704
705 switch (c)
706 {
707 case 0x0a: /* LF */
708 *lenptr = 1;
709 return p;
710
711 case 0x0d: /* CR */
712 if (p < endptr && *p == 0x0a)
713 {
714 *lenptr = 2;
715 p++;
716 }
717 else *lenptr = 1;
718 return p;
719
720 default:
721 break;
722 }
723 } /* End of loop for ANYCRLF case */
724
725 *lenptr = 0; /* Must have hit the end */
726 return endptr;
727
728 case EL_ANY:
729 while (p < endptr)
730 {
731 int extra = 0;
732 register int c = *((unsigned char *)p);
733
734 if (utf8 && c >= 0xc0)
735 {
736 int gcii, gcss;
737 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
738 gcss = 6*extra;
739 c = (c & utf8_table3[extra]) << gcss;
740 for (gcii = 1; gcii <= extra; gcii++)
741 {
742 gcss -= 6;
743 c |= (p[gcii] & 0x3f) << gcss;
744 }
745 }
746
747 p += 1 + extra;
748
749 switch (c)
750 {
751 case 0x0a: /* LF */
752 case 0x0b: /* VT */
753 case 0x0c: /* FF */
754 *lenptr = 1;
755 return p;
756
757 case 0x0d: /* CR */
758 if (p < endptr && *p == 0x0a)
759 {
760 *lenptr = 2;
761 p++;
762 }
763 else *lenptr = 1;
764 return p;
765
766 case 0x85: /* NEL */
767 *lenptr = utf8? 2 : 1;
768 return p;
769
770 case 0x2028: /* LS */
771 case 0x2029: /* PS */
772 *lenptr = 3;
773 return p;
774
775 default:
776 break;
777 }
778 } /* End of loop for ANY case */
779
780 *lenptr = 0; /* Must have hit the end */
781 return endptr;
782 } /* End of overall switch */
783 }
784
785
786
787 /*************************************************
788 * Find start of previous line *
789 *************************************************/
790
791 /* This is called when looking back for before lines to print.
792
793 Arguments:
794 p start of the subsequent line
795 startptr start of available data
796
797 Returns: pointer to the start of the previous line
798 */
799
800 static char *
801 previous_line(char *p, char *startptr)
802 {
803 switch(endlinetype)
804 {
805 default: /* Just in case */
806 case EL_LF:
807 p--;
808 while (p > startptr && p[-1] != '\n') p--;
809 return p;
810
811 case EL_CR:
812 p--;
813 while (p > startptr && p[-1] != '\n') p--;
814 return p;
815
816 case EL_CRLF:
817 for (;;)
818 {
819 p -= 2;
820 while (p > startptr && p[-1] != '\n') p--;
821 if (p <= startptr + 1 || p[-2] == '\r') return p;
822 }
823 return p; /* But control should never get here */
824
825 case EL_ANY:
826 case EL_ANYCRLF:
827 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
828 if (utf8) while ((*p & 0xc0) == 0x80) p--;
829
830 while (p > startptr)
831 {
832 register int c;
833 char *pp = p - 1;
834
835 if (utf8)
836 {
837 int extra = 0;
838 while ((*pp & 0xc0) == 0x80) pp--;
839 c = *((unsigned char *)pp);
840 if (c >= 0xc0)
841 {
842 int gcii, gcss;
843 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
844 gcss = 6*extra;
845 c = (c & utf8_table3[extra]) << gcss;
846 for (gcii = 1; gcii <= extra; gcii++)
847 {
848 gcss -= 6;
849 c |= (pp[gcii] & 0x3f) << gcss;
850 }
851 }
852 }
853 else c = *((unsigned char *)pp);
854
855 if (endlinetype == EL_ANYCRLF) switch (c)
856 {
857 case 0x0a: /* LF */
858 case 0x0d: /* CR */
859 return p;
860
861 default:
862 break;
863 }
864
865 else switch (c)
866 {
867 case 0x0a: /* LF */
868 case 0x0b: /* VT */
869 case 0x0c: /* FF */
870 case 0x0d: /* CR */
871 case 0x85: /* NEL */
872 case 0x2028: /* LS */
873 case 0x2029: /* PS */
874 return p;
875
876 default:
877 break;
878 }
879
880 p = pp; /* Back one character */
881 } /* End of loop for ANY case */
882
883 return startptr; /* Hit start of data */
884 } /* End of overall switch */
885 }
886
887
888
889
890
891 /*************************************************
892 * Print the previous "after" lines *
893 *************************************************/
894
895 /* This is called if we are about to lose said lines because of buffer filling,
896 and at the end of the file. The data in the line is written using fwrite() so
897 that a binary zero does not terminate it.
898
899 Arguments:
900 lastmatchnumber the number of the last matching line, plus one
901 lastmatchrestart where we restarted after the last match
902 endptr end of available data
903 printname filename for printing
904
905 Returns: nothing
906 */
907
908 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
909 char *endptr, char *printname)
910 {
911 if (after_context > 0 && lastmatchnumber > 0)
912 {
913 int count = 0;
914 while (lastmatchrestart < endptr && count++ < after_context)
915 {
916 int ellength;
917 char *pp = lastmatchrestart;
918 if (printname != NULL) fprintf(stdout, "%s-", printname);
919 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
920 pp = end_of_line(pp, endptr, &ellength);
921 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
922 lastmatchrestart = pp;
923 }
924 hyphenpending = TRUE;
925 }
926 }
927
928
929
930 /*************************************************
931 * Apply patterns to subject till one matches *
932 *************************************************/
933
934 /* This function is called to run through all patterns, looking for a match. It
935 is used multiple times for the same subject when colouring is enabled, in order
936 to find all possible matches.
937
938 Arguments:
939 matchptr the start of the subject
940 length the length of the subject to match
941 offsets the offets vector to fill in
942 mrc address of where to put the result of pcre_exec()
943
944 Returns: TRUE if there was a match
945 FALSE if there was no match
946 invert if there was a non-fatal error
947 */
948
949 static BOOL
950 match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
951 {
952 int i;
953 size_t slen = length;
954 const char *msg = "this text:\n\n";
955 if (slen > 200)
956 {
957 slen = 200;
958 msg = "text that starts:\n\n";
959 }
960 for (i = 0; i < pattern_count; i++)
961 {
962 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
963 PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
964 if (*mrc >= 0) return TRUE;
965 if (*mrc == PCRE_ERROR_NOMATCH) continue;
966 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
967 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
968 fprintf(stderr, "%s", msg);
969 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
970 fprintf(stderr, "\n\n");
971 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
972 resource_error = TRUE;
973 if (error_count++ > 20)
974 {
975 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
976 pcregrep_exit(2);
977 }
978 return invert; /* No more matching; don't show the line again */
979 }
980
981 return FALSE; /* No match, no errors */
982 }
983
984
985
986 /*************************************************
987 * Grep an individual file *
988 *************************************************/
989
990 /* This is called from grep_or_recurse() below. It uses a buffer that is three
991 times the value of MBUFTHIRD. The matching point is never allowed to stray into
992 the top third of the buffer, thus keeping more of the file available for
993 context printing or for multiline scanning. For large files, the pointer will
994 be in the middle third most of the time, so the bottom third is available for
995 "before" context printing.
996
997 Arguments:
998 handle the fopened FILE stream for a normal file
999 the gzFile pointer when reading is via libz
1000 the BZFILE pointer when reading is via libbz2
1001 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1002 printname the file name if it is to be printed for each match
1003 or NULL if the file name is not to be printed
1004 it cannot be NULL if filenames[_nomatch]_only is set
1005
1006 Returns: 0 if there was at least one match
1007 1 otherwise (no matches)
1008 2 if there is a read error on a .bz2 file
1009 */
1010
1011 static int
1012 pcregrep(void *handle, int frtype, char *printname)
1013 {
1014 int rc = 1;
1015 int linenumber = 1;
1016 int lastmatchnumber = 0;
1017 int count = 0;
1018 int filepos = 0;
1019 int offsets[OFFSET_SIZE];
1020 char *lastmatchrestart = NULL;
1021 char buffer[3*MBUFTHIRD];
1022 char *ptr = buffer;
1023 char *endptr;
1024 size_t bufflength;
1025 BOOL endhyphenpending = FALSE;
1026 BOOL input_line_buffered = line_buffered;
1027 FILE *in = NULL; /* Ensure initialized */
1028
1029 #ifdef SUPPORT_LIBZ
1030 gzFile ingz = NULL;
1031 #endif
1032
1033 #ifdef SUPPORT_LIBBZ2
1034 BZFILE *inbz2 = NULL;
1035 #endif
1036
1037
1038 /* Do the first read into the start of the buffer and set up the pointer to end
1039 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1040 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1041 fail. */
1042
1043 #ifdef SUPPORT_LIBZ
1044 if (frtype == FR_LIBZ)
1045 {
1046 ingz = (gzFile)handle;
1047 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1048 }
1049 else
1050 #endif
1051
1052 #ifdef SUPPORT_LIBBZ2
1053 if (frtype == FR_LIBBZ2)
1054 {
1055 inbz2 = (BZFILE *)handle;
1056 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1057 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1058 } /* without the cast it is unsigned. */
1059 else
1060 #endif
1061
1062 {
1063 in = (FILE *)handle;
1064 if (is_file_tty(in)) input_line_buffered = TRUE;
1065 bufflength = input_line_buffered?
1066 read_one_line(buffer, 3*MBUFTHIRD, in) :
1067 fread(buffer, 1, 3*MBUFTHIRD, in);
1068 }
1069
1070 endptr = buffer + bufflength;
1071
1072 /* Loop while the current pointer is not at the end of the file. For large
1073 files, endptr will be at the end of the buffer when we are in the middle of the
1074 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1075 way, the buffer is shifted left and re-filled. */
1076
1077 while (ptr < endptr)
1078 {
1079 int endlinelength;
1080 int mrc = 0;
1081 BOOL match;
1082 char *matchptr = ptr;
1083 char *t = ptr;
1084 size_t length, linelength;
1085
1086 /* At this point, ptr is at the start of a line. We need to find the length
1087 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1088 length remainder of the data in the buffer. Otherwise, it is the length of
1089 the next line, excluding the terminating newline. After matching, we always
1090 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1091 option is used for compiling, so that any match is constrained to be in the
1092 first line. */
1093
1094 t = end_of_line(t, endptr, &endlinelength);
1095 linelength = t - ptr - endlinelength;
1096 length = multiline? (size_t)(endptr - ptr) : linelength;
1097
1098 /* Extra processing for Jeffrey Friedl's debugging. */
1099
1100 #ifdef JFRIEDL_DEBUG
1101 if (jfriedl_XT || jfriedl_XR)
1102 {
1103 #include <sys/time.h>
1104 #include <time.h>
1105 struct timeval start_time, end_time;
1106 struct timezone dummy;
1107 int i;
1108
1109 if (jfriedl_XT)
1110 {
1111 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1112 const char *orig = ptr;
1113 ptr = malloc(newlen + 1);
1114 if (!ptr) {
1115 printf("out of memory");
1116 pcregrep_exit(2);
1117 }
1118 endptr = ptr;
1119 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1120 for (i = 0; i < jfriedl_XT; i++) {
1121 strncpy(endptr, orig, length);
1122 endptr += length;
1123 }
1124 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1125 length = newlen;
1126 }
1127
1128 if (gettimeofday(&start_time, &dummy) != 0)
1129 perror("bad gettimeofday");
1130
1131
1132 for (i = 0; i < jfriedl_XR; i++)
1133 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1134 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1135
1136 if (gettimeofday(&end_time, &dummy) != 0)
1137 perror("bad gettimeofday");
1138
1139 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1140 -
1141 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1142
1143 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1144 return 0;
1145 }
1146 #endif
1147
1148 /* We come back here after a match when the -o option (only_matching) is set,
1149 in order to find any further matches in the same line. */
1150
1151 ONLY_MATCHING_RESTART:
1152
1153 /* Run through all the patterns until one matches or there is an error other
1154 than NOMATCH. This code is in a subroutine so that it can be re-used for
1155 finding subsequent matches when colouring matched lines. */
1156
1157 match = match_patterns(matchptr, length, offsets, &mrc);
1158
1159 /* If it's a match or a not-match (as required), do what's wanted. */
1160
1161 if (match != invert)
1162 {
1163 BOOL hyphenprinted = FALSE;
1164
1165 /* We've failed if we want a file that doesn't have any matches. */
1166
1167 if (filenames == FN_NOMATCH_ONLY) return 1;
1168
1169 /* Just count if just counting is wanted. */
1170
1171 if (count_only) count++;
1172
1173 /* If all we want is a file name, there is no need to scan any more lines
1174 in the file. */
1175
1176 else if (filenames == FN_MATCH_ONLY)
1177 {
1178 fprintf(stdout, "%s\n", printname);
1179 return 0;
1180 }
1181
1182 /* Likewise, if all we want is a yes/no answer. */
1183
1184 else if (quiet) return 0;
1185
1186 /* The --only-matching option prints just the substring that matched, or a
1187 captured portion of it, as long as this string is not empty, and the
1188 --file-offsets and --line-offsets options output offsets for the matching
1189 substring (they both force --only-matching = 0). None of these options
1190 prints any context. Afterwards, adjust the start and length, and then jump
1191 back to look for further matches in the same line. If we are in invert
1192 mode, however, nothing is printed and we do not restart - this could still
1193 be useful because the return code is set. */
1194
1195 else if (only_matching >= 0)
1196 {
1197 if (!invert)
1198 {
1199 if (printname != NULL) fprintf(stdout, "%s:", printname);
1200 if (number) fprintf(stdout, "%d:", linenumber);
1201 if (line_offsets)
1202 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1203 offsets[1] - offsets[0]);
1204 else if (file_offsets)
1205 fprintf(stdout, "%d,%d\n",
1206 (int)(filepos + matchptr + offsets[0] - ptr),
1207 offsets[1] - offsets[0]);
1208 else if (only_matching < mrc)
1209 {
1210 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1211 if (plen > 0)
1212 {
1213 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1214 FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1215 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1216 fprintf(stdout, "\n");
1217 }
1218 }
1219 else if (printname != NULL || number) fprintf(stdout, "\n");
1220 matchptr += offsets[1];
1221 length -= offsets[1];
1222 match = FALSE;
1223 if (line_buffered) fflush(stdout);
1224 rc = 0; /* Had some success */
1225 goto ONLY_MATCHING_RESTART;
1226 }
1227 }
1228
1229 /* This is the default case when none of the above options is set. We print
1230 the matching lines(s), possibly preceded and/or followed by other lines of
1231 context. */
1232
1233 else
1234 {
1235 /* See if there is a requirement to print some "after" lines from a
1236 previous match. We never print any overlaps. */
1237
1238 if (after_context > 0 && lastmatchnumber > 0)
1239 {
1240 int ellength;
1241 int linecount = 0;
1242 char *p = lastmatchrestart;
1243
1244 while (p < ptr && linecount < after_context)
1245 {
1246 p = end_of_line(p, ptr, &ellength);
1247 linecount++;
1248 }
1249
1250 /* It is important to advance lastmatchrestart during this printing so
1251 that it interacts correctly with any "before" printing below. Print
1252 each line's data using fwrite() in case there are binary zeroes. */
1253
1254 while (lastmatchrestart < p)
1255 {
1256 char *pp = lastmatchrestart;
1257 if (printname != NULL) fprintf(stdout, "%s-", printname);
1258 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1259 pp = end_of_line(pp, endptr, &ellength);
1260 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1261 lastmatchrestart = pp;
1262 }
1263 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1264 }
1265
1266 /* If there were non-contiguous lines printed above, insert hyphens. */
1267
1268 if (hyphenpending)
1269 {
1270 fprintf(stdout, "--\n");
1271 hyphenpending = FALSE;
1272 hyphenprinted = TRUE;
1273 }
1274
1275 /* See if there is a requirement to print some "before" lines for this
1276 match. Again, don't print overlaps. */
1277
1278 if (before_context > 0)
1279 {
1280 int linecount = 0;
1281 char *p = ptr;
1282
1283 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1284 linecount < before_context)
1285 {
1286 linecount++;
1287 p = previous_line(p, buffer);
1288 }
1289
1290 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1291 fprintf(stdout, "--\n");
1292
1293 while (p < ptr)
1294 {
1295 int ellength;
1296 char *pp = p;
1297 if (printname != NULL) fprintf(stdout, "%s-", printname);
1298 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1299 pp = end_of_line(pp, endptr, &ellength);
1300 FWRITE(p, 1, pp - p, stdout);
1301 p = pp;
1302 }
1303 }
1304
1305 /* Now print the matching line(s); ensure we set hyphenpending at the end
1306 of the file if any context lines are being output. */
1307
1308 if (after_context > 0 || before_context > 0)
1309 endhyphenpending = TRUE;
1310
1311 if (printname != NULL) fprintf(stdout, "%s:", printname);
1312 if (number) fprintf(stdout, "%d:", linenumber);
1313
1314 /* In multiline mode, we want to print to the end of the line in which
1315 the end of the matched string is found, so we adjust linelength and the
1316 line number appropriately, but only when there actually was a match
1317 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1318 the match will always be before the first newline sequence. */
1319
1320 if (multiline)
1321 {
1322 int ellength;
1323 char *endmatch = ptr;
1324 if (!invert)
1325 {
1326 endmatch += offsets[1];
1327 t = ptr;
1328 while (t < endmatch)
1329 {
1330 t = end_of_line(t, endptr, &ellength);
1331 if (t <= endmatch) linenumber++; else break;
1332 }
1333 }
1334 endmatch = end_of_line(endmatch, endptr, &ellength);
1335 linelength = endmatch - ptr - ellength;
1336 }
1337
1338 /*** NOTE: Use only fwrite() to output the data line, so that binary
1339 zeroes are treated as just another data character. */
1340
1341 /* This extra option, for Jeffrey Friedl's debugging requirements,
1342 replaces the matched string, or a specific captured string if it exists,
1343 with X. When this happens, colouring is ignored. */
1344
1345 #ifdef JFRIEDL_DEBUG
1346 if (S_arg >= 0 && S_arg < mrc)
1347 {
1348 int first = S_arg * 2;
1349 int last = first + 1;
1350 FWRITE(ptr, 1, offsets[first], stdout);
1351 fprintf(stdout, "X");
1352 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1353 }
1354 else
1355 #endif
1356
1357 /* We have to split the line(s) up if colouring, and search for further
1358 matches. */
1359
1360 if (do_colour)
1361 {
1362 int last_offset = 0;
1363 FWRITE(ptr, 1, offsets[0], stdout);
1364 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1365 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1366 fprintf(stdout, "%c[00m", 0x1b);
1367 for (;;)
1368 {
1369 last_offset += offsets[1];
1370 matchptr += offsets[1];
1371 length -= offsets[1];
1372 if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1373 FWRITE(matchptr, 1, offsets[0], stdout);
1374 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1375 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1376 fprintf(stdout, "%c[00m", 0x1b);
1377 }
1378 FWRITE(ptr + last_offset, 1,
1379 (linelength + endlinelength) - last_offset, stdout);
1380 }
1381
1382 /* Not colouring; no need to search for further matches */
1383
1384 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1385 }
1386
1387 /* End of doing what has to be done for a match. If --line-buffered was
1388 given, flush the output. */
1389
1390 if (line_buffered) fflush(stdout);
1391 rc = 0; /* Had some success */
1392
1393 /* Remember where the last match happened for after_context. We remember
1394 where we are about to restart, and that line's number. */
1395
1396 lastmatchrestart = ptr + linelength + endlinelength;
1397 lastmatchnumber = linenumber + 1;
1398 }
1399
1400 /* For a match in multiline inverted mode (which of course did not cause
1401 anything to be printed), we have to move on to the end of the match before
1402 proceeding. */
1403
1404 if (multiline && invert && match)
1405 {
1406 int ellength;
1407 char *endmatch = ptr + offsets[1];
1408 t = ptr;
1409 while (t < endmatch)
1410 {
1411 t = end_of_line(t, endptr, &ellength);
1412 if (t <= endmatch) linenumber++; else break;
1413 }
1414 endmatch = end_of_line(endmatch, endptr, &ellength);
1415 linelength = endmatch - ptr - ellength;
1416 }
1417
1418 /* Advance to after the newline and increment the line number. The file
1419 offset to the current line is maintained in filepos. */
1420
1421 ptr += linelength + endlinelength;
1422 filepos += (int)(linelength + endlinelength);
1423 linenumber++;
1424
1425 /* If input is line buffered, and the buffer is not yet full, read another
1426 line and add it into the buffer. */
1427
1428 if (input_line_buffered && bufflength < sizeof(buffer))
1429 {
1430 int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1431 bufflength += add;
1432 endptr += add;
1433 }
1434
1435 /* If we haven't yet reached the end of the file (the buffer is full), and
1436 the current point is in the top 1/3 of the buffer, slide the buffer down by
1437 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1438 about to be lost, print them. */
1439
1440 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1441 {
1442 if (after_context > 0 &&
1443 lastmatchnumber > 0 &&
1444 lastmatchrestart < buffer + MBUFTHIRD)
1445 {
1446 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1447 lastmatchnumber = 0;
1448 }
1449
1450 /* Now do the shuffle */
1451
1452 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1453 ptr -= MBUFTHIRD;
1454
1455 #ifdef SUPPORT_LIBZ
1456 if (frtype == FR_LIBZ)
1457 bufflength = 2*MBUFTHIRD +
1458 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1459 else
1460 #endif
1461
1462 #ifdef SUPPORT_LIBBZ2
1463 if (frtype == FR_LIBBZ2)
1464 bufflength = 2*MBUFTHIRD +
1465 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1466 else
1467 #endif
1468
1469 bufflength = 2*MBUFTHIRD +
1470 (input_line_buffered?
1471 read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1472 fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1473 endptr = buffer + bufflength;
1474
1475 /* Adjust any last match point */
1476
1477 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1478 }
1479 } /* Loop through the whole file */
1480
1481 /* End of file; print final "after" lines if wanted; do_after_lines sets
1482 hyphenpending if it prints something. */
1483
1484 if (only_matching < 0 && !count_only)
1485 {
1486 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1487 hyphenpending |= endhyphenpending;
1488 }
1489
1490 /* Print the file name if we are looking for those without matches and there
1491 were none. If we found a match, we won't have got this far. */
1492
1493 if (filenames == FN_NOMATCH_ONLY)
1494 {
1495 fprintf(stdout, "%s\n", printname);
1496 return 0;
1497 }
1498
1499 /* Print the match count if wanted */
1500
1501 if (count_only)
1502 {
1503 if (count > 0 || !omit_zero_count)
1504 {
1505 if (printname != NULL && filenames != FN_NONE)
1506 fprintf(stdout, "%s:", printname);
1507 fprintf(stdout, "%d\n", count);
1508 }
1509 }
1510
1511 return rc;
1512 }
1513
1514
1515
1516 /*************************************************
1517 * Grep a file or recurse into a directory *
1518 *************************************************/
1519
1520 /* Given a path name, if it's a directory, scan all the files if we are
1521 recursing; if it's a file, grep it.
1522
1523 Arguments:
1524 pathname the path to investigate
1525 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1526 only_one_at_top TRUE if the path is the only one at toplevel
1527
1528 Returns: 0 if there was at least one match
1529 1 if there were no matches
1530 2 there was some kind of error
1531
1532 However, file opening failures are suppressed if "silent" is set.
1533 */
1534
1535 static int
1536 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1537 {
1538 int rc = 1;
1539 int sep;
1540 int frtype;
1541 int pathlen;
1542 void *handle;
1543 FILE *in = NULL; /* Ensure initialized */
1544
1545 #ifdef SUPPORT_LIBZ
1546 gzFile ingz = NULL;
1547 #endif
1548
1549 #ifdef SUPPORT_LIBBZ2
1550 BZFILE *inbz2 = NULL;
1551 #endif
1552
1553 /* If the file name is "-" we scan stdin */
1554
1555 if (strcmp(pathname, "-") == 0)
1556 {
1557 return pcregrep(stdin, FR_PLAIN,
1558 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1559 stdin_name : NULL);
1560 }
1561
1562 /* If the file is a directory, skip if skipping or if we are recursing, scan
1563 each file and directory within it, subject to any include or exclude patterns
1564 that were set. The scanning code is localized so it can be made
1565 system-specific. */
1566
1567 if ((sep = isdirectory(pathname)) != 0)
1568 {
1569 if (dee_action == dee_SKIP) return 1;
1570 if (dee_action == dee_RECURSE)
1571 {
1572 char buffer[1024];
1573 char *nextfile;
1574 directory_type *dir = opendirectory(pathname);
1575
1576 if (dir == NULL)
1577 {
1578 if (!silent)
1579 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1580 strerror(errno));
1581 return 2;
1582 }
1583
1584 while ((nextfile = readdirectory(dir)) != NULL)
1585 {
1586 int frc, nflen;
1587 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1588 nflen = (int)(strlen(nextfile));
1589
1590 if (isdirectory(buffer))
1591 {
1592 if (exclude_dir_compiled != NULL &&
1593 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1594 continue;
1595
1596 if (include_dir_compiled != NULL &&
1597 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1598 continue;
1599 }
1600 else
1601 {
1602 if (exclude_compiled != NULL &&
1603 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1604 continue;
1605
1606 if (include_compiled != NULL &&
1607 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1608 continue;
1609 }
1610
1611 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1612 if (frc > 1) rc = frc;
1613 else if (frc == 0 && rc == 1) rc = 0;
1614 }
1615
1616 closedirectory(dir);
1617 return rc;
1618 }
1619 }
1620
1621 /* If the file is not a directory and not a regular file, skip it if that's
1622 been requested. */
1623
1624 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1625
1626 /* Control reaches here if we have a regular file, or if we have a directory
1627 and recursion or skipping was not requested, or if we have anything else and
1628 skipping was not requested. The scan proceeds. If this is the first and only
1629 argument at top level, we don't show the file name, unless we are only showing
1630 the file name, or the filename was forced (-H). */
1631
1632 pathlen = (int)(strlen(pathname));
1633
1634 /* Open using zlib if it is supported and the file name ends with .gz. */
1635
1636 #ifdef SUPPORT_LIBZ
1637 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1638 {
1639 ingz = gzopen(pathname, "rb");
1640 if (ingz == NULL)
1641 {
1642 if (!silent)
1643 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1644 strerror(errno));
1645 return 2;
1646 }
1647 handle = (void *)ingz;
1648 frtype = FR_LIBZ;
1649 }
1650 else
1651 #endif
1652
1653 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1654
1655 #ifdef SUPPORT_LIBBZ2
1656 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1657 {
1658 inbz2 = BZ2_bzopen(pathname, "rb");
1659 handle = (void *)inbz2;
1660 frtype = FR_LIBBZ2;
1661 }
1662 else
1663 #endif
1664
1665 /* Otherwise use plain fopen(). The label is so that we can come back here if
1666 an attempt to read a .bz2 file indicates that it really is a plain file. */
1667
1668 #ifdef SUPPORT_LIBBZ2
1669 PLAIN_FILE:
1670 #endif
1671 {
1672 in = fopen(pathname, "rb");
1673 handle = (void *)in;
1674 frtype = FR_PLAIN;
1675 }
1676
1677 /* All the opening methods return errno when they fail. */
1678
1679 if (handle == NULL)
1680 {
1681 if (!silent)
1682 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1683 strerror(errno));
1684 return 2;
1685 }
1686
1687 /* Now grep the file */
1688
1689 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1690 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1691
1692 /* Close in an appropriate manner. */
1693
1694 #ifdef SUPPORT_LIBZ
1695 if (frtype == FR_LIBZ)
1696 gzclose(ingz);
1697 else
1698 #endif
1699
1700 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1701 read failed. If the error indicates that the file isn't in fact bzipped, try
1702 again as a normal file. */
1703
1704 #ifdef SUPPORT_LIBBZ2
1705 if (frtype == FR_LIBBZ2)
1706 {
1707 if (rc == 2)
1708 {
1709 int errnum;
1710 const char *err = BZ2_bzerror(inbz2, &errnum);
1711 if (errnum == BZ_DATA_ERROR_MAGIC)
1712 {
1713 BZ2_bzclose(inbz2);
1714 goto PLAIN_FILE;
1715 }
1716 else if (!silent)
1717 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1718 pathname, err);
1719 }
1720 BZ2_bzclose(inbz2);
1721 }
1722 else
1723 #endif
1724
1725 /* Normal file close */
1726
1727 fclose(in);
1728
1729 /* Pass back the yield from pcregrep(). */
1730
1731 return rc;
1732 }
1733
1734
1735
1736
1737 /*************************************************
1738 * Usage function *
1739 *************************************************/
1740
1741 static int
1742 usage(int rc)
1743 {
1744 option_item *op;
1745 fprintf(stderr, "Usage: pcregrep [-");
1746 for (op = optionlist; op->one_char != 0; op++)
1747 {
1748 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1749 }
1750 fprintf(stderr, "] [long options] [pattern] [files]\n");
1751 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1752 "options.\n");
1753 return rc;
1754 }
1755
1756
1757
1758
1759 /*************************************************
1760 * Help function *
1761 *************************************************/
1762
1763 static void
1764 help(void)
1765 {
1766 option_item *op;
1767
1768 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1769 printf("Search for PATTERN in each FILE or standard input.\n");
1770 printf("PATTERN must be present if neither -e nor -f is used.\n");
1771 printf("\"-\" can be used as a file name to mean STDIN.\n");
1772
1773 #ifdef SUPPORT_LIBZ
1774 printf("Files whose names end in .gz are read using zlib.\n");
1775 #endif
1776
1777 #ifdef SUPPORT_LIBBZ2
1778 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1779 #endif
1780
1781 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1782 printf("Other files and the standard input are read as plain files.\n\n");
1783 #else
1784 printf("All files are read as plain files, without any interpretation.\n\n");
1785 #endif
1786
1787 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1788 printf("Options:\n");
1789
1790 for (op = optionlist; op->one_char != 0; op++)
1791 {
1792 int n;
1793 char s[4];
1794
1795 /* Two options were accidentally implemented and documented with underscores
1796 instead of hyphens in their names, something that was not noticed for quite a
1797 few releases. When fixing this, I left the underscored versions in the list
1798 in case people were using them. However, we don't want to display them in the
1799 help data. There are no other options that contain underscores, and we do not
1800 expect ever to implement such options. Therefore, just omit any option that
1801 contains an underscore. */
1802
1803 if (strchr(op->long_name, '_') != NULL) continue;
1804
1805 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1806 n = 31 - printf(" %s --%s", s, op->long_name);
1807 if (n < 1) n = 1;
1808 printf("%.*s%s\n", n, " ", op->help_text);
1809 }
1810
1811 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1812 printf("trailing white space is removed and blank lines are ignored.\n");
1813 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1814
1815 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1816 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1817 }
1818
1819
1820
1821
1822 /*************************************************
1823 * Handle a single-letter, no data option *
1824 *************************************************/
1825
1826 static int
1827 handle_option(int letter, int options)
1828 {
1829 switch(letter)
1830 {
1831 case N_FOFFSETS: file_offsets = TRUE; break;
1832 case N_HELP: help(); pcregrep_exit(0);
1833 case N_LOFFSETS: line_offsets = number = TRUE; break;
1834 case N_LBUFFER: line_buffered = TRUE; break;
1835 case 'c': count_only = TRUE; break;
1836 case 'F': process_options |= PO_FIXED_STRINGS; break;
1837 case 'H': filenames = FN_FORCE; break;
1838 case 'h': filenames = FN_NONE; break;
1839 case 'i': options |= PCRE_CASELESS; break;
1840 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1841 case 'L': filenames = FN_NOMATCH_ONLY; break;
1842 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1843 case 'n': number = TRUE; break;
1844 case 'o': only_matching = 0; break;
1845 case 'q': quiet = TRUE; break;
1846 case 'r': dee_action = dee_RECURSE; break;
1847 case 's': silent = TRUE; break;
1848 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1849 case 'v': invert = TRUE; break;
1850 case 'w': process_options |= PO_WORD_MATCH; break;
1851 case 'x': process_options |= PO_LINE_MATCH; break;
1852
1853 case 'V':
1854 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1855 pcregrep_exit(0);
1856 break;
1857
1858 default:
1859 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1860 pcregrep_exit(usage(2));
1861 }
1862
1863 return options;
1864 }
1865
1866
1867
1868
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12 or 13 take "th" ("11th", "112th"), not the suffix that their final
digit alone would suggest.

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; the contents are
              overwritten by the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte is more than enough for the decimal digits of any
int; the extra four cover a minus sign, the two-letter suffix, and the
terminating zero. */

static char buffer[3*sizeof(int) + 4];
const char *ending = "th";
int last_two = n % 100;

if (last_two < 11 || last_two > 13)
  {
  switch (n%10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1891
1892
1893
1894 /*************************************************
1895 * Compile a single pattern *
1896 *************************************************/
1897
1898 /* When the -F option has been used, this is called for each substring.
1899 Otherwise it's called for each supplied pattern.
1900
1901 Arguments:
1902 pattern the pattern string
1903 options the PCRE options
1904 filename the file name, or NULL for a command-line pattern
1905 count 0 if this is the only command line pattern, or
1906 number of the command line pattern, or
1907 linenumber for a pattern from a file
1908
1909 Returns: TRUE on success, FALSE after an error
1910 */
1911
1912 static BOOL
1913 compile_single_pattern(char *pattern, int options, char *filename, int count)
1914 {
1915 char buffer[MBUFTHIRD + 16];
1916 const char *error;
1917 int errptr;
1918
1919 if (pattern_count >= MAX_PATTERN_COUNT)
1920 {
1921 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1922 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1923 return FALSE;
1924 }
1925
1926 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1927 suffix[process_options]);
1928 pattern_list[pattern_count] =
1929 pcre_compile(buffer, options, &error, &errptr, pcretables);
1930 if (pattern_list[pattern_count] != NULL)
1931 {
1932 pattern_count++;
1933 return TRUE;
1934 }
1935
1936 /* Handle compile errors */
1937
1938 errptr -= (int)strlen(prefix[process_options]);
1939 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1940
1941 if (filename == NULL)
1942 {
1943 if (count == 0)
1944 fprintf(stderr, "pcregrep: Error in command-line regex "
1945 "at offset %d: %s\n", errptr, error);
1946 else
1947 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1948 "at offset %d: %s\n", ordin(count), errptr, error);
1949 }
1950 else
1951 {
1952 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1953 "at offset %d: %s\n", count, filename, errptr, error);
1954 }
1955
1956 return FALSE;
1957 }
1958
1959
1960
1961 /*************************************************
1962 * Compile one supplied pattern *
1963 *************************************************/
1964
1965 /* When the -F option has been used, each string may be a list of strings,
1966 separated by line breaks. They will be matched literally.
1967
1968 Arguments:
1969 pattern the pattern string
1970 options the PCRE options
1971 filename the file name, or NULL for a command-line pattern
1972 count 0 if this is the only command line pattern, or
1973 number of the command line pattern, or
1974 linenumber for a pattern from a file
1975
1976 Returns: TRUE on success, FALSE after an error
1977 */
1978
1979 static BOOL
1980 compile_pattern(char *pattern, int options, char *filename, int count)
1981 {
1982 if ((process_options & PO_FIXED_STRINGS) != 0)
1983 {
1984 char *eop = pattern + strlen(pattern);
1985 char buffer[MBUFTHIRD];
1986 for(;;)
1987 {
1988 int ellength;
1989 char *p = end_of_line(pattern, eop, &ellength);
1990 if (ellength == 0)
1991 return compile_single_pattern(pattern, options, filename, count);
1992 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1993 pattern = p;
1994 if (!compile_single_pattern(buffer, options, filename, count))
1995 return FALSE;
1996 }
1997 }
1998 else return compile_single_pattern(pattern, options, filename, count);
1999 }
2000
2001
2002
2003 /*************************************************
2004 * Main program *
2005 *************************************************/
2006
2007 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2008
2009 int
2010 main(int argc, char **argv)
2011 {
2012 int i, j;
2013 int rc = 1;
2014 int pcre_options = 0;
2015 int cmd_pattern_count = 0;
2016 int hint_count = 0;
2017 int errptr;
2018 BOOL only_one_at_top;
2019 char *patterns[MAX_PATTERN_COUNT];
2020 const char *locale_from = "--locale";
2021 const char *error;
2022
2023 /* Set the default line ending value from the default in the PCRE library;
2024 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2025 Note that the return values from pcre_config(), though derived from the ASCII
2026 codes, are the same in EBCDIC environments, so we must use the actual values
2027 rather than escapes such as '\r'. */
2028
2029 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2030 switch(i)
2031 {
2032 default: newline = (char *)"lf"; break;
2033 case 13: newline = (char *)"cr"; break;
2034 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2035 case -1: newline = (char *)"any"; break;
2036 case -2: newline = (char *)"anycrlf"; break;
2037 }
2038
2039 /* Process the options */
2040
2041 for (i = 1; i < argc; i++)
2042 {
2043 option_item *op = NULL;
2044 char *option_data = (char *)""; /* default to keep compiler happy */
2045 BOOL longop;
2046 BOOL longopwasequals = FALSE;
2047
2048 if (argv[i][0] != '-') break;
2049
2050 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2051 but only if we have previously had -e or -f to define the patterns. */
2052
2053 if (argv[i][1] == 0)
2054 {
2055 if (pattern_filename != NULL || pattern_count > 0) break;
2056 else pcregrep_exit(usage(2));
2057 }
2058
2059 /* Handle a long name option, or -- to terminate the options */
2060
2061 if (argv[i][1] == '-')
2062 {
2063 char *arg = argv[i] + 2;
2064 char *argequals = strchr(arg, '=');
2065
2066 if (*arg == 0) /* -- terminates options */
2067 {
2068 i++;
2069 break; /* out of the options-handling loop */
2070 }
2071
2072 longop = TRUE;
2073
2074 /* Some long options have data that follows after =, for example file=name.
2075 Some options have variations in the long name spelling: specifically, we
2076 allow "regexp" because GNU grep allows it, though I personally go along
2077 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2078 These options are entered in the table as "regex(p)". Options can be in
2079 both these categories. */
2080
2081 for (op = optionlist; op->one_char != 0; op++)
2082 {
2083 char *opbra = strchr(op->long_name, '(');
2084 char *equals = strchr(op->long_name, '=');
2085
2086 /* Handle options with only one spelling of the name */
2087
2088 if (opbra == NULL) /* Does not contain '(' */
2089 {
2090 if (equals == NULL) /* Not thing=data case */
2091 {
2092 if (strcmp(arg, op->long_name) == 0) break;
2093 }
2094 else /* Special case xxx=data */
2095 {
2096 int oplen = (int)(equals - op->long_name);
2097 int arglen = (argequals == NULL)?
2098 (int)strlen(arg) : (int)(argequals - arg);
2099 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2100 {
2101 option_data = arg + arglen;
2102 if (*option_data == '=')
2103 {
2104 option_data++;
2105 longopwasequals = TRUE;
2106 }
2107 break;
2108 }
2109 }
2110 }
2111
2112 /* Handle options with an alternate spelling of the name */
2113
2114 else
2115 {
2116 char buff1[24];
2117 char buff2[24];
2118
2119 int baselen = (int)(opbra - op->long_name);
2120 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2121 int arglen = (argequals == NULL || equals == NULL)?
2122 (int)strlen(arg) : (int)(argequals - arg);
2123
2124 sprintf(buff1, "%.*s", baselen, op->long_name);
2125 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2126
2127 if (strncmp(arg, buff1, arglen) == 0 ||
2128 strncmp(arg, buff2, arglen) == 0)
2129 {
2130 if (equals != NULL && argequals != NULL)
2131 {
2132 option_data = argequals;
2133 if (*option_data == '=')
2134 {
2135 option_data++;
2136 longopwasequals = TRUE;
2137 }
2138 }
2139 break;
2140 }
2141 }
2142 }
2143
2144 if (op->one_char == 0)
2145 {
2146 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2147 pcregrep_exit(usage(2));
2148 }
2149 }
2150
2151 /* Jeffrey Friedl's debugging harness uses these additional options which
2152 are not in the right form for putting in the option table because they use
2153 only one hyphen, yet are more than one character long. By putting them
2154 separately here, they will not get displayed as part of the help() output,
2155 but I don't think Jeffrey will care about that. */
2156
2157 #ifdef JFRIEDL_DEBUG
2158 else if (strcmp(argv[i], "-pre") == 0) {
2159 jfriedl_prefix = argv[++i];
2160 continue;
2161 } else if (strcmp(argv[i], "-post") == 0) {
2162 jfriedl_postfix = argv[++i];
2163 continue;
2164 } else if (strcmp(argv[i], "-XT") == 0) {
2165 sscanf(argv[++i], "%d", &jfriedl_XT);
2166 continue;
2167 } else if (strcmp(argv[i], "-XR") == 0) {
2168 sscanf(argv[++i], "%d", &jfriedl_XR);
2169 continue;
2170 }
2171 #endif
2172
2173
2174 /* One-char options; many that have no data may be in a single argument; we
2175 continue till we hit the last one or one that needs data. */
2176
2177 else
2178 {
2179 char *s = argv[i] + 1;
2180 longop = FALSE;
2181 while (*s != 0)
2182 {
2183 for (op = optionlist; op->one_char != 0; op++)
2184 {
2185 if (*s == op->one_char) break;
2186 }
2187 if (op->one_char == 0)
2188 {
2189 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2190 *s, argv[i]);
2191 pcregrep_exit(usage(2));
2192 }
2193
2194 /* Check for a single-character option that has data: OP_OP_NUMBER
2195 is used for one that either has a numeric value or defaults, i.e. the
2196 data is optional. If a digit follows, there is data; if not, carry on
2197 with other single-character options in the same string. */
2198
2199 option_data = s+1;
2200 if (op->type == OP_OP_NUMBER)
2201 {
2202 if (isdigit((unsigned char)s[1])) break;
2203 }
2204 else /* Check for end or a dataless option */
2205 {
2206 if (op->type != OP_NODATA || s[1] == 0) break;
2207 }
2208
2209 /* Handle a single-character option with no data, then loop for the
2210 next character in the string. */
2211
2212 pcre_options = handle_option(*s++, pcre_options);
2213 }
2214 }
2215
2216 /* At this point we should have op pointing to a matched option. If the type
2217 is OP_NODATA, it means that there is no data, and the option might set
2218 something in the PCRE options. */
2219
2220 if (op->type == OP_NODATA)
2221 {
2222 pcre_options = handle_option(op->one_char, pcre_options);
2223 continue;
2224 }
2225
2226 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2227 either has a value or defaults to something. It cannot have data in a
2228 separate item. At the moment, the only such options are "colo(u)r",
2229 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2230
2231 if (*option_data == 0 &&
2232 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2233 {
2234 switch (op->one_char)
2235 {
2236 case N_COLOUR:
2237 colour_option = (char *)"auto";
2238 break;
2239
2240 case 'o':
2241 only_matching = 0;
2242 break;
2243
2244 #ifdef JFRIEDL_DEBUG
2245 case 'S':
2246 S_arg = 0;
2247 break;
2248 #endif
2249 }
2250 continue;
2251 }
2252
2253 /* Otherwise, find the data string for the option. */
2254
2255 if (*option_data == 0)
2256 {
2257 if (i >= argc - 1 || longopwasequals)
2258 {
2259 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2260 pcregrep_exit(usage(2));
2261 }
2262 option_data = argv[++i];
2263 }
2264
2265 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2266 multiple times to create a list of patterns. */
2267
2268 if (op->type == OP_PATLIST)
2269 {
2270 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2271 {
2272 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2273 MAX_PATTERN_COUNT);
2274 return 2;
2275 }
2276 patterns[cmd_pattern_count++] = option_data;
2277 }
2278
2279 /* Otherwise, deal with single string or numeric data values. */
2280
2281 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2282 {
2283 *((char **)op->dataptr) = option_data;
2284 }
2285
2286 /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2287 only for unpicking arguments, so just keep it simple. */
2288
2289 else
2290 {
2291 unsigned long int n = 0;
2292 char *endptr = option_data;
2293 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2294 while (isdigit((unsigned char)(*endptr)))
2295 n = n * 10 + (int)(*endptr++ - '0');
2296 if (*endptr != 0)
2297 {
2298 if (longop)
2299 {
2300 char *equals = strchr(op->long_name, '=');
2301 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2302 (int)(equals - op->long_name);
2303 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2304 option_data, nlen, op->long_name);
2305 }
2306 else
2307 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2308 option_data, op->one_char);
2309 pcregrep_exit(usage(2));
2310 }
2311 *((int *)op->dataptr) = n;
2312 }
2313 }
2314
2315 /* Options have been decoded. If -C was used, its value is used as a default
2316 for -A and -B. */
2317
2318 if (both_context > 0)
2319 {
2320 if (after_context == 0) after_context = both_context;
2321 if (before_context == 0) before_context = both_context;
2322 }
2323
2324 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2325 However, the latter two set only_matching. */
2326
2327 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2328 (file_offsets && line_offsets))
2329 {
2330 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2331 "and/or --line-offsets\n");
2332 pcregrep_exit(usage(2));
2333 }
2334
2335 if (file_offsets || line_offsets) only_matching = 0;
2336
2337 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2338 LC_ALL environment variable is set, and if so, use it. */
2339
2340 if (locale == NULL)
2341 {
2342 locale = getenv("LC_ALL");
2343 locale_from = "LCC_ALL";
2344 }
2345
2346 if (locale == NULL)
2347 {
2348 locale = getenv("LC_CTYPE");
2349 locale_from = "LC_CTYPE";
2350 }
2351
2352 /* If a locale has been provided, set it, and generate the tables that PCRE
2353 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2354
2355 if (locale != NULL)
2356 {
2357 if (setlocale(LC_CTYPE, locale) == NULL)
2358 {
2359 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2360 locale, locale_from);
2361 return 2;
2362 }
2363 pcretables = pcre_maketables();
2364 }
2365
2366 /* Sort out colouring */
2367
2368 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2369 {
2370 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2371 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2372 else
2373 {
2374 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2375 colour_option);
2376 return 2;
2377 }
2378 if (do_colour)
2379 {
2380 char *cs = getenv("PCREGREP_COLOUR");
2381 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2382 if (cs != NULL) colour_string = cs;
2383 }
2384 }
2385
2386 /* Interpret the newline type; the default settings are Unix-like. */
2387
2388 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2389 {
2390 pcre_options |= PCRE_NEWLINE_CR;
2391 endlinetype = EL_CR;
2392 }
2393 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2394 {
2395 pcre_options |= PCRE_NEWLINE_LF;
2396 endlinetype = EL_LF;
2397 }
2398 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2399 {
2400 pcre_options |= PCRE_NEWLINE_CRLF;
2401 endlinetype = EL_CRLF;
2402 }
2403 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2404 {
2405 pcre_options |= PCRE_NEWLINE_ANY;
2406 endlinetype = EL_ANY;
2407 }
2408 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2409 {
2410 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2411 endlinetype = EL_ANYCRLF;
2412 }
2413 else
2414 {
2415 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2416 return 2;
2417 }
2418
2419 /* Interpret the text values for -d and -D */
2420
2421 if (dee_option != NULL)
2422 {
2423 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2424 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2425 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2426 else
2427 {
2428 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2429 return 2;
2430 }
2431 }
2432
2433 if (DEE_option != NULL)
2434 {
2435 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2436 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2437 else
2438 {
2439 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2440 return 2;
2441 }
2442 }
2443
2444 /* Check the values for Jeffrey Friedl's debugging options. */
2445
2446 #ifdef JFRIEDL_DEBUG
2447 if (S_arg > 9)
2448 {
2449 fprintf(stderr, "pcregrep: bad value for -S option\n");
2450 return 2;
2451 }
2452 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2453 {
2454 if (jfriedl_XT == 0) jfriedl_XT = 1;
2455 if (jfriedl_XR == 0) jfriedl_XR = 1;
2456 }
2457 #endif
2458
2459 /* Get memory to store the pattern and hints lists. */
2460
2461 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2462 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2463
2464 if (pattern_list == NULL || hints_list == NULL)
2465 {
2466 fprintf(stderr, "pcregrep: malloc failed\n");
2467 goto EXIT2;
2468 }
2469
2470 /* If no patterns were provided by -e, and there is no file provided by -f,
2471 the first argument is the one and only pattern, and it must exist. */
2472
2473 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2474 {
2475 if (i >= argc) return usage(2);
2476 patterns[cmd_pattern_count++] = argv[i++];
2477 }
2478
2479 /* Compile the patterns that were provided on the command line, either by
2480 multiple uses of -e or as a single unkeyed pattern. */
2481
2482 for (j = 0; j < cmd_pattern_count; j++)
2483 {
2484 if (!compile_pattern(patterns[j], pcre_options, NULL,
2485 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2486 goto EXIT2;
2487 }
2488
2489 /* Compile the regular expressions that are provided in a file. */
2490
2491 if (pattern_filename != NULL)
2492 {
2493 int linenumber = 0;
2494 FILE *f;
2495 char *filename;
2496 char buffer[MBUFTHIRD];
2497
2498 if (strcmp(pattern_filename, "-") == 0)
2499 {
2500 f = stdin;
2501 filename = stdin_name;
2502 }
2503 else
2504 {
2505 f = fopen(pattern_filename, "r");
2506 if (f == NULL)
2507 {
2508 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2509 strerror(errno));
2510 goto EXIT2;
2511 }
2512 filename = pattern_filename;
2513 }
2514
2515 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2516 {
2517 char *s = buffer + (int)strlen(buffer);
2518 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2519 *s = 0;
2520 linenumber++;
2521 if (buffer[0] == 0) continue; /* Skip blank lines */
2522 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2523 goto EXIT2;
2524 }
2525
2526 if (f != stdin) fclose(f);
2527 }
2528
2529 /* Study the regular expressions, as we will be running them many times */
2530
2531 for (j = 0; j < pattern_count; j++)
2532 {
2533 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2534 if (error != NULL)
2535 {
2536 char s[16];
2537 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2538 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2539 goto EXIT2;
2540 }
2541 hint_count++;
2542 }
2543
2544 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2545 pcre_extra block for each pattern. */
2546
2547 if (match_limit > 0 || match_limit_recursion > 0)
2548 {
2549 for (j = 0; j < pattern_count; j++)
2550 {
2551 if (hints_list[j] == NULL)
2552 {
2553 hints_list[j] = malloc(sizeof(pcre_extra));
2554 if (hints_list[j] == NULL)
2555 {
2556 fprintf(stderr, "pcregrep: malloc failed\n");
2557 pcregrep_exit(2);
2558 }
2559 }
2560 if (match_limit > 0)
2561 {
2562 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2563 hints_list[j]->match_limit = match_limit;
2564 }
2565 if (match_limit_recursion > 0)
2566 {
2567 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2568 hints_list[j]->match_limit_recursion = match_limit_recursion;
2569 }
2570 }
2571 }
2572
2573 /* If there are include or exclude patterns, compile them. */
2574
2575 if (exclude_pattern != NULL)
2576 {
2577 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2578 pcretables);
2579 if (exclude_compiled == NULL)
2580 {
2581 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2582 errptr, error);
2583 goto EXIT2;
2584 }
2585 }
2586
2587 if (include_pattern != NULL)
2588 {
2589 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2590 pcretables);
2591 if (include_compiled == NULL)
2592 {
2593 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2594 errptr, error);
2595 goto EXIT2;
2596 }
2597 }
2598
2599 if (exclude_dir_pattern != NULL)
2600 {
2601 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2602 pcretables);
2603 if (exclude_dir_compiled == NULL)
2604 {
2605 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2606 errptr, error);
2607 goto EXIT2;
2608 }
2609 }
2610
2611 if (include_dir_pattern != NULL)
2612 {
2613 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2614 pcretables);
2615 if (include_dir_compiled == NULL)
2616 {
2617 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2618 errptr, error);
2619 goto EXIT2;
2620 }
2621 }
2622
2623 /* If there are no further arguments, do the business on stdin and exit. */
2624
2625 if (i >= argc)
2626 {
2627 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2628 goto EXIT;
2629 }
2630
2631 /* Otherwise, work through the remaining arguments as files or directories.
2632 Pass in the fact that there is only one argument at top level - this suppresses
2633 the file name if the argument is not a directory and filenames are not
2634 otherwise forced. */
2635
2636 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2637
2638 for (; i < argc; i++)
2639 {
2640 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2641 only_one_at_top);
2642 if (frc > 1) rc = frc;
2643 else if (frc == 0 && rc == 1) rc = 0;
2644 }
2645
2646 EXIT:
2647 if (pattern_list != NULL)
2648 {
2649 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2650 free(pattern_list);
2651 }
2652 if (hints_list != NULL)
2653 {
2654 for (i = 0; i < hint_count; i++)
2655 {
2656 if (hints_list[i] != NULL) free(hints_list[i]);
2657 }
2658 free(hints_list);
2659 }
2660 pcregrep_exit(rc);
2661
2662 EXIT2:
2663 rc = 2;
2664 goto EXIT;
2665 }
2666
2667 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5