/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 667 - (show annotations)
Mon Aug 22 14:57:32 2011 UTC (3 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 78994 byte(s)
Error occurred while calculating annotation data.
Commit all the changes for JIT support, but without any documentation yet.
1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2011 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
/* Truth values and the integer type used for flags throughout this file. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

/* OFFSET_SIZE is the length of the offsets vector handed to pcre_exec(). */

#define MAX_PATTERN_COUNT 100
#define OFFSET_SIZE 99

/* PATBUFSIZE is BUFSIZ or 8192, whichever is the larger. */

#if BUFSIZ <= 8192
#define PATBUFSIZE 8192
#else
#define PATBUFSIZE BUFSIZ
#endif

/* Settings of the "filenames" variable, which controls file name output. Do
not reorder: a file name is taken to be wanted for every value that is greater
than FN_DEFAULT. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_MATCH_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* How a file is read */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};

/* What to do for the -d option (lower case names) and the -D option (upper
case names) */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Flag bits that select special pattern processing */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* The kinds of line ending that can be recognized */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};
106
107 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108 environments), a warning is issued if the value of fwrite() is ignored.
109 Unfortunately, casting to (void) does not suppress the warning. To get round
110 this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111 apply to fprintf(). */
112
/* The expansion is wrapped in do { } while (0) so that FWRITE(...); is exactly
one statement. A bare "if (...) {}" expansion followed by the caller's
semicolon is two statements, which breaks (or silently re-binds) a following
"else" when the macro is used as the body of an "if". The inner "if" is kept
because it is what makes use of fwrite()'s return value. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114
115
116
117 /*************************************************
118 * Global variables *
119 *************************************************/
120
121 /* Jeffrey Friedl has some debugging requirements that are not part of the
122 regular code. */
123
124 #ifdef JFRIEDL_DEBUG
125 static int S_arg = -1;
126 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128 static const char *jfriedl_prefix = "";
129 static const char *jfriedl_postfix = "";
130 #endif
131
132 static int endlinetype;
133
134 static char *colour_string = (char *)"1;31";
135 static char *colour_option = NULL;
136 static char *dee_option = NULL;
137 static char *DEE_option = NULL;
138 static char *main_buffer = NULL;
139 static char *newline = NULL;
140 static char *pattern_filename = NULL;
141 static char *stdin_name = (char *)"(standard input)";
142 static char *locale = NULL;
143
144 static const unsigned char *pcretables = NULL;
145
146 static int pattern_count = 0;
147 static pcre **pattern_list = NULL;
148 static pcre_extra **hints_list = NULL;
149
150 static char *include_pattern = NULL;
151 static char *exclude_pattern = NULL;
152 static char *include_dir_pattern = NULL;
153 static char *exclude_dir_pattern = NULL;
154
155 static pcre *include_compiled = NULL;
156 static pcre *exclude_compiled = NULL;
157 static pcre *include_dir_compiled = NULL;
158 static pcre *exclude_dir_compiled = NULL;
159
160 static int after_context = 0;
161 static int before_context = 0;
162 static int both_context = 0;
163 static int bufthird = PCREGREP_BUFSIZE;
164 static int bufsize = 3*PCREGREP_BUFSIZE;
165 static int dee_action = dee_READ;
166 static int DEE_action = DEE_READ;
167 static int error_count = 0;
168 static int filenames = FN_DEFAULT;
169 static int only_matching = -1;
170 static int process_options = 0;
171 static int study_options = 0;
172
173 static unsigned long int match_limit = 0;
174 static unsigned long int match_limit_recursion = 0;
175
176 static BOOL count_only = FALSE;
177 static BOOL do_colour = FALSE;
178 static BOOL file_offsets = FALSE;
179 static BOOL hyphenpending = FALSE;
180 static BOOL invert = FALSE;
181 static BOOL line_buffered = FALSE;
182 static BOOL line_offsets = FALSE;
183 static BOOL multiline = FALSE;
184 static BOOL number = FALSE;
185 static BOOL omit_zero_count = FALSE;
186 static BOOL resource_error = FALSE;
187 static BOOL quiet = FALSE;
188 static BOOL silent = FALSE;
189 static BOOL utf8 = FALSE;
190
191 /* Structure for options and list of them */
192
/* The kinds of option, stored in the "type" field of an option_item */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_LONGNUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

/* One entry in the table of options */

typedef struct option_item {
  int type;                /* one of the OP_xxx values */
  int one_char;            /* the single-letter form of the option */
  void *dataptr;           /* the variable that receives the data, or NULL */
  const char *long_name;   /* the long form of the option */
  const char *help_text;   /* description of the option */
} option_item;
203
204 /* Options without a single-letter equivalent get a negative value. This can be
205 used to identify them. */
206
207 #define N_COLOUR (-1)
208 #define N_EXCLUDE (-2)
209 #define N_EXCLUDE_DIR (-3)
210 #define N_HELP (-4)
211 #define N_INCLUDE (-5)
212 #define N_INCLUDE_DIR (-6)
213 #define N_LABEL (-7)
214 #define N_LOCALE (-8)
215 #define N_NULL (-9)
216 #define N_LOFFSETS (-10)
217 #define N_FOFFSETS (-11)
218 #define N_LBUFFER (-12)
219 #define N_M_LIMIT (-13)
220 #define N_M_LIMIT_REC (-14)
221 #define N_BUFSIZE (-15)
222
223 static option_item optionlist[] = {
224 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
225 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
226 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
227 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
228 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
229 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
230 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
231 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
232 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
233 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
234 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
235 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
236 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
237 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
238 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
239 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
240 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
241 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
242 { OP_NODATA, 'j', NULL, "jit", "use JIT compiler if available" },
243 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
244 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
245 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
246 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
247 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
248 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
249 { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
250 { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
251 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
252 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
253 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
254 { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
255 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
256 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
257 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
258 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
259 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
260 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
261
262 /* These two were accidentally implemented with underscores instead of
263 hyphens in the option names. As this was not discovered for several releases,
264 the incorrect versions are left in the table for compatibility. However, the
265 --help function misses out any option that has an underscore in its name. */
266
267 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
268 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
269
270 #ifdef JFRIEDL_DEBUG
271 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
272 #endif
273 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
274 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
275 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
276 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
277 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
278 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
279 { OP_NODATA, 0, NULL, NULL, NULL }
280 };
281
282 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
283 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
284 that the combination of -w and -x has the same effect as -x on its own, so we
285 can treat them as the same. */
286
/* Both tables are indexed by the low three bits of process_options:
1 = -w, 2 = -x, 4 = -F. */

static const char *prefix[] = {
  "",            /* 0: no special processing */
  "\\b",         /* 1: -w */
  "^(?:",        /* 2: -x */
  "^(?:",        /* 3: -w -x, same as -x */
  "\\Q",         /* 4: -F */
  "\\b\\Q",      /* 5: -F -w */
  "^(?:\\Q",     /* 6: -F -x */
  "^(?:\\Q"      /* 7: -F -w -x, same as -F -x */
};

static const char *suffix[] = {
  "",            /* 0: no special processing */
  "\\b",         /* 1: -w */
  ")$",          /* 2: -x */
  ")$",          /* 3: -w -x, same as -x */
  "\\E",         /* 4: -F */
  "\\E\\b",      /* 5: -F -w */
  "\\E)$",       /* 6: -F -x */
  "\\E)$"        /* 7: -F -w -x, same as -F -x */
};
292
293 /* UTF-8 tables - used only when the newline setting is "any". */
294
/* Both tables are private to this file, so they are given internal linkage to
keep their names out of the program's global namespace. */

/* Indexed by the number of additional bytes in a UTF-8 character: the mask
that extracts the data bits from the first byte. */

static const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

/* Indexed by the low six bits of a first byte that is 0xc0 or greater: the
number of additional bytes in the character. */

static const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
302
303
304
305 /*************************************************
306 * Exit from the program *
307 *************************************************/
308
309 /* If there has been a resource error, give a suitable message.
310
311 Argument: the return code
312 Returns: does not return
313 */
314
315 static void
316 pcregrep_exit(int rc)
317 {
318 if (resource_error)
319 {
320 fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
321 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
322 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
323 }
324
325 exit(rc);
326 }
327
328
329 /*************************************************
330 * OS-specific functions *
331 *************************************************/
332
333 /* These functions are defined so that they can be made system specific,
334 although at present the only ones are for Unix, Win32, and for "no support". */
335
336
337 /************* Directory scanning in Unix ***********/
338
339 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
340 #include <sys/types.h>
341 #include <sys/stat.h>
342 #include <dirent.h>
343
344 typedef DIR directory_type;
345
/* Test whether a path names a directory (Unix version).

Returns:  '/' if stat() succeeds and reports a directory
          0 otherwise, including when stat() fails
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  /* A path that cannot be examined is reported as "not a directory", in the
  expectation that opening it as a file will then fail. */

  if (stat(filename, &info) < 0) return 0;
  if (S_ISDIR(info.st_mode)) return '/';
  return 0;
}
354
355 static directory_type *
356 opendirectory(char *filename)
357 {
358 return opendir(filename);
359 }
360
361 static char *
362 readdirectory(directory_type *dir)
363 {
364 for (;;)
365 {
366 struct dirent *dent = readdir(dir);
367 if (dent == NULL) return NULL;
368 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
369 return dent->d_name;
370 }
371 /* Control never reaches here */
372 }
373
374 static void
375 closedirectory(directory_type *dir)
376 {
377 closedir(dir);
378 }
379
380
381 /************* Test for regular file in Unix **********/
382
/* Test whether a path names a regular file (Unix version).

Returns:  non-zero if stat() succeeds and reports a regular file, and also if
            stat() fails
          0 if stat() succeeds and reports anything else
*/

static int
isregfile(char *filename)
{
  struct stat info;

  /* A path that cannot be examined is reported as regular, in the expectation
  that opening it as a file will then fail. */

  if (stat(filename, &info) < 0) return 1;
  return S_ISREG(info.st_mode) ? 1 : 0;
}
391
392
393 /************* Test for a terminal in Unix **********/
394
395 static BOOL
396 is_stdout_tty(void)
397 {
398 return isatty(fileno(stdout));
399 }
400
401 static BOOL
402 is_file_tty(FILE *f)
403 {
404 return isatty(fileno(f));
405 }
406
407
408 /************* Directory scanning in Win32 ***********/
409
410 /* I (Philip Hazel) have no means of testing this code. It was contributed by
411 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
412 when it did not exist. David Byron added a patch that moved the #include of
413 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
414 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
415 undefined when it is indeed undefined. */
416
417 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
418
419 #ifndef STRICT
420 # define STRICT
421 #endif
422 #ifndef WIN32_LEAN_AND_MEAN
423 # define WIN32_LEAN_AND_MEAN
424 #endif
425
426 #include <windows.h>
427
428 #ifndef INVALID_FILE_ATTRIBUTES
429 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
430 #endif
431
432 typedef struct directory_type
433 {
434 HANDLE handle;
435 BOOL first;
436 WIN32_FIND_DATA data;
437 } directory_type;
438
439 int
440 isdirectory(char *filename)
441 {
442 DWORD attr = GetFileAttributes(filename);
443 if (attr == INVALID_FILE_ATTRIBUTES)
444 return 0;
445 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
446 }
447
448 directory_type *
449 opendirectory(char *filename)
450 {
451 size_t len;
452 char *pattern;
453 directory_type *dir;
454 DWORD err;
455 len = strlen(filename);
456 pattern = (char *) malloc(len + 3);
457 dir = (directory_type *) malloc(sizeof(*dir));
458 if ((pattern == NULL) || (dir == NULL))
459 {
460 fprintf(stderr, "pcregrep: malloc failed\n");
461 pcregrep_exit(2);
462 }
463 memcpy(pattern, filename, len);
464 memcpy(&(pattern[len]), "\\*", 3);
465 dir->handle = FindFirstFile(pattern, &(dir->data));
466 if (dir->handle != INVALID_HANDLE_VALUE)
467 {
468 free(pattern);
469 dir->first = TRUE;
470 return dir;
471 }
472 err = GetLastError();
473 free(pattern);
474 free(dir);
475 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
476 return NULL;
477 }
478
479 char *
480 readdirectory(directory_type *dir)
481 {
482 for (;;)
483 {
484 if (!dir->first)
485 {
486 if (!FindNextFile(dir->handle, &(dir->data)))
487 return NULL;
488 }
489 else
490 {
491 dir->first = FALSE;
492 }
493 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
494 return dir->data.cFileName;
495 }
496 #ifndef _MSC_VER
497 return NULL; /* Keep compiler happy; never executed */
498 #endif
499 }
500
501 void
502 closedirectory(directory_type *dir)
503 {
504 FindClose(dir->handle);
505 free(dir);
506 }
507
508
509 /************* Test for regular file in Win32 **********/
510
511 /* I don't know how to do this, or if it can be done; assume all paths are
512 regular if they are not directories. */
513
/* Win32 version: anything that isdirectory() does not report as a directory
is taken to be a regular file. */

int isregfile(char *filename)
{
  if (isdirectory(filename)) return 0;
  return 1;
}
518
519
520 /************* Test for a terminal in Win32 **********/
521
522 /* I don't know how to do this; assume never */
523
524 static BOOL
525 is_stdout_tty(void)
526 {
527 return FALSE;
528 }
529
530 static BOOL
531 is_file_tty(FILE *f)
532 {
533 return FALSE;
534 }
535
536
537 /************* Directory scanning when we can't do it ***********/
538
539 /* The type is void, and apart from isdirectory(), the functions do nothing. */
540
541 #else
542
543 typedef void directory_type;
544
/* No directory support: nothing is ever reported as a directory. */

int isdirectory(char *filename)
{
  (void)filename;
  return 0;
}
546 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
547 char *readdirectory(directory_type *dir) { return (char*)0;}
548 void closedirectory(directory_type *dir) {}
549
550
551 /************* Test for regular when we can't do it **********/
552
553 /* Assume all files are regular. */
554
/* No means of testing: every path is reported as a regular file. */

int isregfile(char *filename)
{
  (void)filename;
  return 1;
}
556
557
558 /************* Test for a terminal when we can't do it **********/
559
560 static BOOL
561 is_stdout_tty(void)
562 {
563 return FALSE;
564 }
565
566 static BOOL
567 is_file_tty(FILE *f)
568 {
569 return FALSE;
570 }
571
572 #endif
573
574
575
576 #ifndef HAVE_STRERROR
577 /*************************************************
578 * Provide strerror() for non-ANSI libraries *
579 *************************************************/
580
581 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
582 in their libraries, but can provide the same facility by this simple
583 alternative function. */
584
585 extern int sys_nerr;
586 extern char *sys_errlist[];
587
588 char *
589 strerror(int n)
590 {
591 if (n < 0 || n >= sys_nerr) return "unknown error number";
592 return sys_errlist[n];
593 }
594 #endif /* HAVE_STRERROR */
595
596
597
598 /*************************************************
599 * Read one line of input *
600 *************************************************/
601
602 /* Normally, input is read using fread() into a large buffer, so many lines may
603 be read at once. However, doing this for tty input means that no output appears
604 until a lot of input has been typed. Instead, tty input is handled line by
605 line. We cannot use fgets() for this, because it does not stop at a binary
606 zero, and therefore there is no way of telling how many characters it has read,
607 because there may be binary zeros embedded in the data.
608
609 Arguments:
610 buffer the buffer to read into
611 length the maximum number of characters to read
612 f the file
613
614 Returns: the number of characters read, zero at end of file
615 */
616
/* Read bytes one at a time with fgetc() until a '\n' has been stored, the
buffer is full, or the stream yields EOF. Binary zeros are stored like any
other byte.

The space check is made before each byte is fetched, so nothing is read or
stored when length is zero or negative. Testing only after the store would
write one byte into a buffer that has no room for it.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters stored; zero at end of file, and also
             when length is not positive
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
  int yield = 0;

  while (yield < length)
    {
    int c = fgetc(f);
    if (c == EOF) break;
    buffer[yield++] = c;
    if (c == '\n') break;
    }

  return yield;
}
629
630
631
632 /*************************************************
633 * Find end of line *
634 *************************************************/
635
636 /* The length of the endline sequence that is found is set via lenptr. This may
637 be zero at the very end of the file if there is no line-ending sequence there.
638
639 Arguments:
640 p current position in line
641 endptr end of available data
642 lenptr where to put the length of the eol sequence
643
644 Returns: pointer after the last byte of the line,
645 including the newline byte(s)
646 */
647
648 static char *
649 end_of_line(char *p, char *endptr, int *lenptr)
650 {
651 switch(endlinetype)
652 {
653 default: /* Just in case */
654 case EL_LF:
655 while (p < endptr && *p != '\n') p++;
656 if (p < endptr)
657 {
658 *lenptr = 1;
659 return p + 1;
660 }
661 *lenptr = 0;
662 return endptr;
663
664 case EL_CR:
665 while (p < endptr && *p != '\r') p++;
666 if (p < endptr)
667 {
668 *lenptr = 1;
669 return p + 1;
670 }
671 *lenptr = 0;
672 return endptr;
673
674 case EL_CRLF:
675 for (;;)
676 {
677 while (p < endptr && *p != '\r') p++;
678 if (++p >= endptr)
679 {
680 *lenptr = 0;
681 return endptr;
682 }
683 if (*p == '\n')
684 {
685 *lenptr = 2;
686 return p + 1;
687 }
688 }
689 break;
690
691 case EL_ANYCRLF:
692 while (p < endptr)
693 {
694 int extra = 0;
695 register int c = *((unsigned char *)p);
696
697 if (utf8 && c >= 0xc0)
698 {
699 int gcii, gcss;
700 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
701 gcss = 6*extra;
702 c = (c & utf8_table3[extra]) << gcss;
703 for (gcii = 1; gcii <= extra; gcii++)
704 {
705 gcss -= 6;
706 c |= (p[gcii] & 0x3f) << gcss;
707 }
708 }
709
710 p += 1 + extra;
711
712 switch (c)
713 {
714 case 0x0a: /* LF */
715 *lenptr = 1;
716 return p;
717
718 case 0x0d: /* CR */
719 if (p < endptr && *p == 0x0a)
720 {
721 *lenptr = 2;
722 p++;
723 }
724 else *lenptr = 1;
725 return p;
726
727 default:
728 break;
729 }
730 } /* End of loop for ANYCRLF case */
731
732 *lenptr = 0; /* Must have hit the end */
733 return endptr;
734
735 case EL_ANY:
736 while (p < endptr)
737 {
738 int extra = 0;
739 register int c = *((unsigned char *)p);
740
741 if (utf8 && c >= 0xc0)
742 {
743 int gcii, gcss;
744 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
745 gcss = 6*extra;
746 c = (c & utf8_table3[extra]) << gcss;
747 for (gcii = 1; gcii <= extra; gcii++)
748 {
749 gcss -= 6;
750 c |= (p[gcii] & 0x3f) << gcss;
751 }
752 }
753
754 p += 1 + extra;
755
756 switch (c)
757 {
758 case 0x0a: /* LF */
759 case 0x0b: /* VT */
760 case 0x0c: /* FF */
761 *lenptr = 1;
762 return p;
763
764 case 0x0d: /* CR */
765 if (p < endptr && *p == 0x0a)
766 {
767 *lenptr = 2;
768 p++;
769 }
770 else *lenptr = 1;
771 return p;
772
773 case 0x85: /* NEL */
774 *lenptr = utf8? 2 : 1;
775 return p;
776
777 case 0x2028: /* LS */
778 case 0x2029: /* PS */
779 *lenptr = 3;
780 return p;
781
782 default:
783 break;
784 }
785 } /* End of loop for ANY case */
786
787 *lenptr = 0; /* Must have hit the end */
788 return endptr;
789 } /* End of overall switch */
790 }
791
792
793
794 /*************************************************
795 * Find start of previous line *
796 *************************************************/
797
798 /* This is called when looking back for before lines to print.
799
800 Arguments:
801 p start of the subsequent line
802 startptr start of available data
803
804 Returns: pointer to the start of the previous line
805 */
806
807 static char *
808 previous_line(char *p, char *startptr)
809 {
810 switch(endlinetype)
811 {
812 default: /* Just in case */
813 case EL_LF:
814 p--;
815 while (p > startptr && p[-1] != '\n') p--;
816 return p;
817
818 case EL_CR:
819 p--;
820 while (p > startptr && p[-1] != '\n') p--;
821 return p;
822
823 case EL_CRLF:
824 for (;;)
825 {
826 p -= 2;
827 while (p > startptr && p[-1] != '\n') p--;
828 if (p <= startptr + 1 || p[-2] == '\r') return p;
829 }
830 return p; /* But control should never get here */
831
832 case EL_ANY:
833 case EL_ANYCRLF:
834 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
835 if (utf8) while ((*p & 0xc0) == 0x80) p--;
836
837 while (p > startptr)
838 {
839 register int c;
840 char *pp = p - 1;
841
842 if (utf8)
843 {
844 int extra = 0;
845 while ((*pp & 0xc0) == 0x80) pp--;
846 c = *((unsigned char *)pp);
847 if (c >= 0xc0)
848 {
849 int gcii, gcss;
850 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
851 gcss = 6*extra;
852 c = (c & utf8_table3[extra]) << gcss;
853 for (gcii = 1; gcii <= extra; gcii++)
854 {
855 gcss -= 6;
856 c |= (pp[gcii] & 0x3f) << gcss;
857 }
858 }
859 }
860 else c = *((unsigned char *)pp);
861
862 if (endlinetype == EL_ANYCRLF) switch (c)
863 {
864 case 0x0a: /* LF */
865 case 0x0d: /* CR */
866 return p;
867
868 default:
869 break;
870 }
871
872 else switch (c)
873 {
874 case 0x0a: /* LF */
875 case 0x0b: /* VT */
876 case 0x0c: /* FF */
877 case 0x0d: /* CR */
878 case 0x85: /* NEL */
879 case 0x2028: /* LS */
880 case 0x2029: /* PS */
881 return p;
882
883 default:
884 break;
885 }
886
887 p = pp; /* Back one character */
888 } /* End of loop for ANY case */
889
890 return startptr; /* Hit start of data */
891 } /* End of overall switch */
892 }
893
894
895
896
897
898 /*************************************************
899 * Print the previous "after" lines *
900 *************************************************/
901
902 /* This is called if we are about to lose said lines because of buffer filling,
903 and at the end of the file. The data in the line is written using fwrite() so
904 that a binary zero does not terminate it.
905
906 Arguments:
907 lastmatchnumber the number of the last matching line, plus one
908 lastmatchrestart where we restarted after the last match
909 endptr end of available data
910 printname filename for printing
911
912 Returns: nothing
913 */
914
915 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
916 char *endptr, char *printname)
917 {
918 if (after_context > 0 && lastmatchnumber > 0)
919 {
920 int count = 0;
921 while (lastmatchrestart < endptr && count++ < after_context)
922 {
923 int ellength;
924 char *pp = lastmatchrestart;
925 if (printname != NULL) fprintf(stdout, "%s-", printname);
926 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
927 pp = end_of_line(pp, endptr, &ellength);
928 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
929 lastmatchrestart = pp;
930 }
931 hyphenpending = TRUE;
932 }
933 }
934
935
936
937 /*************************************************
938 * Apply patterns to subject till one matches *
939 *************************************************/
940
941 /* This function is called to run through all patterns, looking for a match. It
942 is used multiple times for the same subject when colouring is enabled, in order
943 to find all possible matches.
944
945 Arguments:
946 matchptr the start of the subject
947 length the length of the subject to match
948 startoffset where to start matching
949 offsets the offsets vector to fill in
950 mrc address of where to put the result of pcre_exec()
951
952 Returns: TRUE if there was a match
953 FALSE if there was no match
954 invert if there was a non-fatal error
955 */
956
957 static BOOL
958 match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
959 int *mrc)
960 {
961 int i;
962 size_t slen = length;
963 const char *msg = "this text:\n\n";
964 if (slen > 200)
965 {
966 slen = 200;
967 msg = "text that starts:\n\n";
968 }
969 for (i = 0; i < pattern_count; i++)
970 {
971 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
972 startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
973 if (*mrc >= 0) return TRUE;
974 if (*mrc == PCRE_ERROR_NOMATCH) continue;
975 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
976 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
977 fprintf(stderr, "%s", msg);
978 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
979 fprintf(stderr, "\n\n");
980 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
981 resource_error = TRUE;
982 if (error_count++ > 20)
983 {
984 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
985 pcregrep_exit(2);
986 }
987 return invert; /* No more matching; don't show the line again */
988 }
989
990 return FALSE; /* No match, no errors */
991 }
992
993
994
995 /*************************************************
996 * Grep an individual file *
997 *************************************************/
998
999 /* This is called from grep_or_recurse() below. It uses a buffer that is three
1000 times the value of bufthird. The matching point is never allowed to stray into
1001 the top third of the buffer, thus keeping more of the file available for
1002 context printing or for multiline scanning. For large files, the pointer will
1003 be in the middle third most of the time, so the bottom third is available for
1004 "before" context printing.
1005
1006 Arguments:
1007 handle the fopened FILE stream for a normal file
1008 the gzFile pointer when reading is via libz
1009 the BZFILE pointer when reading is via libbz2
1010 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1011 filename the file name or NULL (for errors)
1012 printname the file name if it is to be printed for each match
1013 or NULL if the file name is not to be printed
1014 it cannot be NULL if filenames[_nomatch]_only is set
1015
1016 Returns: 0 if there was at least one match
1017 1 otherwise (no matches)
1018 2 if an overlong line is encountered
1019 3 if there is a read error on a .bz2 file
1020 */
1021
1022 static int
1023 pcregrep(void *handle, int frtype, char *filename, char *printname)
1024 {
1025 int rc = 1;
1026 int linenumber = 1;
1027 int lastmatchnumber = 0;
1028 int count = 0;
1029 int filepos = 0;
1030 int offsets[OFFSET_SIZE];
1031 char *lastmatchrestart = NULL;
1032 char *ptr = main_buffer;
1033 char *endptr;
1034 size_t bufflength;
1035 BOOL endhyphenpending = FALSE;
1036 BOOL input_line_buffered = line_buffered;
1037 FILE *in = NULL; /* Ensure initialized */
1038
1039 #ifdef SUPPORT_LIBZ
1040 gzFile ingz = NULL;
1041 #endif
1042
1043 #ifdef SUPPORT_LIBBZ2
1044 BZFILE *inbz2 = NULL;
1045 #endif
1046
1047
1048 /* Do the first read into the start of the buffer and set up the pointer to end
1049 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1050 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1051 fail. */
1052
1053 #ifdef SUPPORT_LIBZ
1054 if (frtype == FR_LIBZ)
1055 {
1056 ingz = (gzFile)handle;
1057 bufflength = gzread (ingz, main_buffer, bufsize);
1058 }
1059 else
1060 #endif
1061
1062 #ifdef SUPPORT_LIBBZ2
1063 if (frtype == FR_LIBBZ2)
1064 {
1065 inbz2 = (BZFILE *)handle;
1066 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1067 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1068 } /* without the cast it is unsigned. */
1069 else
1070 #endif
1071
1072 {
1073 in = (FILE *)handle;
1074 if (is_file_tty(in)) input_line_buffered = TRUE;
1075 bufflength = input_line_buffered?
1076 read_one_line(main_buffer, bufsize, in) :
1077 fread(main_buffer, 1, bufsize, in);
1078 }
1079
1080 endptr = main_buffer + bufflength;
1081
1082 /* Loop while the current pointer is not at the end of the file. For large
1083 files, endptr will be at the end of the buffer when we are in the middle of the
1084 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1085 way, the buffer is shifted left and re-filled. */
1086
1087 while (ptr < endptr)
1088 {
1089 int endlinelength;
1090 int mrc = 0;
1091 int startoffset = 0;
1092 BOOL match;
1093 char *matchptr = ptr;
1094 char *t = ptr;
1095 size_t length, linelength;
1096
1097 /* At this point, ptr is at the start of a line. We need to find the length
1098 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1099 length remainder of the data in the buffer. Otherwise, it is the length of
1100 the next line, excluding the terminating newline. After matching, we always
1101 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1102 option is used for compiling, so that any match is constrained to be in the
1103 first line. */
1104
1105 t = end_of_line(t, endptr, &endlinelength);
1106 linelength = t - ptr - endlinelength;
1107 length = multiline? (size_t)(endptr - ptr) : linelength;
1108
1109 /* Check to see if the line we are looking at extends right to the very end
1110 of the buffer without a line terminator. This means the line is too long to
1111 handle. */
1112
1113 if (endlinelength == 0 && t == main_buffer + bufsize)
1114 {
1115 fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1116 "pcregrep: check the --buffer-size option\n",
1117 linenumber,
1118 (filename == NULL)? "" : " of file ",
1119 (filename == NULL)? "" : filename);
1120 return 2;
1121 }
1122
1123 /* Extra processing for Jeffrey Friedl's debugging. */
1124
1125 #ifdef JFRIEDL_DEBUG
1126 if (jfriedl_XT || jfriedl_XR)
1127 {
1128 #include <sys/time.h>
1129 #include <time.h>
1130 struct timeval start_time, end_time;
1131 struct timezone dummy;
1132 int i;
1133
1134 if (jfriedl_XT)
1135 {
1136 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1137 const char *orig = ptr;
1138 ptr = malloc(newlen + 1);
1139 if (!ptr) {
1140 printf("out of memory");
1141 pcregrep_exit(2);
1142 }
1143 endptr = ptr;
1144 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1145 for (i = 0; i < jfriedl_XT; i++) {
1146 strncpy(endptr, orig, length);
1147 endptr += length;
1148 }
1149 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1150 length = newlen;
1151 }
1152
1153 if (gettimeofday(&start_time, &dummy) != 0)
1154 perror("bad gettimeofday");
1155
1156
1157 for (i = 0; i < jfriedl_XR; i++)
1158 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1159 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1160
1161 if (gettimeofday(&end_time, &dummy) != 0)
1162 perror("bad gettimeofday");
1163
1164 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1165 -
1166 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1167
1168 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1169 return 0;
1170 }
1171 #endif
1172
1173 /* We come back here after a match when the -o option (only_matching) is set,
1174 in order to find any further matches in the same line. */
1175
1176 ONLY_MATCHING_RESTART:
1177
1178 /* Run through all the patterns until one matches or there is an error other
1179 than NOMATCH. This code is in a subroutine so that it can be re-used for
1180 finding subsequent matches when colouring matched lines. */
1181
1182 match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1183
1184 /* If it's a match or a not-match (as required), do what's wanted. */
1185
1186 if (match != invert)
1187 {
1188 BOOL hyphenprinted = FALSE;
1189
1190 /* We've failed if we want a file that doesn't have any matches. */
1191
1192 if (filenames == FN_NOMATCH_ONLY) return 1;
1193
1194 /* Just count if just counting is wanted. */
1195
1196 if (count_only) count++;
1197
1198 /* If all we want is a file name, there is no need to scan any more lines
1199 in the file. */
1200
1201 else if (filenames == FN_MATCH_ONLY)
1202 {
1203 fprintf(stdout, "%s\n", printname);
1204 return 0;
1205 }
1206
1207 /* Likewise, if all we want is a yes/no answer. */
1208
1209 else if (quiet) return 0;
1210
1211 /* The --only-matching option prints just the substring that matched, or a
1212 captured portion of it, as long as this string is not empty, and the
1213 --file-offsets and --line-offsets options output offsets for the matching
1214 substring (they both force --only-matching = 0). None of these options
1215 prints any context. Afterwards, adjust the start and then jump back to look
1216 for further matches in the same line. If we are in invert mode, however,
1217 nothing is printed and we do not restart - this could still be useful
1218 because the return code is set. */
1219
1220 else if (only_matching >= 0)
1221 {
1222 if (!invert)
1223 {
1224 if (printname != NULL) fprintf(stdout, "%s:", printname);
1225 if (number) fprintf(stdout, "%d:", linenumber);
1226 if (line_offsets)
1227 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1228 offsets[1] - offsets[0]);
1229 else if (file_offsets)
1230 fprintf(stdout, "%d,%d\n",
1231 (int)(filepos + matchptr + offsets[0] - ptr),
1232 offsets[1] - offsets[0]);
1233 else if (only_matching < mrc)
1234 {
1235 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1236 if (plen > 0)
1237 {
1238 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1239 FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1240 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1241 fprintf(stdout, "\n");
1242 }
1243 }
1244 else if (printname != NULL || number) fprintf(stdout, "\n");
1245 match = FALSE;
1246 if (line_buffered) fflush(stdout);
1247 rc = 0; /* Had some success */
1248 startoffset = offsets[1]; /* Restart after the match */
1249 goto ONLY_MATCHING_RESTART;
1250 }
1251 }
1252
1253 /* This is the default case when none of the above options is set. We print
1254 the matching lines(s), possibly preceded and/or followed by other lines of
1255 context. */
1256
1257 else
1258 {
1259 /* See if there is a requirement to print some "after" lines from a
1260 previous match. We never print any overlaps. */
1261
1262 if (after_context > 0 && lastmatchnumber > 0)
1263 {
1264 int ellength;
1265 int linecount = 0;
1266 char *p = lastmatchrestart;
1267
1268 while (p < ptr && linecount < after_context)
1269 {
1270 p = end_of_line(p, ptr, &ellength);
1271 linecount++;
1272 }
1273
1274 /* It is important to advance lastmatchrestart during this printing so
1275 that it interacts correctly with any "before" printing below. Print
1276 each line's data using fwrite() in case there are binary zeroes. */
1277
1278 while (lastmatchrestart < p)
1279 {
1280 char *pp = lastmatchrestart;
1281 if (printname != NULL) fprintf(stdout, "%s-", printname);
1282 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1283 pp = end_of_line(pp, endptr, &ellength);
1284 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1285 lastmatchrestart = pp;
1286 }
1287 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1288 }
1289
1290 /* If there were non-contiguous lines printed above, insert hyphens. */
1291
1292 if (hyphenpending)
1293 {
1294 fprintf(stdout, "--\n");
1295 hyphenpending = FALSE;
1296 hyphenprinted = TRUE;
1297 }
1298
1299 /* See if there is a requirement to print some "before" lines for this
1300 match. Again, don't print overlaps. */
1301
1302 if (before_context > 0)
1303 {
1304 int linecount = 0;
1305 char *p = ptr;
1306
1307 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1308 linecount < before_context)
1309 {
1310 linecount++;
1311 p = previous_line(p, main_buffer);
1312 }
1313
1314 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1315 fprintf(stdout, "--\n");
1316
1317 while (p < ptr)
1318 {
1319 int ellength;
1320 char *pp = p;
1321 if (printname != NULL) fprintf(stdout, "%s-", printname);
1322 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1323 pp = end_of_line(pp, endptr, &ellength);
1324 FWRITE(p, 1, pp - p, stdout);
1325 p = pp;
1326 }
1327 }
1328
1329 /* Now print the matching line(s); ensure we set hyphenpending at the end
1330 of the file if any context lines are being output. */
1331
1332 if (after_context > 0 || before_context > 0)
1333 endhyphenpending = TRUE;
1334
1335 if (printname != NULL) fprintf(stdout, "%s:", printname);
1336 if (number) fprintf(stdout, "%d:", linenumber);
1337
1338 /* In multiline mode, we want to print to the end of the line in which
1339 the end of the matched string is found, so we adjust linelength and the
1340 line number appropriately, but only when there actually was a match
1341 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1342 the match will always be before the first newline sequence. */
1343
1344 if (multiline & !invert)
1345 {
1346 char *endmatch = ptr + offsets[1];
1347 t = ptr;
1348 while (t < endmatch)
1349 {
1350 t = end_of_line(t, endptr, &endlinelength);
1351 if (t < endmatch) linenumber++; else break;
1352 }
1353 linelength = t - ptr - endlinelength;
1354 }
1355
1356 /*** NOTE: Use only fwrite() to output the data line, so that binary
1357 zeroes are treated as just another data character. */
1358
1359 /* This extra option, for Jeffrey Friedl's debugging requirements,
1360 replaces the matched string, or a specific captured string if it exists,
1361 with X. When this happens, colouring is ignored. */
1362
1363 #ifdef JFRIEDL_DEBUG
1364 if (S_arg >= 0 && S_arg < mrc)
1365 {
1366 int first = S_arg * 2;
1367 int last = first + 1;
1368 FWRITE(ptr, 1, offsets[first], stdout);
1369 fprintf(stdout, "X");
1370 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1371 }
1372 else
1373 #endif
1374
1375 /* We have to split the line(s) up if colouring, and search for further
1376 matches, but not of course if the line is a non-match. */
1377
1378 if (do_colour && !invert)
1379 {
1380 int plength;
1381 FWRITE(ptr, 1, offsets[0], stdout);
1382 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1383 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1384 fprintf(stdout, "%c[00m", 0x1b);
1385 for (;;)
1386 {
1387 startoffset = offsets[1];
1388 if (startoffset >= linelength + endlinelength ||
1389 !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1390 break;
1391 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1392 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1393 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1394 fprintf(stdout, "%c[00m", 0x1b);
1395 }
1396
1397 /* In multiline mode, we may have already printed the complete line
1398 and its line-ending characters (if they matched the pattern), so there
1399 may be no more to print. */
1400
1401 plength = (linelength + endlinelength) - startoffset;
1402 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1403 }
1404
1405 /* Not colouring; no need to search for further matches */
1406
1407 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1408 }
1409
1410 /* End of doing what has to be done for a match. If --line-buffered was
1411 given, flush the output. */
1412
1413 if (line_buffered) fflush(stdout);
1414 rc = 0; /* Had some success */
1415
1416 /* Remember where the last match happened for after_context. We remember
1417 where we are about to restart, and that line's number. */
1418
1419 lastmatchrestart = ptr + linelength + endlinelength;
1420 lastmatchnumber = linenumber + 1;
1421 }
1422
1423 /* For a match in multiline inverted mode (which of course did not cause
1424 anything to be printed), we have to move on to the end of the match before
1425 proceeding. */
1426
1427 if (multiline && invert && match)
1428 {
1429 int ellength;
1430 char *endmatch = ptr + offsets[1];
1431 t = ptr;
1432 while (t < endmatch)
1433 {
1434 t = end_of_line(t, endptr, &ellength);
1435 if (t <= endmatch) linenumber++; else break;
1436 }
1437 endmatch = end_of_line(endmatch, endptr, &ellength);
1438 linelength = endmatch - ptr - ellength;
1439 }
1440
1441 /* Advance to after the newline and increment the line number. The file
1442 offset to the current line is maintained in filepos. */
1443
1444 ptr += linelength + endlinelength;
1445 filepos += (int)(linelength + endlinelength);
1446 linenumber++;
1447
1448 /* If input is line buffered, and the buffer is not yet full, read another
1449 line and add it into the buffer. */
1450
1451 if (input_line_buffered && bufflength < bufsize)
1452 {
1453 int add = read_one_line(ptr, bufsize - (ptr - main_buffer), in);
1454 bufflength += add;
1455 endptr += add;
1456 }
1457
1458 /* If we haven't yet reached the end of the file (the buffer is full), and
1459 the current point is in the top 1/3 of the buffer, slide the buffer down by
1460 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1461 about to be lost, print them. */
1462
1463 if (bufflength >= bufsize && ptr > main_buffer + 2*bufthird)
1464 {
1465 if (after_context > 0 &&
1466 lastmatchnumber > 0 &&
1467 lastmatchrestart < main_buffer + bufthird)
1468 {
1469 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1470 lastmatchnumber = 0;
1471 }
1472
1473 /* Now do the shuffle */
1474
1475 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1476 ptr -= bufthird;
1477
1478 #ifdef SUPPORT_LIBZ
1479 if (frtype == FR_LIBZ)
1480 bufflength = 2*bufthird +
1481 gzread (ingz, main_buffer + 2*bufthird, bufthird);
1482 else
1483 #endif
1484
1485 #ifdef SUPPORT_LIBBZ2
1486 if (frtype == FR_LIBBZ2)
1487 bufflength = 2*bufthird +
1488 BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1489 else
1490 #endif
1491
1492 bufflength = 2*bufthird +
1493 (input_line_buffered?
1494 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1495 fread(main_buffer + 2*bufthird, 1, bufthird, in));
1496 endptr = main_buffer + bufflength;
1497
1498 /* Adjust any last match point */
1499
1500 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1501 }
1502 } /* Loop through the whole file */
1503
1504 /* End of file; print final "after" lines if wanted; do_after_lines sets
1505 hyphenpending if it prints something. */
1506
1507 if (only_matching < 0 && !count_only)
1508 {
1509 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1510 hyphenpending |= endhyphenpending;
1511 }
1512
1513 /* Print the file name if we are looking for those without matches and there
1514 were none. If we found a match, we won't have got this far. */
1515
1516 if (filenames == FN_NOMATCH_ONLY)
1517 {
1518 fprintf(stdout, "%s\n", printname);
1519 return 0;
1520 }
1521
1522 /* Print the match count if wanted */
1523
1524 if (count_only)
1525 {
1526 if (count > 0 || !omit_zero_count)
1527 {
1528 if (printname != NULL && filenames != FN_NONE)
1529 fprintf(stdout, "%s:", printname);
1530 fprintf(stdout, "%d\n", count);
1531 }
1532 }
1533
1534 return rc;
1535 }
1536
1537
1538
1539 /*************************************************
1540 * Grep a file or recurse into a directory *
1541 *************************************************/
1542
1543 /* Given a path name, if it's a directory, scan all the files if we are
1544 recursing; if it's a file, grep it.
1545
1546 Arguments:
1547 pathname the path to investigate
1548 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1549 only_one_at_top TRUE if the path is the only one at toplevel
1550
1551 Returns: 0 if there was at least one match
1552 1 if there were no matches
1553 2 there was some kind of error
1554
1555 However, file opening failures are suppressed if "silent" is set.
1556 */
1557
1558 static int
1559 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1560 {
1561 int rc = 1;
1562 int sep;
1563 int frtype;
1564 int pathlen;
1565 void *handle;
1566 FILE *in = NULL; /* Ensure initialized */
1567
1568 #ifdef SUPPORT_LIBZ
1569 gzFile ingz = NULL;
1570 #endif
1571
1572 #ifdef SUPPORT_LIBBZ2
1573 BZFILE *inbz2 = NULL;
1574 #endif
1575
1576 /* If the file name is "-" we scan stdin */
1577
1578 if (strcmp(pathname, "-") == 0)
1579 {
1580 return pcregrep(stdin, FR_PLAIN, stdin_name,
1581 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1582 stdin_name : NULL);
1583 }
1584
1585 /* If the file is a directory, skip if skipping or if we are recursing, scan
1586 each file and directory within it, subject to any include or exclude patterns
1587 that were set. The scanning code is localized so it can be made
1588 system-specific. */
1589
1590 if ((sep = isdirectory(pathname)) != 0)
1591 {
1592 if (dee_action == dee_SKIP) return 1;
1593 if (dee_action == dee_RECURSE)
1594 {
1595 char buffer[1024];
1596 char *nextfile;
1597 directory_type *dir = opendirectory(pathname);
1598
1599 if (dir == NULL)
1600 {
1601 if (!silent)
1602 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1603 strerror(errno));
1604 return 2;
1605 }
1606
1607 while ((nextfile = readdirectory(dir)) != NULL)
1608 {
1609 int frc, nflen;
1610 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1611 nflen = (int)(strlen(nextfile));
1612
1613 if (isdirectory(buffer))
1614 {
1615 if (exclude_dir_compiled != NULL &&
1616 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1617 continue;
1618
1619 if (include_dir_compiled != NULL &&
1620 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1621 continue;
1622 }
1623 else
1624 {
1625 if (exclude_compiled != NULL &&
1626 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1627 continue;
1628
1629 if (include_compiled != NULL &&
1630 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1631 continue;
1632 }
1633
1634 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1635 if (frc > 1) rc = frc;
1636 else if (frc == 0 && rc == 1) rc = 0;
1637 }
1638
1639 closedirectory(dir);
1640 return rc;
1641 }
1642 }
1643
1644 /* If the file is not a directory and not a regular file, skip it if that's
1645 been requested. */
1646
1647 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1648
1649 /* Control reaches here if we have a regular file, or if we have a directory
1650 and recursion or skipping was not requested, or if we have anything else and
1651 skipping was not requested. The scan proceeds. If this is the first and only
1652 argument at top level, we don't show the file name, unless we are only showing
1653 the file name, or the filename was forced (-H). */
1654
1655 pathlen = (int)(strlen(pathname));
1656
1657 /* Open using zlib if it is supported and the file name ends with .gz. */
1658
1659 #ifdef SUPPORT_LIBZ
1660 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1661 {
1662 ingz = gzopen(pathname, "rb");
1663 if (ingz == NULL)
1664 {
1665 if (!silent)
1666 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1667 strerror(errno));
1668 return 2;
1669 }
1670 handle = (void *)ingz;
1671 frtype = FR_LIBZ;
1672 }
1673 else
1674 #endif
1675
1676 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1677
1678 #ifdef SUPPORT_LIBBZ2
1679 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1680 {
1681 inbz2 = BZ2_bzopen(pathname, "rb");
1682 handle = (void *)inbz2;
1683 frtype = FR_LIBBZ2;
1684 }
1685 else
1686 #endif
1687
1688 /* Otherwise use plain fopen(). The label is so that we can come back here if
1689 an attempt to read a .bz2 file indicates that it really is a plain file. */
1690
1691 #ifdef SUPPORT_LIBBZ2
1692 PLAIN_FILE:
1693 #endif
1694 {
1695 in = fopen(pathname, "rb");
1696 handle = (void *)in;
1697 frtype = FR_PLAIN;
1698 }
1699
1700 /* All the opening methods return errno when they fail. */
1701
1702 if (handle == NULL)
1703 {
1704 if (!silent)
1705 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1706 strerror(errno));
1707 return 2;
1708 }
1709
1710 /* Now grep the file */
1711
1712 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1713 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1714
1715 /* Close in an appropriate manner. */
1716
1717 #ifdef SUPPORT_LIBZ
1718 if (frtype == FR_LIBZ)
1719 gzclose(ingz);
1720 else
1721 #endif
1722
1723 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1724 read failed. If the error indicates that the file isn't in fact bzipped, try
1725 again as a normal file. */
1726
1727 #ifdef SUPPORT_LIBBZ2
1728 if (frtype == FR_LIBBZ2)
1729 {
1730 if (rc == 3)
1731 {
1732 int errnum;
1733 const char *err = BZ2_bzerror(inbz2, &errnum);
1734 if (errnum == BZ_DATA_ERROR_MAGIC)
1735 {
1736 BZ2_bzclose(inbz2);
1737 goto PLAIN_FILE;
1738 }
1739 else if (!silent)
1740 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1741 pathname, err);
1742 rc = 2; /* The normal "something went wrong" code */
1743 }
1744 BZ2_bzclose(inbz2);
1745 }
1746 else
1747 #endif
1748
1749 /* Normal file close */
1750
1751 fclose(in);
1752
1753 /* Pass back the yield from pcregrep(). */
1754
1755 return rc;
1756 }
1757
1758
1759
1760
1761 /*************************************************
1762 * Usage function *
1763 *************************************************/
1764
1765 static int
1766 usage(int rc)
1767 {
1768 option_item *op;
1769 fprintf(stderr, "Usage: pcregrep [-");
1770 for (op = optionlist; op->one_char != 0; op++)
1771 {
1772 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1773 }
1774 fprintf(stderr, "] [long options] [pattern] [files]\n");
1775 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1776 "options.\n");
1777 return rc;
1778 }
1779
1780
1781
1782
1783 /*************************************************
1784 * Help function *
1785 *************************************************/
1786
1787 static void
1788 help(void)
1789 {
1790 option_item *op;
1791
1792 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1793 printf("Search for PATTERN in each FILE or standard input.\n");
1794 printf("PATTERN must be present if neither -e nor -f is used.\n");
1795 printf("\"-\" can be used as a file name to mean STDIN.\n");
1796
1797 #ifdef SUPPORT_LIBZ
1798 printf("Files whose names end in .gz are read using zlib.\n");
1799 #endif
1800
1801 #ifdef SUPPORT_LIBBZ2
1802 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1803 #endif
1804
1805 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1806 printf("Other files and the standard input are read as plain files.\n\n");
1807 #else
1808 printf("All files are read as plain files, without any interpretation.\n\n");
1809 #endif
1810
1811 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1812 printf("Options:\n");
1813
1814 for (op = optionlist; op->one_char != 0; op++)
1815 {
1816 int n;
1817 char s[4];
1818
1819 /* Two options were accidentally implemented and documented with underscores
1820 instead of hyphens in their names, something that was not noticed for quite a
1821 few releases. When fixing this, I left the underscored versions in the list
1822 in case people were using them. However, we don't want to display them in the
1823 help data. There are no other options that contain underscores, and we do not
1824 expect ever to implement such options. Therefore, just omit any option that
1825 contains an underscore. */
1826
1827 if (strchr(op->long_name, '_') != NULL) continue;
1828
1829 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1830 n = 31 - printf(" %s --%s", s, op->long_name);
1831 if (n < 1) n = 1;
1832 printf("%.*s%s\n", n, " ", op->help_text);
1833 }
1834
1835 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1836 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1837 printf("When reading patterns from a file instead of using a command line option,\n");
1838 printf("trailing white space is removed and blank lines are ignored.\n");
1839 printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1840 MAX_PATTERN_COUNT, PATBUFSIZE);
1841
1842 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1843 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1844 }
1845
1846
1847
1848
1849 /*************************************************
1850 * Handle a single-letter, no data option *
1851 *************************************************/
1852
1853 static int
1854 handle_option(int letter, int options)
1855 {
1856 switch(letter)
1857 {
1858 case N_FOFFSETS: file_offsets = TRUE; break;
1859 case N_HELP: help(); pcregrep_exit(0);
1860 case N_LOFFSETS: line_offsets = number = TRUE; break;
1861 case N_LBUFFER: line_buffered = TRUE; break;
1862 case 'c': count_only = TRUE; break;
1863 case 'F': process_options |= PO_FIXED_STRINGS; break;
1864 case 'H': filenames = FN_FORCE; break;
1865 case 'h': filenames = FN_NONE; break;
1866 case 'i': options |= PCRE_CASELESS; break;
1867 case 'j': study_options |= PCRE_STUDY_JIT_COMPILE; break;
1868 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1869 case 'L': filenames = FN_NOMATCH_ONLY; break;
1870 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1871 case 'n': number = TRUE; break;
1872 case 'o': only_matching = 0; break;
1873 case 'q': quiet = TRUE; break;
1874 case 'r': dee_action = dee_RECURSE; break;
1875 case 's': silent = TRUE; break;
1876 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1877 case 'v': invert = TRUE; break;
1878 case 'w': process_options |= PO_WORD_MATCH; break;
1879 case 'x': process_options |= PO_LINE_MATCH; break;
1880
1881 case 'V':
1882 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1883 pcregrep_exit(0);
1884 break;
1885
1886 default:
1887 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1888 pcregrep_exit(usage(2));
1889 }
1890
1891 return options;
1892 }
1893
1894
1895
1896
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12 or 13 take "th" ("11th", "112th"), not the suffix that their final
digit alone would suggest.

Argument:  n    the number
Returns:   pointer to a static buffer holding the text; it is overwritten by
           the next call, so the result must be used or copied before then
*/

static char *
ordin(int n)
{
/* Three characters per byte is a safe over-estimate of the decimal digits in
an int; the extra four cover a sign, the two-letter suffix and the zero. */

static char buffer[sizeof(int) * 3 + 4];
char *p = buffer;
int last_two = n % 100;

p += sprintf(p, "%d", n);

if (last_two >= 11 && last_two <= 13) strcpy(p, "th");
else switch (n%10)
  {
  case 1: strcpy(p, "st"); break;
  case 2: strcpy(p, "nd"); break;
  case 3: strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }

return buffer;
}
1919
1920
1921
1922 /*************************************************
1923 * Compile a single pattern *
1924 *************************************************/
1925
1926 /* When the -F option has been used, this is called for each substring.
1927 Otherwise it's called for each supplied pattern.
1928
1929 Arguments:
1930 pattern the pattern string
1931 options the PCRE options
1932 filename the file name, or NULL for a command-line pattern
1933 count 0 if this is the only command line pattern, or
1934 number of the command line pattern, or
1935 linenumber for a pattern from a file
1936
1937 Returns: TRUE on success, FALSE after an error
1938 */
1939
1940 static BOOL
1941 compile_single_pattern(char *pattern, int options, char *filename, int count)
1942 {
1943 char buffer[PATBUFSIZE];
1944 const char *error;
1945 int errptr;
1946
1947 if (pattern_count >= MAX_PATTERN_COUNT)
1948 {
1949 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1950 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1951 return FALSE;
1952 }
1953
1954 sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
1955 suffix[process_options]);
1956 pattern_list[pattern_count] =
1957 pcre_compile(buffer, options, &error, &errptr, pcretables);
1958 if (pattern_list[pattern_count] != NULL)
1959 {
1960 pattern_count++;
1961 return TRUE;
1962 }
1963
1964 /* Handle compile errors */
1965
1966 errptr -= (int)strlen(prefix[process_options]);
1967 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1968
1969 if (filename == NULL)
1970 {
1971 if (count == 0)
1972 fprintf(stderr, "pcregrep: Error in command-line regex "
1973 "at offset %d: %s\n", errptr, error);
1974 else
1975 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1976 "at offset %d: %s\n", ordin(count), errptr, error);
1977 }
1978 else
1979 {
1980 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1981 "at offset %d: %s\n", count, filename, errptr, error);
1982 }
1983
1984 return FALSE;
1985 }
1986
1987
1988
1989 /*************************************************
1990 * Compile one supplied pattern *
1991 *************************************************/
1992
1993 /* When the -F option has been used, each string may be a list of strings,
1994 separated by line breaks. They will be matched literally.
1995
1996 Arguments:
1997 pattern the pattern string
1998 options the PCRE options
1999 filename the file name, or NULL for a command-line pattern
2000 count 0 if this is the only command line pattern, or
2001 number of the command line pattern, or
2002 linenumber for a pattern from a file
2003
2004 Returns: TRUE on success, FALSE after an error
2005 */
2006
2007 static BOOL
2008 compile_pattern(char *pattern, int options, char *filename, int count)
2009 {
2010 if ((process_options & PO_FIXED_STRINGS) != 0)
2011 {
2012 char *eop = pattern + strlen(pattern);
2013 char buffer[PATBUFSIZE];
2014 for(;;)
2015 {
2016 int ellength;
2017 char *p = end_of_line(pattern, eop, &ellength);
2018 if (ellength == 0)
2019 return compile_single_pattern(pattern, options, filename, count);
2020 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
2021 pattern = p;
2022 if (!compile_single_pattern(buffer, options, filename, count))
2023 return FALSE;
2024 }
2025 }
2026 else return compile_single_pattern(pattern, options, filename, count);
2027 }
2028
2029
2030
2031 /*************************************************
2032 * Main program *
2033 *************************************************/
2034
2035 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2036
2037 int
2038 main(int argc, char **argv)
2039 {
2040 int i, j;
2041 int rc = 1;
2042 int pcre_options = 0;
2043 int cmd_pattern_count = 0;
2044 int hint_count = 0;
2045 int errptr;
2046 BOOL only_one_at_top;
2047 char *patterns[MAX_PATTERN_COUNT];
2048 const char *locale_from = "--locale";
2049 const char *error;
2050
2051 /* Set the default line ending value from the default in the PCRE library;
2052 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2053 Note that the return values from pcre_config(), though derived from the ASCII
2054 codes, are the same in EBCDIC environments, so we must use the actual values
2055 rather than escapes such as '\r'. */
2056
2057 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2058 switch(i)
2059 {
2060 default: newline = (char *)"lf"; break;
2061 case 13: newline = (char *)"cr"; break;
2062 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2063 case -1: newline = (char *)"any"; break;
2064 case -2: newline = (char *)"anycrlf"; break;
2065 }
2066
2067 /* Process the options */
2068
2069 for (i = 1; i < argc; i++)
2070 {
2071 option_item *op = NULL;
2072 char *option_data = (char *)""; /* default to keep compiler happy */
2073 BOOL longop;
2074 BOOL longopwasequals = FALSE;
2075
2076 if (argv[i][0] != '-') break;
2077
2078 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2079 but only if we have previously had -e or -f to define the patterns. */
2080
2081 if (argv[i][1] == 0)
2082 {
2083 if (pattern_filename != NULL || pattern_count > 0) break;
2084 else pcregrep_exit(usage(2));
2085 }
2086
2087 /* Handle a long name option, or -- to terminate the options */
2088
2089 if (argv[i][1] == '-')
2090 {
2091 char *arg = argv[i] + 2;
2092 char *argequals = strchr(arg, '=');
2093
2094 if (*arg == 0) /* -- terminates options */
2095 {
2096 i++;
2097 break; /* out of the options-handling loop */
2098 }
2099
2100 longop = TRUE;
2101
2102 /* Some long options have data that follows after =, for example file=name.
2103 Some options have variations in the long name spelling: specifically, we
2104 allow "regexp" because GNU grep allows it, though I personally go along
2105 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2106 These options are entered in the table as "regex(p)". Options can be in
2107 both these categories. */
2108
2109 for (op = optionlist; op->one_char != 0; op++)
2110 {
2111 char *opbra = strchr(op->long_name, '(');
2112 char *equals = strchr(op->long_name, '=');
2113
2114 /* Handle options with only one spelling of the name */
2115
2116 if (opbra == NULL) /* Does not contain '(' */
2117 {
2118 if (equals == NULL) /* Not thing=data case */
2119 {
2120 if (strcmp(arg, op->long_name) == 0) break;
2121 }
2122 else /* Special case xxx=data */
2123 {
2124 int oplen = (int)(equals - op->long_name);
2125 int arglen = (argequals == NULL)?
2126 (int)strlen(arg) : (int)(argequals - arg);
2127 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2128 {
2129 option_data = arg + arglen;
2130 if (*option_data == '=')
2131 {
2132 option_data++;
2133 longopwasequals = TRUE;
2134 }
2135 break;
2136 }
2137 }
2138 }
2139
2140 /* Handle options with an alternate spelling of the name */
2141
2142 else
2143 {
2144 char buff1[24];
2145 char buff2[24];
2146
2147 int baselen = (int)(opbra - op->long_name);
2148 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2149 int arglen = (argequals == NULL || equals == NULL)?
2150 (int)strlen(arg) : (int)(argequals - arg);
2151
2152 sprintf(buff1, "%.*s", baselen, op->long_name);
2153 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2154
2155 if (strncmp(arg, buff1, arglen) == 0 ||
2156 strncmp(arg, buff2, arglen) == 0)
2157 {
2158 if (equals != NULL && argequals != NULL)
2159 {
2160 option_data = argequals;
2161 if (*option_data == '=')
2162 {
2163 option_data++;
2164 longopwasequals = TRUE;
2165 }
2166 }
2167 break;
2168 }
2169 }
2170 }
2171
2172 if (op->one_char == 0)
2173 {
2174 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2175 pcregrep_exit(usage(2));
2176 }
2177 }
2178
2179 /* Jeffrey Friedl's debugging harness uses these additional options which
2180 are not in the right form for putting in the option table because they use
2181 only one hyphen, yet are more than one character long. By putting them
2182 separately here, they will not get displayed as part of the help() output,
2183 but I don't think Jeffrey will care about that. */
2184
2185 #ifdef JFRIEDL_DEBUG
2186 else if (strcmp(argv[i], "-pre") == 0) {
2187 jfriedl_prefix = argv[++i];
2188 continue;
2189 } else if (strcmp(argv[i], "-post") == 0) {
2190 jfriedl_postfix = argv[++i];
2191 continue;
2192 } else if (strcmp(argv[i], "-XT") == 0) {
2193 sscanf(argv[++i], "%d", &jfriedl_XT);
2194 continue;
2195 } else if (strcmp(argv[i], "-XR") == 0) {
2196 sscanf(argv[++i], "%d", &jfriedl_XR);
2197 continue;
2198 }
2199 #endif
2200
2201
2202 /* One-char options; many that have no data may be in a single argument; we
2203 continue till we hit the last one or one that needs data. */
2204
2205 else
2206 {
2207 char *s = argv[i] + 1;
2208 longop = FALSE;
2209 while (*s != 0)
2210 {
2211 for (op = optionlist; op->one_char != 0; op++)
2212 {
2213 if (*s == op->one_char) break;
2214 }
2215 if (op->one_char == 0)
2216 {
2217 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2218 *s, argv[i]);
2219 pcregrep_exit(usage(2));
2220 }
2221
2222 /* Check for a single-character option that has data: OP_OP_NUMBER
2223 is used for one that either has a numerical number or defaults, i.e. the
2224 data is optional. If a digit follows, there is data; if not, carry on
2225 with other single-character options in the same string. */
2226
2227 option_data = s+1;
2228 if (op->type == OP_OP_NUMBER)
2229 {
2230 if (isdigit((unsigned char)s[1])) break;
2231 }
2232 else /* Check for end or a dataless option */
2233 {
2234 if (op->type != OP_NODATA || s[1] == 0) break;
2235 }
2236
2237 /* Handle a single-character option with no data, then loop for the
2238 next character in the string. */
2239
2240 pcre_options = handle_option(*s++, pcre_options);
2241 }
2242 }
2243
2244 /* At this point we should have op pointing to a matched option. If the type
2245 is NO_DATA, it means that there is no data, and the option might set
2246 something in the PCRE options. */
2247
2248 if (op->type == OP_NODATA)
2249 {
2250 pcre_options = handle_option(op->one_char, pcre_options);
2251 continue;
2252 }
2253
2254 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2255 either has a value or defaults to something. It cannot have data in a
2256 separate item. At the moment, the only such options are "colo(u)r",
2257 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2258
2259 if (*option_data == 0 &&
2260 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2261 {
2262 switch (op->one_char)
2263 {
2264 case N_COLOUR:
2265 colour_option = (char *)"auto";
2266 break;
2267
2268 case 'o':
2269 only_matching = 0;
2270 break;
2271
2272 #ifdef JFRIEDL_DEBUG
2273 case 'S':
2274 S_arg = 0;
2275 break;
2276 #endif
2277 }
2278 continue;
2279 }
2280
2281 /* Otherwise, find the data string for the option. */
2282
2283 if (*option_data == 0)
2284 {
2285 if (i >= argc - 1 || longopwasequals)
2286 {
2287 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2288 pcregrep_exit(usage(2));
2289 }
2290 option_data = argv[++i];
2291 }
2292
2293 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2294 multiple times to create a list of patterns. */
2295
2296 if (op->type == OP_PATLIST)
2297 {
2298 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2299 {
2300 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2301 MAX_PATTERN_COUNT);
2302 return 2;
2303 }
2304 patterns[cmd_pattern_count++] = option_data;
2305 }
2306
2307 /* Otherwise, deal with single string or numeric data values. */
2308
2309 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2310 op->type != OP_OP_NUMBER)
2311 {
2312 *((char **)op->dataptr) = option_data;
2313 }
2314
2315 /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2316 only for unpicking arguments, so just keep it simple. */
2317
2318 else
2319 {
2320 unsigned long int n = 0;
2321 char *endptr = option_data;
2322 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2323 while (isdigit((unsigned char)(*endptr)))
2324 n = n * 10 + (int)(*endptr++ - '0');
2325 if (toupper(*endptr) == 'K')
2326 {
2327 n *= 1024;
2328 endptr++;
2329 }
2330 else if (toupper(*endptr) == 'M')
2331 {
2332 n *= 1024*1024;
2333 endptr++;
2334 }
2335 if (*endptr != 0)
2336 {
2337 if (longop)
2338 {
2339 char *equals = strchr(op->long_name, '=');
2340 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2341 (int)(equals - op->long_name);
2342 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2343 option_data, nlen, op->long_name);
2344 }
2345 else
2346 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2347 option_data, op->one_char);
2348 pcregrep_exit(usage(2));
2349 }
2350 if (op->type == OP_LONGNUMBER)
2351 *((unsigned long int *)op->dataptr) = n;
2352 else
2353 *((int *)op->dataptr) = n;
2354 }
2355 }
2356
2357 /* Options have been decoded. If -C was used, its value is used as a default
2358 for -A and -B. */
2359
2360 if (both_context > 0)
2361 {
2362 if (after_context == 0) after_context = both_context;
2363 if (before_context == 0) before_context = both_context;
2364 }
2365
2366 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2367 However, the latter two set only_matching. */
2368
2369 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2370 (file_offsets && line_offsets))
2371 {
2372 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2373 "and/or --line-offsets\n");
2374 pcregrep_exit(usage(2));
2375 }
2376
2377 if (file_offsets || line_offsets) only_matching = 0;
2378
2379 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2380 LC_ALL environment variable is set, and if so, use it. */
2381
2382 if (locale == NULL)
2383 {
2384 locale = getenv("LC_ALL");
2385 locale_from = "LCC_ALL";
2386 }
2387
2388 if (locale == NULL)
2389 {
2390 locale = getenv("LC_CTYPE");
2391 locale_from = "LC_CTYPE";
2392 }
2393
2394 /* If a locale has been provided, set it, and generate the tables the PCRE
2395 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2396
2397 if (locale != NULL)
2398 {
2399 if (setlocale(LC_CTYPE, locale) == NULL)
2400 {
2401 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2402 locale, locale_from);
2403 return 2;
2404 }
2405 pcretables = pcre_maketables();
2406 }
2407
2408 /* Sort out colouring */
2409
2410 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2411 {
2412 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2413 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2414 else
2415 {
2416 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2417 colour_option);
2418 return 2;
2419 }
2420 if (do_colour)
2421 {
2422 char *cs = getenv("PCREGREP_COLOUR");
2423 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2424 if (cs != NULL) colour_string = cs;
2425 }
2426 }
2427
2428 /* Interpret the newline type; the default settings are Unix-like. */
2429
2430 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2431 {
2432 pcre_options |= PCRE_NEWLINE_CR;
2433 endlinetype = EL_CR;
2434 }
2435 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2436 {
2437 pcre_options |= PCRE_NEWLINE_LF;
2438 endlinetype = EL_LF;
2439 }
2440 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2441 {
2442 pcre_options |= PCRE_NEWLINE_CRLF;
2443 endlinetype = EL_CRLF;
2444 }
2445 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2446 {
2447 pcre_options |= PCRE_NEWLINE_ANY;
2448 endlinetype = EL_ANY;
2449 }
2450 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2451 {
2452 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2453 endlinetype = EL_ANYCRLF;
2454 }
2455 else
2456 {
2457 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2458 return 2;
2459 }
2460
2461 /* Interpret the text values for -d and -D */
2462
2463 if (dee_option != NULL)
2464 {
2465 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2466 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2467 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2468 else
2469 {
2470 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2471 return 2;
2472 }
2473 }
2474
2475 if (DEE_option != NULL)
2476 {
2477 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2478 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2479 else
2480 {
2481 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2482 return 2;
2483 }
2484 }
2485
2486 /* Check the values for Jeffrey Friedl's debugging options. */
2487
2488 #ifdef JFRIEDL_DEBUG
2489 if (S_arg > 9)
2490 {
2491 fprintf(stderr, "pcregrep: bad value for -S option\n");
2492 return 2;
2493 }
2494 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2495 {
2496 if (jfriedl_XT == 0) jfriedl_XT = 1;
2497 if (jfriedl_XR == 0) jfriedl_XR = 1;
2498 }
2499 #endif
2500
2501 /* Get memory for the main buffer, and to store the pattern and hints lists. */
2502
2503 bufsize = 3*bufthird;
2504 main_buffer = (char *)malloc(bufsize);
2505 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2506 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2507
2508 if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2509 {
2510 fprintf(stderr, "pcregrep: malloc failed\n");
2511 goto EXIT2;
2512 }
2513
2514 /* If no patterns were provided by -e, and there is no file provided by -f,
2515 the first argument is the one and only pattern, and it must exist. */
2516
2517 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2518 {
2519 if (i >= argc) return usage(2);
2520 patterns[cmd_pattern_count++] = argv[i++];
2521 }
2522
2523 /* Compile the patterns that were provided on the command line, either by
2524 multiple uses of -e or as a single unkeyed pattern. */
2525
2526 for (j = 0; j < cmd_pattern_count; j++)
2527 {
2528 if (!compile_pattern(patterns[j], pcre_options, NULL,
2529 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2530 goto EXIT2;
2531 }
2532
2533 /* Compile the regular expressions that are provided in a file. */
2534
2535 if (pattern_filename != NULL)
2536 {
2537 int linenumber = 0;
2538 FILE *f;
2539 char *filename;
2540 char buffer[PATBUFSIZE];
2541
2542 if (strcmp(pattern_filename, "-") == 0)
2543 {
2544 f = stdin;
2545 filename = stdin_name;
2546 }
2547 else
2548 {
2549 f = fopen(pattern_filename, "r");
2550 if (f == NULL)
2551 {
2552 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2553 strerror(errno));
2554 goto EXIT2;
2555 }
2556 filename = pattern_filename;
2557 }
2558
2559 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2560 {
2561 char *s = buffer + (int)strlen(buffer);
2562 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2563 *s = 0;
2564 linenumber++;
2565 if (buffer[0] == 0) continue; /* Skip blank lines */
2566 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2567 goto EXIT2;
2568 }
2569
2570 if (f != stdin) fclose(f);
2571 }
2572
2573 /* Study the regular expressions, as we will be running them many times */
2574
2575 for (j = 0; j < pattern_count; j++)
2576 {
2577 hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
2578 if (error != NULL)
2579 {
2580 char s[16];
2581 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2582 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2583 goto EXIT2;
2584 }
2585 hint_count++;
2586 }
2587
2588 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2589 pcre_extra block for each pattern. */
2590
2591 if (match_limit > 0 || match_limit_recursion > 0)
2592 {
2593 for (j = 0; j < pattern_count; j++)
2594 {
2595 if (hints_list[j] == NULL)
2596 {
2597 hints_list[j] = malloc(sizeof(pcre_extra));
2598 if (hints_list[j] == NULL)
2599 {
2600 fprintf(stderr, "pcregrep: malloc failed\n");
2601 pcregrep_exit(2);
2602 }
2603 }
2604 if (match_limit > 0)
2605 {
2606 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2607 hints_list[j]->match_limit = match_limit;
2608 }
2609 if (match_limit_recursion > 0)
2610 {
2611 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2612 hints_list[j]->match_limit_recursion = match_limit_recursion;
2613 }
2614 }
2615 }
2616
2617 /* If there are include or exclude patterns, compile them. */
2618
2619 if (exclude_pattern != NULL)
2620 {
2621 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2622 pcretables);
2623 if (exclude_compiled == NULL)
2624 {
2625 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2626 errptr, error);
2627 goto EXIT2;
2628 }
2629 }
2630
2631 if (include_pattern != NULL)
2632 {
2633 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2634 pcretables);
2635 if (include_compiled == NULL)
2636 {
2637 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2638 errptr, error);
2639 goto EXIT2;
2640 }
2641 }
2642
2643 if (exclude_dir_pattern != NULL)
2644 {
2645 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2646 pcretables);
2647 if (exclude_dir_compiled == NULL)
2648 {
2649 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2650 errptr, error);
2651 goto EXIT2;
2652 }
2653 }
2654
2655 if (include_dir_pattern != NULL)
2656 {
2657 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2658 pcretables);
2659 if (include_dir_compiled == NULL)
2660 {
2661 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2662 errptr, error);
2663 goto EXIT2;
2664 }
2665 }
2666
2667 /* If there are no further arguments, do the business on stdin and exit. */
2668
2669 if (i >= argc)
2670 {
2671 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2672 (filenames > FN_DEFAULT)? stdin_name : NULL);
2673 goto EXIT;
2674 }
2675
2676 /* Otherwise, work through the remaining arguments as files or directories.
2677 Pass in the fact that there is only one argument at top level - this suppresses
2678 the file name if the argument is not a directory and filenames are not
2679 otherwise forced. */
2680
2681 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2682
2683 for (; i < argc; i++)
2684 {
2685 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2686 only_one_at_top);
2687 if (frc > 1) rc = frc;
2688 else if (frc == 0 && rc == 1) rc = 0;
2689 }
2690
2691 EXIT:
2692 if (main_buffer != NULL) free(main_buffer);
2693 if (pattern_list != NULL)
2694 {
2695 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2696 free(pattern_list);
2697 }
2698 if (hints_list != NULL)
2699 {
2700 for (i = 0; i < hint_count; i++)
2701 {
2702 if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);
2703 }
2704 free(hints_list);
2705 }
2706 pcregrep_exit(rc);
2707
2708 EXIT2:
2709 rc = 2;
2710 goto EXIT;
2711 }
2712
2713 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5