/[pcre]/code/tags/pcre-4.0/pcregrep.c
ViewVC logotype

Diff of /code/tags/pcre-4.0/pcregrep.c

Parent Directory | Revision Log | View Patch

code/trunk/pcregrep.c revision 49 by nigel, Sat Feb 24 21:39:33 2007 UTC code/tags/pcre-4.0/pcregrep.c revision 64 by nigel, Sat Feb 24 21:40:05 2007 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories. */
8    
9    #include <ctype.h>
10  #include <stdio.h>  #include <stdio.h>
11  #include <string.h>  #include <string.h>
12  #include <stdlib.h>  #include <stdlib.h>
# Line 17  its pattern matching. */ Line 19  its pattern matching. */
19    
20  typedef int BOOL;  typedef int BOOL;
21    
22    #define VERSION "3.0 14-Jan-2003"
23    #define MAX_PATTERN_COUNT 100
24    
25    
26  /*************************************************  /*************************************************
27  *               Global variables                 *  *               Global variables                 *
28  *************************************************/  *************************************************/
29    
30  static pcre *pattern;  static char *pattern_filename = NULL;
31  static pcre_extra *hints;  static int  pattern_count = 0;
32    static pcre **pattern_list;
33    static pcre_extra **hints_list;
34    
35  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
36    static BOOL filenames = TRUE;
37  static BOOL filenames_only = FALSE;  static BOOL filenames_only = FALSE;
38  static BOOL invert = FALSE;  static BOOL invert = FALSE;
39  static BOOL number = FALSE;  static BOOL number = FALSE;
40    static BOOL recurse = FALSE;
41  static BOOL silent = FALSE;  static BOOL silent = FALSE;
42  static BOOL whole_lines = FALSE;  static BOOL whole_lines = FALSE;
43    
44    /* Structure for options and list of them */
45    
46    typedef struct option_item {
47      int one_char;
48      char *long_name;
49      char *help_text;
50    } option_item;
51    
52    static option_item optionlist[] = {
53      { -1,  "help",         "display this help and exit" },
54      { 'c', "count",        "print only a count of matching lines per FILE" },
55      { 'h', "no-filename",  "suppress the prefixing filename on output" },
56      { 'i', "ignore-case",  "ignore case distinctions" },
57      { 'l', "files-with-matches", "print only FILE names containing matches" },
58      { 'n', "line-number",  "print line number with output lines" },
59      { 'r', "recursive",    "recursively scan sub-directories" },
60      { 's', "no-messages",  "suppress error messages" },
61      { 'u', "utf-8",        "use UTF-8 mode" },
62      { 'V', "version",      "print version information and exit" },
63      { 'v', "invert-match", "select non-matching lines" },
64      { 'x', "line-regex",   "force PATTERN to match only whole lines" },
65      { 'x', "line-regexp",  "force PATTERN to match only whole lines" },
66      { 0,    NULL,           NULL }
67    };
68    
69    
70    /*************************************************
71    *       Functions for directory scanning         *
72    *************************************************/
73    
74    /* These functions are defined so that they can be made system specific,
75    although at present the only ones are for Unix, Win32, and for "no directory
76    recursion support". */
77    
78    
79    /************* Directory scanning in Unix ***********/
80    
81    #if IS_UNIX
82    #include <sys/types.h>
83    #include <sys/stat.h>
84    #include <dirent.h>
85    
86    typedef DIR directory_type;
87    
88    int
89    isdirectory(char *filename)
90    {
91    struct stat statbuf;
92    if (stat(filename, &statbuf) < 0)
93      return 0;        /* In the expectation that opening as a file will fail */
94    return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
95    }
96    
97    directory_type *
98    opendirectory(char *filename)
99    {
100    return opendir(filename);
101    }
102    
103    char *
104    readdirectory(directory_type *dir)
105    {
106    for (;;)
107      {
108      struct dirent *dent = readdir(dir);
109      if (dent == NULL) return NULL;
110      if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
111        return dent->d_name;
112      }
113    return NULL;   /* Keep compiler happy; never executed */
114    }
115    
116    void
117    closedirectory(directory_type *dir)
118    {
119    closedir(dir);
120    }
121    
122    
123    /************* Directory scanning in Win32 ***********/
124    
125    /* I (Philip Hazel) have no means of testing this code. It was contributed by
126    Lionel Fourquaux. */
127    
128    
129    #elif HAVE_WIN32API
130    
131    #ifndef STRICT
132    # define STRICT
133    #endif
134    #ifndef WIN32_LEAN_AND_MEAN
135    # define WIN32_LEAN_AND_MEAN
136    #endif
137    #include <windows.h>
138    
139    typedef struct directory_type
140    {
141    HANDLE handle;
142    BOOL first;
143    WIN32_FIND_DATA data;
144    } directory_type;
145    
146    int
147    isdirectory(char *filename)
148    {
149    DWORD attr = GetFileAttributes(filename);
150    if (attr == INVALID_FILE_ATTRIBUTES)
151      return 0;
152    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
153    }
154    
155    directory_type *
156    opendirectory(char *filename)
157    {
158    size_t len;
159    char *pattern;
160    directory_type *dir;
161    DWORD err;
162    len = strlen(filename);
163    pattern = (char *) malloc(len + 3);
164    dir = (directory_type *) malloc(sizeof(*dir));
165    if ((pattern == NULL) || (dir == NULL))
166      {
167      fprintf(stderr, "pcregrep: malloc failed\n");
168      exit(2);
169      }
170    memcpy(pattern, filename, len);
171    memcpy(&(pattern[len]), "\\*", 3);
172    dir->handle = FindFirstFile(pattern, &(dir->data));
173    if (dir->handle != INVALID_HANDLE_VALUE)
174      {
175      free(pattern);
176      dir->first = TRUE;
177      return dir;
178      }
179    err = GetLastError();
180    free(pattern);
181    free(dir);
182    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
183    return NULL;
184    }
185    
186    char *
187    readdirectory(directory_type *dir)
188    {
189    for (;;)
190      {
191      if (!dir->first)
192        {
193        if (!FindNextFile(dir->handle, &(dir->data)))
194          return NULL;
195        }
196      else
197        {
198        dir->first = FALSE;
199        }
200      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
201        return dir->data.cFileName;
202      }
203    #ifndef _MSC_VER
204    return NULL;   /* Keep compiler happy; never executed */
205    #endif
206    }
207    
208    void
209    closedirectory(directory_type *dir)
210    {
211    FindClose(dir->handle);
212    free(dir);
213    }
214    
215    
216    /************* Directory scanning when we can't do it ***********/
217    
218    /* The type is void, and apart from isdirectory(), the functions do nothing. */
219    
220    #else
221    
222    typedef void directory_type;
223    
224    int isdirectory(char *filename) { return FALSE; }
225    directory_type * opendirectory(char *filename) {}
226    char *readdirectory(directory_type *dir) {}
227    void closedirectory(directory_type *dir) {}
228    
229    #endif
230    
231    
232    
233  #if ! HAVE_STRERROR  #if ! HAVE_STRERROR
# Line 72  char buffer[BUFSIZ]; Line 267  char buffer[BUFSIZ];
267    
268  while (fgets(buffer, sizeof(buffer), in) != NULL)  while (fgets(buffer, sizeof(buffer), in) != NULL)
269    {    {
270    BOOL match;    BOOL match = FALSE;
271      int i;
272    int length = (int)strlen(buffer);    int length = (int)strlen(buffer);
273    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;
274    linenumber++;    linenumber++;
275    
276    match = pcre_exec(pattern, hints, buffer, length, 0, 0, offsets, 99) >= 0;    for (i = 0; !match && i < pattern_count; i++)
277    if (match && whole_lines && offsets[1] != length) match = FALSE;      {
278        match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,
279          offsets, 99) >= 0;
280        if (match && whole_lines && offsets[1] != length) match = FALSE;
281        }
282    
283    if (match != invert)    if (match != invert)
284      {      {
# Line 116  return rc; Line 316  return rc;
316    
317    
318  /*************************************************  /*************************************************
319    *     Grep a file or recurse into a directory    *
320    *************************************************/
321    
322    static int
323    grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,
324      BOOL only_one_at_top)
325    {
326    int rc = 1;
327    int sep;
328    FILE *in;
329    
330    /* If the file is a directory and we are recursing, scan each file within it.
331    The scanning code is localized so it can be made system-specific. */
332    
333    if ((sep = isdirectory(filename)) != 0 && recurse)
334      {
335      char buffer[1024];
336      char *nextfile;
337      directory_type *dir = opendirectory(filename);
338    
339      if (dir == NULL)
340        {
341        fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,
342          strerror(errno));
343        return 2;
344        }
345    
346      while ((nextfile = readdirectory(dir)) != NULL)
347        {
348        int frc;
349        sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);
350        frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);
351        if (frc == 0 && rc == 1) rc = 0;
352        }
353    
354      closedirectory(dir);
355      return rc;
356      }
357    
358    /* If the file is not a directory, or we are not recursing, scan it. If this is
359    the first and only argument at top level, we don't show the file name (unless
360    we are only showing the file name). Otherwise, control is via the
361    show_filenames variable. */
362    
363    in = fopen(filename, "r");
364    if (in == NULL)
365      {
366      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));
367      return 2;
368      }
369    
370    rc = pcregrep(in, (filenames_only || (show_filenames && !only_one_at_top))?
371      filename : NULL);
372    fclose(in);
373    return rc;
374    }
375    
376    
377    
378    
379    /*************************************************
380  *                Usage function                  *  *                Usage function                  *
381  *************************************************/  *************************************************/
382    
383  static int  static int
384  usage(int rc)  usage(int rc)
385  {  {
386  fprintf(stderr, "Usage: pcregrep [-Vchilnsvx] pattern [file] ...\n");  fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] [pattern] [file1 file2 ...]\n");
387    fprintf(stderr, "Type `pcregrep --help' for more information.\n");
388  return rc;  return rc;
389  }  }
390    
# Line 130  return rc; Line 392  return rc;
392    
393    
394  /*************************************************  /*************************************************
395    *                Help function                   *
396    *************************************************/
397    
398    static void
399    help(void)
400    {
401    option_item *op;
402    
403    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
404    printf("Search for PATTERN in each FILE or standard input.\n");
405    printf("PATTERN must be present if -f is not used.\n");
406    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
407    
408    printf("Options:\n");
409    
410    for (op = optionlist; op->one_char != 0; op++)
411      {
412      int n;
413      char s[4];
414      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
415      printf("  %s --%s%n", s, op->long_name, &n);
416      n = 30 - n;
417      if (n < 1) n = 1;
418      printf("%.*s%s\n", n, "                    ", op->help_text);
419      }
420    
421    printf("\n  -f<filename>  or  --file=<filename>\n");
422    printf("    Read patterns from <filename> instead of using a command line option.\n");
423    printf("    Trailing white space is removed; blanks lines are ignored.\n");
424    printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
425    
426    printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");
427    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
428    }
429    
430    
431    
432    
433    /*************************************************
434    *                Handle an option                *
435    *************************************************/
436    
437    static int
438    handle_option(int letter, int options)
439    {
440    switch(letter)
441      {
442      case -1:  help(); exit(0);
443      case 'c': count_only = TRUE; break;
444      case 'h': filenames = FALSE; break;
445      case 'i': options |= PCRE_CASELESS; break;
446      case 'l': filenames_only = TRUE;
447      case 'n': number = TRUE; break;
448      case 'r': recurse = TRUE; break;
449      case 's': silent = TRUE; break;
450      case 'u': options |= PCRE_UTF8; break;
451      case 'v': invert = TRUE; break;
452      case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;
453    
454      case 'V':
455      fprintf(stderr, "pcregrep version %s using ", VERSION);
456      fprintf(stderr, "PCRE version %s\n", pcre_version());
457      exit(0);
458      break;
459    
460      default:
461      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
462      exit(usage(2));
463      }
464    
465    return options;
466    }
467    
468    
469    
470    
471    /*************************************************
472  *                Main program                    *  *                Main program                    *
473  *************************************************/  *************************************************/
474    
475  int  int
476  main(int argc, char **argv)  main(int argc, char **argv)
477  {  {
478  int i;  int i, j;
479  int rc = 1;  int rc = 1;
480  int options = 0;  int options = 0;
481  int errptr;  int errptr;
482  const char *error;  const char *error;
483  BOOL filenames = TRUE;  BOOL only_one_at_top;
484    
485  /* Process the options */  /* Process the options */
486    
487  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
488    {    {
   char *s;  
489    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
490    s = argv[i] + 1;  
491    while (*s != 0)    /* Missing options */
492    
493      if (argv[i][1] == 0) exit(usage(2));
494    
495      /* Long name options */
496    
497      if (argv[i][1] == '-')
498      {      {
499      switch (*s++)      option_item *op;
500    
501        if (strncmp(argv[i]+2, "file=", 5) == 0)
502        {        {
503        case 'c': count_only = TRUE; break;        pattern_filename = argv[i] + 7;
504        case 'h': filenames = FALSE; break;        continue;
505        case 'i': options |= PCRE_CASELESS; break;        }
       case 'l': filenames_only = TRUE;  
       case 'n': number = TRUE; break;  
       case 's': silent = TRUE; break;  
       case 'v': invert = TRUE; break;  
       case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;  
506    
507        case 'V':      for (op = optionlist; op->one_char != 0; op++)
508        fprintf(stderr, "PCRE version %s\n", pcre_version());        {
509        break;        if (strcmp(argv[i]+2, op->long_name) == 0)
510            {
511            options = handle_option(op->one_char, options);
512            break;
513            }
514          }
515        if (op->one_char == 0)
516          {
517          fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
518          exit(usage(2));
519          }
520        }
521    
522        default:    /* One-char options */
523        fprintf(stderr, "pcregrep: unknown option %c\n", s[-1]);  
524        return usage(2);    else
525        {
526        char *s = argv[i] + 1;
527        while (*s != 0)
528          {
529          if (*s == 'f')
530            {
531            pattern_filename = s + 1;
532            if (pattern_filename[0] == 0)
533              {
534              if (i >= argc - 1)
535                {
536                fprintf(stderr, "pcregrep: File name missing after -f\n");
537                exit(usage(2));
538                }
539              pattern_filename = argv[++i];
540              }
541            break;
542            }
543          else options = handle_option(*s++, options);
544        }        }
545      }      }
546    }    }
547    
548  /* There must be at least a regexp argument */  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
549    hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
550    
551  if (i >= argc) return usage(0);  if (pattern_list == NULL || hints_list == NULL)
552      {
553      fprintf(stderr, "pcregrep: malloc failed\n");
554      return 2;
555      }
556    
557  /* Compile the regular expression. */  /* Compile the regular expression(s). */
558    
559  pattern = pcre_compile(argv[i++], options, &error, &errptr, NULL);  if (pattern_filename != NULL)
 if (pattern == NULL)  
560    {    {
561    fprintf(stderr, "pcregrep: error in regex at offset %d: %s\n", errptr, error);    FILE *f = fopen(pattern_filename, "r");
562    return 2;    char buffer[BUFSIZ];
563      if (f == NULL)
564        {
565        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
566          strerror(errno));
567        return 2;
568        }
569      while (fgets(buffer, sizeof(buffer), f) != NULL)
570        {
571        char *s = buffer + (int)strlen(buffer);
572        if (pattern_count >= MAX_PATTERN_COUNT)
573          {
574          fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",
575            MAX_PATTERN_COUNT);
576          return 2;
577          }
578        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
579        if (s == buffer) continue;
580        *s = 0;
581        pattern_list[pattern_count] = pcre_compile(buffer, options, &error,
582          &errptr, NULL);
583        if (pattern_list[pattern_count++] == NULL)
584          {
585          fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",
586            pattern_count, errptr, error);
587          return 2;
588          }
589        }
590      fclose(f);
591    }    }
592    
593  /* Study the regular expression, as we will be running it many times */  /* If no file name, a single regex must be given inline */
594    
595  hints = pcre_study(pattern, 0, &error);  else
 if (error != NULL)  
596    {    {
597    fprintf(stderr, "pcregrep: error while studing regex: %s\n", error);    if (i >= argc) return usage(2);
598    return 2;    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);
599      if (pattern_list[0] == NULL)
600        {
601        fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,
602          error);
603        return 2;
604        }
605      pattern_count++;
606      }
607    
608    /* Study the regular expressions, as we will be running them many times */
609    
610    for (j = 0; j < pattern_count; j++)
611      {
612      hints_list[j] = pcre_study(pattern_list[j], 0, &error);
613      if (error != NULL)
614        {
615        char s[16];
616        if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
617        fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
618        return 2;
619        }
620    }    }
621    
622  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit */
623    
624  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc) return pcregrep(stdin, NULL);
625    
626  /* Otherwise, work through the remaining arguments as files. If there is only  /* Otherwise, work through the remaining arguments as files or directories.
627  one, don't give its name on the output. */  Pass in the fact that there is only one argument at top level - this suppresses
628    the file name if the argument is not a directory. */
629    
630  if (i == argc - 1) filenames = FALSE;  only_one_at_top = (i == argc - 1);
631  if (filenames_only) filenames = TRUE;  if (filenames_only) filenames = TRUE;
632    
633  for (; i < argc; i++)  for (; i < argc; i++)
634    {    {
635    FILE *in = fopen(argv[i], "r");    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);
636    if (in == NULL)    if (frc == 0 && rc == 1) rc = 0;
     {  
     fprintf(stderr, "%s: failed to open: %s\n", argv[i], strerror(errno));  
     rc = 2;  
     }  
   else  
     {  
     int frc = pcregrep(in, filenames? argv[i] : NULL);  
     if (frc == 0 && rc == 1) rc = 0;  
     fclose(in);  
     }  
637    }    }
638    
639  return rc;  return rc;

Legend:
Removed from v.49  
changed lines
  Added in v.64

  ViewVC Help
Powered by ViewVC 1.1.5