/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 49 by nigel, Sat Feb 24 21:39:33 2007 UTC revision 75 by nigel, Sat Feb 24 21:40:37 2007 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2004 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #include <ctype.h>
41  #include <stdio.h>  #include <stdio.h>
42  #include <string.h>  #include <string.h>
43  #include <stdlib.h>  #include <stdlib.h>
# Line 17  its pattern matching. */ Line 50  its pattern matching. */
50    
51  typedef int BOOL;  typedef int BOOL;
52    
53    #define VERSION "3.0 14-Jan-2003"
54    #define MAX_PATTERN_COUNT 100
55    
56    
57  /*************************************************  /*************************************************
58  *               Global variables                 *  *               Global variables                 *
59  *************************************************/  *************************************************/
60    
61  static pcre *pattern;  static char *pattern_filename = NULL;
62  static pcre_extra *hints;  static int  pattern_count = 0;
63    static pcre **pattern_list;
64    static pcre_extra **hints_list;
65    
66  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
67    static BOOL filenames = TRUE;
68  static BOOL filenames_only = FALSE;  static BOOL filenames_only = FALSE;
69  static BOOL invert = FALSE;  static BOOL invert = FALSE;
70  static BOOL number = FALSE;  static BOOL number = FALSE;
71    static BOOL recurse = FALSE;
72  static BOOL silent = FALSE;  static BOOL silent = FALSE;
73  static BOOL whole_lines = FALSE;  static BOOL whole_lines = FALSE;
74    
/* Description of one command-line option: its single-letter form, its long
(double-hyphen) name, and the one-line text shown for it by help(). */

typedef struct option_item option_item;

struct option_item {
  int one_char;            /* option letter; -1 = long name only; 0 ends the list */
  const char *long_name;   /* name used after "--" */
  const char *help_text;   /* description printed by help() */
};

/* Table of all flag options. The entry whose one_char is 0 terminates the
table. "line-regex" and "line-regexp" are two spellings that both map to 'x'. */

static option_item optionlist[] = {
  { -1,  "help",               "display this help and exit" },
  { 'c', "count",              "print only a count of matching lines per FILE" },
  { 'h', "no-filename",        "suppress the prefixing filename on output" },
  { 'i', "ignore-case",        "ignore case distinctions" },
  { 'l', "files-with-matches", "print only FILE names containing matches" },
  { 'n', "line-number",        "print line number with output lines" },
  { 'r', "recursive",          "recursively scan sub-directories" },
  { 's', "no-messages",        "suppress error messages" },
  { 'u', "utf-8",              "use UTF-8 mode" },
  { 'V', "version",            "print version information and exit" },
  { 'v', "invert-match",       "select non-matching lines" },
  { 'x', "line-regex",         "force PATTERN to match only whole lines" },
  { 'x', "line-regexp",        "force PATTERN to match only whole lines" },
  { 0,   NULL,                 NULL }
};
99    
100    
101    /*************************************************
102    *       Functions for directory scanning         *
103    *************************************************/
104    
105    /* These functions are defined so that they can be made system specific,
106    although at present the only ones are for Unix, Win32, and for "no directory
107    recursion support". */
108    
109    
110    /************* Directory scanning in Unix ***********/
111    
112    #if IS_UNIX
113    #include <sys/types.h>
114    #include <sys/stat.h>
115    #include <dirent.h>
116    
typedef DIR directory_type;

/* Test whether a path names a directory.

Arguments:
  filename   the path to examine

Returns:     '/' (the separator to put between the directory name and an
             entry name) if the path is a directory; 0 if it is not, or if
             stat() fails
*/

static int
isdirectory(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 0;        /* In the expectation that opening as a file will fail */

/* S_ISDIR() is the base POSIX file-type test; the S_IFMT/S_IFDIR masks are an
XSI extension and are not visible under strict feature-test settings. */

return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
127    
128    static directory_type *
129    opendirectory(char *filename)
130    {
131    return opendir(filename);
132    }
133    
134    static char *
135    readdirectory(directory_type *dir)
136    {
137    for (;;)
138      {
139      struct dirent *dent = readdir(dir);
140      if (dent == NULL) return NULL;
141      if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
142        return dent->d_name;
143      }
144    return NULL;   /* Keep compiler happy; never executed */
145    }
146    
147    static void
148    closedirectory(directory_type *dir)
149    {
150    closedir(dir);
151    }
152    
153    
154    /************* Directory scanning in Win32 ***********/
155    
156    /* I (Philip Hazel) have no means of testing this code. It was contributed by
157    Lionel Fourquaux. */
158    
159    
160    #elif HAVE_WIN32API
161    
162    #ifndef STRICT
163    # define STRICT
164    #endif
165    #ifndef WIN32_LEAN_AND_MEAN
166    # define WIN32_LEAN_AND_MEAN
167    #endif
168    #include <windows.h>
169    
170    typedef struct directory_type
171    {
172    HANDLE handle;
173    BOOL first;
174    WIN32_FIND_DATA data;
175    } directory_type;
176    
177    int
178    isdirectory(char *filename)
179    {
180    DWORD attr = GetFileAttributes(filename);
181    if (attr == INVALID_FILE_ATTRIBUTES)
182      return 0;
183    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
184    }
185    
186    directory_type *
187    opendirectory(char *filename)
188    {
189    size_t len;
190    char *pattern;
191    directory_type *dir;
192    DWORD err;
193    len = strlen(filename);
194    pattern = (char *) malloc(len + 3);
195    dir = (directory_type *) malloc(sizeof(*dir));
196    if ((pattern == NULL) || (dir == NULL))
197      {
198      fprintf(stderr, "pcregrep: malloc failed\n");
199      exit(2);
200      }
201    memcpy(pattern, filename, len);
202    memcpy(&(pattern[len]), "\\*", 3);
203    dir->handle = FindFirstFile(pattern, &(dir->data));
204    if (dir->handle != INVALID_HANDLE_VALUE)
205      {
206      free(pattern);
207      dir->first = TRUE;
208      return dir;
209      }
210    err = GetLastError();
211    free(pattern);
212    free(dir);
213    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
214    return NULL;
215    }
216    
217    char *
218    readdirectory(directory_type *dir)
219    {
220    for (;;)
221      {
222      if (!dir->first)
223        {
224        if (!FindNextFile(dir->handle, &(dir->data)))
225          return NULL;
226        }
227      else
228        {
229        dir->first = FALSE;
230        }
231      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
232        return dir->data.cFileName;
233      }
234    #ifndef _MSC_VER
235    return NULL;   /* Keep compiler happy; never executed */
236    #endif
237    }
238    
239    void
240    closedirectory(directory_type *dir)
241    {
242    FindClose(dir->handle);
243    free(dir);
244    }
245    
246    
247    /************* Directory scanning when we can't do it ***********/
248    
249    /* The type is void, and apart from isdirectory(), the functions do nothing. */
250    
251    #else
252    
253    typedef void directory_type;
254    
255    int isdirectory(char *filename) { return FALSE; }
256    directory_type * opendirectory(char *filename) {}
257    char *readdirectory(directory_type *dir) {}
258    void closedirectory(directory_type *dir) {}
259    
260    #endif
261    
262    
263    
264  #if ! HAVE_STRERROR  #if ! HAVE_STRERROR
# Line 72  char buffer[BUFSIZ]; Line 298  char buffer[BUFSIZ];
298    
299  while (fgets(buffer, sizeof(buffer), in) != NULL)  while (fgets(buffer, sizeof(buffer), in) != NULL)
300    {    {
301    BOOL match;    BOOL match = FALSE;
302      int i;
303    int length = (int)strlen(buffer);    int length = (int)strlen(buffer);
304    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;
305    linenumber++;    linenumber++;
306    
307    match = pcre_exec(pattern, hints, buffer, length, 0, 0, offsets, 99) >= 0;    for (i = 0; !match && i < pattern_count; i++)
308    if (match && whole_lines && offsets[1] != length) match = FALSE;      {
309        match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,
310          offsets, 99) >= 0;
311        if (match && whole_lines && offsets[1] != length) match = FALSE;
312        }
313    
314    if (match != invert)    if (match != invert)
315      {      {
# Line 116  return rc; Line 347  return rc;
347    
348    
349  /*************************************************  /*************************************************
350    *     Grep a file or recurse into a directory    *
351    *************************************************/
352    
353    static int
354    grep_or_recurse(char *filename, BOOL dir_recurse, BOOL show_filenames,
355      BOOL only_one_at_top)
356    {
357    int rc = 1;
358    int sep;
359    FILE *in;
360    
361    /* If the file is a directory and we are recursing, scan each file within it.
362    The scanning code is localized so it can be made system-specific. */
363    
364    if ((sep = isdirectory(filename)) != 0 && dir_recurse)
365      {
366      char buffer[1024];
367      char *nextfile;
368      directory_type *dir = opendirectory(filename);
369    
370      if (dir == NULL)
371        {
372        fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,
373          strerror(errno));
374        return 2;
375        }
376    
377      while ((nextfile = readdirectory(dir)) != NULL)
378        {
379        int frc;
380        sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);
381        frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);
382        if (frc == 0 && rc == 1) rc = 0;
383        }
384    
385      closedirectory(dir);
386      return rc;
387      }
388    
389    /* If the file is not a directory, or we are not recursing, scan it. If this is
390    the first and only argument at top level, we don't show the file name (unless
391    we are only showing the file name). Otherwise, control is via the
392    show_filenames variable. */
393    
394    in = fopen(filename, "r");
395    if (in == NULL)
396      {
397      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));
398      return 2;
399      }
400    
401    rc = pcregrep(in, (filenames_only || (show_filenames && !only_one_at_top))?
402      filename : NULL);
403    fclose(in);
404    return rc;
405    }
406    
407    
408    
409    
410    /*************************************************
411  *                Usage function                  *  *                Usage function                  *
412  *************************************************/  *************************************************/
413    
/* Write the two-line usage summary to stderr and hand back the caller's
return code unchanged, so that a caller can write "return usage(rc)" or
"exit(usage(rc))". */

static int
usage(int rc)
{
fputs("Usage: pcregrep [-Vcfhilnrsvx] [long-options] [pattern] [file1 file2 ...]\n"
      "Type `pcregrep --help' for more information.\n", stderr);
return rc;
}
421    
# Line 130  return rc; Line 423  return rc;
423    
424    
425  /*************************************************  /*************************************************
426    *                Help function                   *
427    *************************************************/
428    
429    static void
430    help(void)
431    {
432    option_item *op;
433    
434    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
435    printf("Search for PATTERN in each FILE or standard input.\n");
436    printf("PATTERN must be present if -f is not used.\n");
437    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
438    
439    printf("Options:\n");
440    
441    for (op = optionlist; op->one_char != 0; op++)
442      {
443      int n;
444      char s[4];
445      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
446      printf("  %s --%s%n", s, op->long_name, &n);
447      n = 30 - n;
448      if (n < 1) n = 1;
449      printf("%.*s%s\n", n, "                    ", op->help_text);
450      }
451    
452    printf("\n  -f<filename>  or  --file=<filename>\n");
453    printf("    Read patterns from <filename> instead of using a command line option.\n");
454    printf("    Trailing white space is removed; blanks lines are ignored.\n");
455    printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
456    
457    printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");
458    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
459    }
460    
461    
462    
463    
464    /*************************************************
465    *                Handle an option                *
466    *************************************************/
467    
468    static int
469    handle_option(int letter, int options)
470    {
471    switch(letter)
472      {
473      case -1:  help(); exit(0);
474      case 'c': count_only = TRUE; break;
475      case 'h': filenames = FALSE; break;
476      case 'i': options |= PCRE_CASELESS; break;
477      case 'l': filenames_only = TRUE;
478      case 'n': number = TRUE; break;
479      case 'r': recurse = TRUE; break;
480      case 's': silent = TRUE; break;
481      case 'u': options |= PCRE_UTF8; break;
482      case 'v': invert = TRUE; break;
483      case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;
484    
485      case 'V':
486      fprintf(stderr, "pcregrep version %s using ", VERSION);
487      fprintf(stderr, "PCRE version %s\n", pcre_version());
488      exit(0);
489      break;
490    
491      default:
492      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
493      exit(usage(2));
494      }
495    
496    return options;
497    }
498    
499    
500    
501    
502    /*************************************************
503  *                Main program                    *  *                Main program                    *
504  *************************************************/  *************************************************/
505    
506  int  int
507  main(int argc, char **argv)  main(int argc, char **argv)
508  {  {
509  int i;  int i, j;
510  int rc = 1;  int rc = 1;
511  int options = 0;  int options = 0;
512  int errptr;  int errptr;
513  const char *error;  const char *error;
514  BOOL filenames = TRUE;  BOOL only_one_at_top;
515    
516  /* Process the options */  /* Process the options */
517    
518  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
519    {    {
   char *s;  
520    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
521    s = argv[i] + 1;  
522    while (*s != 0)    /* Missing options */
523    
524      if (argv[i][1] == 0) exit(usage(2));
525    
526      /* Long name options */
527    
528      if (argv[i][1] == '-')
529      {      {
530      switch (*s++)      option_item *op;
531    
532        if (strncmp(argv[i]+2, "file=", 5) == 0)
533        {        {
534        case 'c': count_only = TRUE; break;        pattern_filename = argv[i] + 7;
535        case 'h': filenames = FALSE; break;        continue;
536        case 'i': options |= PCRE_CASELESS; break;        }
       case 'l': filenames_only = TRUE;  
       case 'n': number = TRUE; break;  
       case 's': silent = TRUE; break;  
       case 'v': invert = TRUE; break;  
       case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;  
537    
538        case 'V':      for (op = optionlist; op->one_char != 0; op++)
539        fprintf(stderr, "PCRE version %s\n", pcre_version());        {
540        break;        if (strcmp(argv[i]+2, op->long_name) == 0)
541            {
542            options = handle_option(op->one_char, options);
543            break;
544            }
545          }
546        if (op->one_char == 0)
547          {
548          fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
549          exit(usage(2));
550          }
551        }
552    
553        default:    /* One-char options */
554        fprintf(stderr, "pcregrep: unknown option %c\n", s[-1]);  
555        return usage(2);    else
556        {
557        char *s = argv[i] + 1;
558        while (*s != 0)
559          {
560          if (*s == 'f')
561            {
562            pattern_filename = s + 1;
563            if (pattern_filename[0] == 0)
564              {
565              if (i >= argc - 1)
566                {
567                fprintf(stderr, "pcregrep: File name missing after -f\n");
568                exit(usage(2));
569                }
570              pattern_filename = argv[++i];
571              }
572            break;
573            }
574          else options = handle_option(*s++, options);
575        }        }
576      }      }
577    }    }
578    
579  /* There must be at least a regexp argument */  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
580    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
581    
582  if (i >= argc) return usage(0);  if (pattern_list == NULL || hints_list == NULL)
583      {
584      fprintf(stderr, "pcregrep: malloc failed\n");
585      return 2;
586      }
587    
588  /* Compile the regular expression. */  /* Compile the regular expression(s). */
589    
590  pattern = pcre_compile(argv[i++], options, &error, &errptr, NULL);  if (pattern_filename != NULL)
 if (pattern == NULL)  
591    {    {
592    fprintf(stderr, "pcregrep: error in regex at offset %d: %s\n", errptr, error);    FILE *f = fopen(pattern_filename, "r");
593    return 2;    char buffer[BUFSIZ];
594      if (f == NULL)
595        {
596        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
597          strerror(errno));
598        return 2;
599        }
600      while (fgets(buffer, sizeof(buffer), f) != NULL)
601        {
602        char *s = buffer + (int)strlen(buffer);
603        if (pattern_count >= MAX_PATTERN_COUNT)
604          {
605          fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",
606            MAX_PATTERN_COUNT);
607          return 2;
608          }
609        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
610        if (s == buffer) continue;
611        *s = 0;
612        pattern_list[pattern_count] = pcre_compile(buffer, options, &error,
613          &errptr, NULL);
614        if (pattern_list[pattern_count++] == NULL)
615          {
616          fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",
617            pattern_count, errptr, error);
618          return 2;
619          }
620        }
621      fclose(f);
622    }    }
623    
624  /* Study the regular expression, as we will be running it may times */  /* If no file name, a single regex must be given inline */
625    
626  hints = pcre_study(pattern, 0, &error);  else
 if (error != NULL)  
627    {    {
628    fprintf(stderr, "pcregrep: error while studing regex: %s\n", error);    if (i >= argc) return usage(2);
629    return 2;    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);
630      if (pattern_list[0] == NULL)
631        {
632        fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,
633          error);
634        return 2;
635        }
636      pattern_count++;
637      }
638    
639    /* Study the regular expressions, as we will be running them many times */
640    
641    for (j = 0; j < pattern_count; j++)
642      {
643      hints_list[j] = pcre_study(pattern_list[j], 0, &error);
644      if (error != NULL)
645        {
646        char s[16];
647        if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
648        fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
649        return 2;
650        }
651    }    }
652    
653  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit */
654    
655  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc) return pcregrep(stdin, NULL);
656    
657  /* Otherwise, work through the remaining arguments as files. If there is only  /* Otherwise, work through the remaining arguments as files or directories.
658  one, don't give its name on the output. */  Pass in the fact that there is only one argument at top level - this suppresses
659    the file name if the argument is not a directory. */
660    
661  if (i == argc - 1) filenames = FALSE;  only_one_at_top = (i == argc - 1);
662  if (filenames_only) filenames = TRUE;  if (filenames_only) filenames = TRUE;
663    
664  for (; i < argc; i++)  for (; i < argc; i++)
665    {    {
666    FILE *in = fopen(argv[i], "r");    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);
667    if (in == NULL)    if (frc == 0 && rc == 1) rc = 0;
     {  
     fprintf(stderr, "%s: failed to open: %s\n", argv[i], strerror(errno));  
     rc = 2;  
     }  
   else  
     {  
     int frc = pcregrep(in, filenames? argv[i] : NULL);  
     if (frc == 0 && rc == 1) rc = 0;  
     fclose(in);  
     }  
668    }    }
669    
670  return rc;  return rc;

Legend:
Removed from v.49  
changed lines
  Added in v.75

  ViewVC Help
Powered by ViewVC 1.1.5