/[pcre]/code/tags/pcre-8.01/pcregrep.c
ViewVC logotype

Diff of /code/tags/pcre-8.01/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 286 by ph10, Mon Dec 17 14:46:11 2007 UTC revision 419 by ph10, Wed Aug 12 10:45:33 2009 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 71  POSSIBILITY OF SUCH DAMAGE. Line 71  POSSIBILITY OF SUCH DAMAGE.
71  typedef int BOOL;  typedef int BOOL;
72    
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define MBUFTHIRD BUFSIZ
# Line 139  static pcre_extra **hints_list = NULL; Line 140  static pcre_extra **hints_list = NULL;
140    
141  static char *include_pattern = NULL;  static char *include_pattern = NULL;
142  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
143    static char *include_dir_pattern = NULL;
144    static char *exclude_dir_pattern = NULL;
145    
146  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
147  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
148    static pcre *include_dir_compiled = NULL;
149    static pcre *exclude_dir_compiled = NULL;
150    
151  static int after_context = 0;  static int after_context = 0;
152  static int before_context = 0;  static int before_context = 0;
# Line 181  typedef struct option_item { Line 186  typedef struct option_item {
186  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
187  used to identify them. */  used to identify them. */
188    
189  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
190  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
191  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
192  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
193  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
194  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
195  #define N_NULL      (-7)  #define N_LABEL        (-7)
196  #define N_LOFFSETS  (-8)  #define N_LOCALE       (-8)
197  #define N_FOFFSETS  (-9)  #define N_NULL         (-9)
198    #define N_LOFFSETS     (-10)
199    #define N_FOFFSETS     (-11)
200    
201  static option_item optionlist[] = {  static option_item optionlist[] = {
202    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
# Line 222  static option_item optionlist[] = { Line 229  static option_item optionlist[] = {
229    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
230    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
231    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
232      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
233      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
234  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
235    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
236  #endif  #endif
# Line 813  if (after_context > 0 && lastmatchnumber Line 822  if (after_context > 0 && lastmatchnumber
822    
823    
824  /*************************************************  /*************************************************
825    *   Apply patterns to subject till one matches   *
826    *************************************************/
827    
828    /* This function is called to run through all patterns, looking for a match. It
829    is used multiple times for the same subject when colouring is enabled, in order
830    to find all possible matches.
831    
832    Arguments:
833      matchptr    the start of the subject
834      length      the length of the subject to match
835      offsets     the offsets vector to fill in
836      mrc         address of where to put the result of pcre_exec()
837    
838    Returns:      TRUE if there was a match
839                  FALSE if there was no match
840                  invert if there was a non-fatal error
841    */
842    
843    static BOOL
844    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
845    {
846    int i;
847    for (i = 0; i < pattern_count; i++)
848      {
849      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,
850        PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
851      if (*mrc >= 0) return TRUE;
852      if (*mrc == PCRE_ERROR_NOMATCH) continue;
853      fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
854      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
855      fprintf(stderr, "this text:\n");
856      fwrite(matchptr, 1, length, stderr);  /* In case binary zero included */
857      fprintf(stderr, "\n");
858      if (error_count == 0 &&
859          (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
860        {
861        fprintf(stderr, "pcregrep: error %d means that a resource limit "
862          "was exceeded\n", *mrc);
863        fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
864        }
865      if (error_count++ > 20)
866        {
867        fprintf(stderr, "pcregrep: too many errors - abandoned\n");
868        exit(2);
869        }
870      return invert;    /* No more matching; don't show the line again */
871      }
872    
873    return FALSE;  /* No match, no errors */
874    }
875    
876    
877    
878    /*************************************************
879  *            Grep an individual file             *  *            Grep an individual file             *
880  *************************************************/  *************************************************/
881    
# Line 845  int linenumber = 1; Line 908  int linenumber = 1;
908  int lastmatchnumber = 0;  int lastmatchnumber = 0;
909  int count = 0;  int count = 0;
910  int filepos = 0;  int filepos = 0;
911  int offsets[99];  int offsets[OFFSET_SIZE];
912  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
913  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
914  char *ptr = buffer;  char *ptr = buffer;
# Line 901  way, the buffer is shifted left and re-f Line 964  way, the buffer is shifted left and re-f
964    
965  while (ptr < endptr)  while (ptr < endptr)
966    {    {
967    int i, endlinelength;    int endlinelength;
968    int mrc = 0;    int mrc = 0;
969    BOOL match = FALSE;    BOOL match;
970    char *matchptr = ptr;    char *matchptr = ptr;
971    char *t = ptr;    char *t = ptr;
972    size_t length, linelength;    size_t length, linelength;
# Line 911  while (ptr < endptr) Line 974  while (ptr < endptr)
974    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
975    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
976    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
977    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
978    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
979    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
980      first line. */
981    
982    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
983    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
# Line 928  while (ptr < endptr) Line 992  while (ptr < endptr)
992        #include <time.h>        #include <time.h>
993        struct timeval start_time, end_time;        struct timeval start_time, end_time;
994        struct timezone dummy;        struct timezone dummy;
995          int i;
996    
997        if (jfriedl_XT)        if (jfriedl_XT)
998        {        {
# Line 953  while (ptr < endptr) Line 1018  while (ptr < endptr)
1018    
1019    
1020        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1021            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1022                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1023    
1024        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
1025                perror("bad gettimeofday");                perror("bad gettimeofday");
# Line 972  while (ptr < endptr) Line 1038  while (ptr < endptr)
1038    
1039    ONLY_MATCHING_RESTART:    ONLY_MATCHING_RESTART:
1040    
1041    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches or there is an error other
1042    the final newline in the subject string. */    than NOMATCH. This code is in a subroutine so that it can be re-used for
1043      finding subsequent matches when colouring matched lines. */
1044    
1045    for (i = 0; i < pattern_count; i++)    match = match_patterns(matchptr, length, offsets, &mrc);
     {  
     mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,  
       offsets, 99);  
     if (mrc >= 0) { match = TRUE; break; }  
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
1046    
1047    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1048    
# Line 1046  while (ptr < endptr) Line 1086  while (ptr < endptr)
1086          if (printname != NULL) fprintf(stdout, "%s:", printname);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1087          if (number) fprintf(stdout, "%d:", linenumber);          if (number) fprintf(stdout, "%d:", linenumber);
1088          if (line_offsets)          if (line_offsets)
1089            fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1090              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1091          else if (file_offsets)          else if (file_offsets)
1092            fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1093              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1094          else          else
1095              {
1096              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1097            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1098              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1099              }
1100          fprintf(stdout, "\n");          fprintf(stdout, "\n");
1101          matchptr += offsets[1];          matchptr += offsets[1];
1102          length -= offsets[1];          length -= offsets[1];
# Line 1189  while (ptr < endptr) Line 1233  while (ptr < endptr)
1233        else        else
1234  #endif  #endif
1235    
1236        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1237          matches. */
1238    
1239        if (do_colour)        if (do_colour)
1240          {          {
1241            int last_offset = 0;
1242          fwrite(ptr, 1, offsets[0], stdout);          fwrite(ptr, 1, offsets[0], stdout);
1243          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1244          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1245          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1246          fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],          for (;;)
1247              {
1248              last_offset += offsets[1];
1249              matchptr += offsets[1];
1250              length -= offsets[1];
1251              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1252              fwrite(matchptr, 1, offsets[0], stdout);
1253              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1254              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1255              fprintf(stdout, "%c[00m", 0x1b);
1256              }
1257            fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
1258            stdout);            stdout);
1259          }          }
1260    
1261          /* Not colouring; no need to search for further matches */
1262    
1263        else fwrite(ptr, 1, linelength + endlinelength, stdout);        else fwrite(ptr, 1, linelength + endlinelength, stdout);
1264        }        }
1265    
# Line 1361  if (strcmp(pathname, "-") == 0) Line 1421  if (strcmp(pathname, "-") == 0)
1421    }    }
1422    
1423  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* If the file is a directory, skip if skipping or if we are recursing, scan
1424  each file within it, subject to any include or exclude patterns that were set.  each file and directory within it, subject to any include or exclude patterns
1425  The scanning code is localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1426    system-specific. */
1427    
1428  if ((sep = isdirectory(pathname)) != 0)  if ((sep = isdirectory(pathname)) != 0)
1429    {    {
# Line 1383  if ((sep = isdirectory(pathname)) != 0) Line 1444  if ((sep = isdirectory(pathname)) != 0)
1444    
1445      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1446        {        {
1447        int frc, blen;        int frc, nflen;
1448        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1449        blen = strlen(buffer);        nflen = strlen(nextfile);
1450    
1451          if (isdirectory(buffer))
1452            {
1453            if (exclude_dir_compiled != NULL &&
1454                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1455              continue;
1456    
1457            if (include_dir_compiled != NULL &&
1458                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1459              continue;
1460            }
1461          else
1462            {
1463            if (exclude_compiled != NULL &&
1464                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1465              continue;
1466    
1467        if (exclude_compiled != NULL &&          if (include_compiled != NULL &&
1468            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)              pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1469          continue;            continue;
1470            }
       if (include_compiled != NULL &&  
           pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
         continue;  
1471    
1472        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1473        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
# Line 1456  an attempt to read a .bz2 file indicates Line 1530  an attempt to read a .bz2 file indicates
1530  PLAIN_FILE:  PLAIN_FILE:
1531  #endif  #endif
1532    {    {
1533    in = fopen(pathname, "r");    in = fopen(pathname, "rb");
1534    handle = (void *)in;    handle = (void *)in;
1535    frtype = FR_PLAIN;    frtype = FR_PLAIN;
1536    }    }
# Line 1579  for (op = optionlist; op->one_char != 0; Line 1653  for (op = optionlist; op->one_char != 0;
1653    int n;    int n;
1654    char s[4];    char s[4];
1655    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1656    printf("  %s --%s%n", s, op->long_name, &n);    n = 30 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1657    if (n < 1) n = 1;    if (n < 1) n = 1;
1658    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                    ", op->help_text);
1659    }    }
# Line 1798  const char *error; Line 1871  const char *error;
1871    
1872  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
1873  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1874  */  Note that the return values from pcre_config(), though derived from the ASCII
1875    codes, are the same in EBCDIC environments, so we must use the actual values
1876    rather than escapes such as '\r'. */
1877    
1878  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1879  switch(i)  switch(i)
1880    {    {
1881    default:                 newline = (char *)"lf"; break;    default:               newline = (char *)"lf"; break;
1882    case '\r':               newline = (char *)"cr"; break;    case 13:               newline = (char *)"cr"; break;
1883    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case (13 << 8) | 10:   newline = (char *)"crlf"; break;
1884    case -1:                 newline = (char *)"any"; break;    case -1:               newline = (char *)"any"; break;
1885    case -2:                 newline = (char *)"anycrlf"; break;    case -2:               newline = (char *)"anycrlf"; break;
1886    }    }
1887    
1888  /* Process the options */  /* Process the options */
# Line 2288  if (include_pattern != NULL) Line 2363  if (include_pattern != NULL)
2363        errptr, error);        errptr, error);
2364      goto EXIT2;      goto EXIT2;
2365      }      }
2366      }
2367    
2368    if (exclude_dir_pattern != NULL)
2369      {
2370      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2371        pcretables);
2372      if (exclude_dir_compiled == NULL)
2373        {
2374        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2375          errptr, error);
2376        goto EXIT2;
2377        }
2378      }
2379    
2380    if (include_dir_pattern != NULL)
2381      {
2382      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2383        pcretables);
2384      if (include_dir_compiled == NULL)
2385        {
2386        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2387          errptr, error);
2388        goto EXIT2;
2389        }
2390    }    }
2391    
2392  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */

Legend:
Removed from v.286  
changed lines
  Added in v.419

  ViewVC Help
Powered by ViewVC 1.1.5