/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 379 by ph10, Mon Mar 2 20:30:05 2009 UTC revision 515 by ph10, Tue May 4 09:12:25 2010 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2009 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 83  typedef int BOOL; Line 83  typedef int BOOL;
83  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
84  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
85    
86  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88  /* File reading styles */  /* File reading styles */
89    
# Line 104  enum { DEE_READ, DEE_SKIP }; Line 104  enum { DEE_READ, DEE_SKIP };
104    
105  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
113    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114    
115    
116    
117  /*************************************************  /*************************************************
# Line 165  static BOOL invert = FALSE; Line 173  static BOOL invert = FALSE;
173  static BOOL line_offsets = FALSE;  static BOOL line_offsets = FALSE;
174  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
175  static BOOL number = FALSE;  static BOOL number = FALSE;
176    static BOOL omit_zero_count = FALSE;
177  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
178  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
179  static BOOL silent = FALSE;  static BOOL silent = FALSE;
# Line 209  static option_item optionlist[] = { Line 218  static option_item optionlist[] = {
218    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
219    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
220    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
221    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_PATLIST,   'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
222    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
223    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
224    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
225    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
# Line 812  if (after_context > 0 && lastmatchnumber Line 821  if (after_context > 0 && lastmatchnumber
821      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
822      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
823      pp = end_of_line(pp, endptr, &ellength);      pp = end_of_line(pp, endptr, &ellength);
824      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
825      lastmatchrestart = pp;      lastmatchrestart = pp;
826      }      }
827    hyphenpending = TRUE;    hyphenpending = TRUE;
# Line 825  if (after_context > 0 && lastmatchnumber Line 834  if (after_context > 0 && lastmatchnumber
834  *   Apply patterns to subject till one matches   *  *   Apply patterns to subject till one matches   *
835  *************************************************/  *************************************************/
836    
837  /* This function is called to run through all patterns, looking for a match. It  /* This function is called to run through all patterns, looking for a match. It
838  is used multiple times for the same subject when colouring is enabled, in order  is used multiple times for the same subject when colouring is enabled, in order
839  to find all possible matches.  to find all possible matches.
840    
841  Arguments:  Arguments:
# Line 834  Arguments: Line 843  Arguments:
843    length      the length of the subject to match    length      the length of the subject to match
844    offsets     the offsets vector to fill in    offsets     the offsets vector to fill in
845    mrc         address of where to put the result of pcre_exec()    mrc         address of where to put the result of pcre_exec()
846    
847  Returns:      TRUE if there was a match  Returns:      TRUE if there was a match
848                FALSE if there was no match                FALSE if there was no match
849                invert if there was a non-fatal error                invert if there was a non-fatal error
850  */  */
851    
852  static BOOL  static BOOL
853  match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)  match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
# Line 853  for (i = 0; i < pattern_count; i++) Line 862  for (i = 0; i < pattern_count; i++)
862    fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);    fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
863    if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);    if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
864    fprintf(stderr, "this text:\n");    fprintf(stderr, "this text:\n");
865    fwrite(matchptr, 1, length, stderr);  /* In case binary zero included */    FWRITE(matchptr, 1, length, stderr);   /* In case binary zero included */
866    fprintf(stderr, "\n");    fprintf(stderr, "\n");
867    if (error_count == 0 &&    if (error_count == 0 &&
868        (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))        (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
# Line 992  while (ptr < endptr) Line 1001  while (ptr < endptr)
1001        #include <time.h>        #include <time.h>
1002        struct timeval start_time, end_time;        struct timeval start_time, end_time;
1003        struct timezone dummy;        struct timezone dummy;
1004        int i;        int i;
1005    
1006        if (jfriedl_XT)        if (jfriedl_XT)
1007        {        {
# Line 1018  while (ptr < endptr) Line 1027  while (ptr < endptr)
1027    
1028    
1029        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1030            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1031                PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);                PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1032    
1033        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
# Line 1038  while (ptr < endptr) Line 1047  while (ptr < endptr)
1047    
1048    ONLY_MATCHING_RESTART:    ONLY_MATCHING_RESTART:
1049    
1050    /* Run through all the patterns until one matches or there is an error other    /* Run through all the patterns until one matches or there is an error other
1051    than NOMATCH. This code is in a subroutine so that it can be re-used for    than NOMATCH. This code is in a subroutine so that it can be re-used for
1052    finding subsequent matches when colouring matched lines. */    finding subsequent matches when colouring matched lines. */
1053    
1054    match = match_patterns(matchptr, length, offsets, &mrc);    match = match_patterns(matchptr, length, offsets, &mrc);
1055    
1056    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
# Line 1061  while (ptr < endptr) Line 1070  while (ptr < endptr)
1070      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1071      in the file. */      in the file. */
1072    
1073      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1074        {        {
1075        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1076        return 0;        return 0;
# Line 1094  while (ptr < endptr) Line 1103  while (ptr < endptr)
1103          else          else
1104            {            {
1105            if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);            if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1106            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);            FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1107            if (do_colour) fprintf(stdout, "%c[00m", 0x1b);            if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1108            }            }
1109          fprintf(stdout, "\n");          fprintf(stdout, "\n");
1110          matchptr += offsets[1];          matchptr += offsets[1];
1111          length -= offsets[1];          length -= offsets[1];
# Line 1136  while (ptr < endptr) Line 1145  while (ptr < endptr)
1145            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1146            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1147            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1148            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1149            lastmatchrestart = pp;            lastmatchrestart = pp;
1150            }            }
1151          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
# Line 1176  while (ptr < endptr) Line 1185  while (ptr < endptr)
1185            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1186            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1187            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1188            fwrite(p, 1, pp - p, stdout);            FWRITE(p, 1, pp - p, stdout);
1189            p = pp;            p = pp;
1190            }            }
1191          }          }
# Line 1226  while (ptr < endptr) Line 1235  while (ptr < endptr)
1235          {          {
1236          int first = S_arg * 2;          int first = S_arg * 2;
1237          int last  = first + 1;          int last  = first + 1;
1238          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1239          fprintf(stdout, "X");          fprintf(stdout, "X");
1240          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1241          }          }
1242        else        else
1243  #endif  #endif
1244    
1245        /* We have to split the line(s) up if colouring, and search for further        /* We have to split the line(s) up if colouring, and search for further
1246        matches. */        matches. */
1247    
1248        if (do_colour)        if (do_colour)
1249          {          {
1250          int last_offset = 0;          int last_offset = 0;
1251          fwrite(ptr, 1, offsets[0], stdout);          FWRITE(ptr, 1, offsets[0], stdout);
1252          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1253          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1254          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1255          for (;;)          for (;;)
1256            {            {
1257            last_offset += offsets[1];            last_offset += offsets[1];
1258            matchptr += offsets[1];            matchptr += offsets[1];
1259            length -= offsets[1];            length -= offsets[1];
1260            if (!match_patterns(matchptr, length, offsets, &mrc)) break;            if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1261            fwrite(matchptr, 1, offsets[0], stdout);            FWRITE(matchptr, 1, offsets[0], stdout);
1262            fprintf(stdout, "%c[%sm", 0x1b, colour_string);            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1263            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);            FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1264            fprintf(stdout, "%c[00m", 0x1b);            fprintf(stdout, "%c[00m", 0x1b);
1265            }            }
1266          fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,          FWRITE(ptr + last_offset, 1,
1267            stdout);            (linelength + endlinelength) - last_offset, stdout);
1268          }          }
1269    
1270        /* Not colouring; no need to search for further matches */        /* Not colouring; no need to search for further matches */
1271    
1272        else fwrite(ptr, 1, linelength + endlinelength, stdout);        else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1273        }        }
1274    
1275      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match */
# Line 1365  if (filenames == FN_NOMATCH_ONLY) Line 1374  if (filenames == FN_NOMATCH_ONLY)
1374    
1375  if (count_only)  if (count_only)
1376    {    {
1377    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1378    fprintf(stdout, "%d\n", count);      {
1379        if (printname != NULL && filenames != FN_NONE)
1380          fprintf(stdout, "%s:", printname);
1381        fprintf(stdout, "%d\n", count);
1382        }
1383    }    }
1384    
1385  return rc;  return rc;
# Line 1530  an attempt to read a .bz2 file indicates Line 1543  an attempt to read a .bz2 file indicates
1543  PLAIN_FILE:  PLAIN_FILE:
1544  #endif  #endif
1545    {    {
1546    in = fopen(pathname, "r");    in = fopen(pathname, "rb");
1547    handle = (void *)in;    handle = (void *)in;
1548    frtype = FR_PLAIN;    frtype = FR_PLAIN;
1549    }    }
# Line 1686  switch(letter) Line 1699  switch(letter)
1699    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
1700    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
1701    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1702    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1703    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1704    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1705    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
# Line 1871  const char *error; Line 1884  const char *error;
1884    
1885  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
1886  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1887  */  Note that the return values from pcre_config(), though derived from the ASCII
1888    codes, are the same in EBCDIC environments, so we must use the actual values
1889    rather than escapes such as '\r'. */
1890    
1891  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1892  switch(i)  switch(i)
1893    {    {
1894    default:                 newline = (char *)"lf"; break;    default:               newline = (char *)"lf"; break;
1895    case '\r':               newline = (char *)"cr"; break;    case 13:               newline = (char *)"cr"; break;
1896    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case (13 << 8) | 10:   newline = (char *)"crlf"; break;
1897    case -1:                 newline = (char *)"any"; break;    case -1:               newline = (char *)"any"; break;
1898    case -2:                 newline = (char *)"anycrlf"; break;    case -2:               newline = (char *)"anycrlf"; break;
1899    }    }
1900    
1901  /* Process the options */  /* Process the options */
# Line 1922  for (i = 1; i < argc; i++) Line 1937  for (i = 1; i < argc; i++)
1937      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
1938      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
1939      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1940      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
1941      these categories, fortunately. */      both these categories. */
1942    
1943      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
1944        {        {
1945        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
1946        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
1947        if (opbra == NULL)     /* Not a (p) case */  
1948          /* Handle options with only one spelling of the name */
1949    
1950          if (opbra == NULL)     /* Does not contain '(' */
1951          {          {
1952          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
1953            {            {
# Line 1951  for (i = 1; i < argc; i++) Line 1969  for (i = 1; i < argc; i++)
1969              }              }
1970            }            }
1971          }          }
1972        else                   /* Special case xxxx(p) */  
1973          /* Handle options with an alternate spelling of the name */
1974    
1975          else
1976          {          {
1977          char buff1[24];          char buff1[24];
1978          char buff2[24];          char buff2[24];
1979    
1980          int baselen = opbra - op->long_name;          int baselen = opbra - op->long_name;
1981            int fulllen = strchr(op->long_name, ')') - op->long_name + 1;
1982            int arglen = (argequals == NULL || equals == NULL)?
1983              (int)strlen(arg) : argequals - arg;
1984    
1985          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
1986          sprintf(buff2, "%s%.*s", buff1,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
1987            (int)strlen(op->long_name) - baselen - 2, opbra + 1);  
1988          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
1989               strncmp(arg, buff2, arglen) == 0)
1990              {
1991              if (equals != NULL && argequals != NULL)
1992                {
1993                option_data = argequals;
1994                if (*option_data == '=')
1995                  {
1996                  option_data++;
1997                  longopwasequals = TRUE;
1998                  }
1999                }
2000            break;            break;
2001              }
2002          }          }
2003        }        }
2004    
# Line 1971  for (i = 1; i < argc; i++) Line 2009  for (i = 1; i < argc; i++)
2009        }        }
2010      }      }
2011    
   
2012    /* Jeffrey Friedl's debugging harness uses these additional options which    /* Jeffrey Friedl's debugging harness uses these additional options which
2013    are not in the right form for putting in the option table because they use    are not in the right form for putting in the option table because they use
2014    only one hyphen, yet are more than one character long. By putting them    only one hyphen, yet are more than one character long. By putting them

Legend:
Removed from v.379  
changed lines
  Added in v.515

  ViewVC Help
Powered by ViewVC 1.1.5