/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 96 by nigel, Fri Mar 2 13:10:43 2007 UTC | revision 236 by ph10, Tue Sep 11 12:57:06 2007 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <locale.h>  #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
 #include "config.h"  
58  #include "pcre.h"  #include "pcre.h"
59    
60  #define FALSE 0  #define FALSE 0
# Line 56  POSSIBILITY OF SUCH DAMAGE. Line 62  POSSIBILITY OF SUCH DAMAGE.
62    
63  typedef int BOOL;  typedef int BOOL;
64    
 #define VERSION "4.4 29-Nov-2006"  
65  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
66    
67  #if BUFSIZ > 8192  #if BUFSIZ > 8192
# Line 84  enum { DEE_READ, DEE_SKIP }; Line 89  enum { DEE_READ, DEE_SKIP };
89    
90  /* Line ending types */  /* Line ending types */
91    
92  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93    
94    
95    
# Line 117  static char *locale = NULL; Line 122  static char *locale = NULL;
122  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
123    
124  static int  pattern_count = 0;  static int  pattern_count = 0;
125  static pcre **pattern_list;  static pcre **pattern_list = NULL;
126  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
127    
128  static char *include_pattern = NULL;  static char *include_pattern = NULL;
129  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
# Line 192  static option_item optionlist[] = { Line 197  static option_item optionlist[] = {
197    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
198    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
199    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
200    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LR, CRLF)" },    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
201    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
202    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
203    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
# Line 222  static const char *prefix[] = { Line 227  static const char *prefix[] = {
227  static const char *suffix[] = {  static const char *suffix[] = {
228    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
229    
230  /* UTF-8 tables - used only when the newline setting is "all". */  /* UTF-8 tables - used only when the newline setting is "any". */
231    
232  const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
233    
# Line 244  although at present the only ones are fo Line 249  although at present the only ones are fo
249    
250  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
251    
252  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
253  #include <sys/types.h>  #include <sys/types.h>
254  #include <sys/stat.h>  #include <sys/stat.h>
255  #include <dirent.h>  #include <dirent.h>
# Line 276  for (;;) Line 281  for (;;)
281    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
282      return dent->d_name;      return dent->d_name;
283    }    }
284  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
285  }  }
286    
287  static void  static void
# Line 314  Lionel Fourquaux. David Burgess added a Line 319  Lionel Fourquaux. David Burgess added a
319  when it did not exist. */  when it did not exist. */
320    
321    
322  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
323    
324  #ifndef STRICT  #ifndef STRICT
325  # define STRICT  # define STRICT
# Line 436  FALSE; Line 441  FALSE;
441  typedef void directory_type;  typedef void directory_type;
442    
443  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
444  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
445  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
446  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
447    
448    
# Line 461  return FALSE; Line 466  return FALSE;
466    
467    
468    
469  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
470  /*************************************************  /*************************************************
471  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
472  *************************************************/  *************************************************/
# Line 541  switch(endlinetype) Line 546  switch(endlinetype)
546      }      }
547    break;    break;
548    
549      case EL_ANYCRLF:
550      while (p < endptr)
551        {
552        int extra = 0;
553        register int c = *((unsigned char *)p);
554    
555        if (utf8 && c >= 0xc0)
556          {
557          int gcii, gcss;
558          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
559          gcss = 6*extra;
560          c = (c & utf8_table3[extra]) << gcss;
561          for (gcii = 1; gcii <= extra; gcii++)
562            {
563            gcss -= 6;
564            c |= (p[gcii] & 0x3f) << gcss;
565            }
566          }
567    
568        p += 1 + extra;
569    
570        switch (c)
571          {
572          case 0x0a:    /* LF */
573          *lenptr = 1;
574          return p;
575    
576          case 0x0d:    /* CR */
577          if (p < endptr && *p == 0x0a)
578            {
579            *lenptr = 2;
580            p++;
581            }
582          else *lenptr = 1;
583          return p;
584    
585          default:
586          break;
587          }
588        }   /* End of loop for ANYCRLF case */
589    
590      *lenptr = 0;  /* Must have hit the end */
591      return endptr;
592    
593    case EL_ANY:    case EL_ANY:
594    while (p < endptr)    while (p < endptr)
595      {      {
# Line 639  switch(endlinetype) Line 688  switch(endlinetype)
688    return p;   /* But control should never get here */    return p;   /* But control should never get here */
689    
690    case EL_ANY:    case EL_ANY:
691      case EL_ANYCRLF:
692    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
693    if (utf8) while ((*p & 0xc0) == 0x80) p--;    if (utf8) while ((*p & 0xc0) == 0x80) p--;
694    
# Line 667  switch(endlinetype) Line 717  switch(endlinetype)
717        }        }
718      else c = *((unsigned char *)pp);      else c = *((unsigned char *)pp);
719    
720      switch (c)      if (endlinetype == EL_ANYCRLF) switch (c)
721          {
722          case 0x0a:    /* LF */
723          case 0x0d:    /* CR */
724          return p;
725    
726          default:
727          break;
728          }
729    
730        else switch (c)
731        {        {
732        case 0x0a:    /* LF */        case 0x0a:    /* LF */
733        case 0x0b:    /* VT */        case 0x0b:    /* VT */
# Line 796  while (ptr < endptr) Line 856  while (ptr < endptr)
856    
857    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
858    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
859    length = multiline? endptr - ptr : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
860    
861    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
862    
# Line 1004  while (ptr < endptr) Line 1064  while (ptr < endptr)
1064    
1065        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1066        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1067        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1068        start of the match will always be before the first newline sequence. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1069          the match will always be before the first newline sequence. */
1070    
1071        if (multiline)        if (multiline)
1072          {          {
1073          int ellength;          int ellength;
1074          char *endmatch = ptr + offsets[1];          char *endmatch = ptr;
1075          t = ptr;          if (!invert)
         while (t < endmatch)  
1076            {            {
1077            t = end_of_line(t, endptr, &ellength);            endmatch += offsets[1];
1078            if (t <= endmatch) linenumber++; else break;            t = ptr;
1079              while (t < endmatch)
1080                {
1081                t = end_of_line(t, endptr, &ellength);
1082                if (t <= endmatch) linenumber++; else break;
1083                }
1084            }            }
1085          endmatch = end_of_line(endmatch, endptr, &ellength);          endmatch = end_of_line(endmatch, endptr, &ellength);
1086          linelength = endmatch - ptr - ellength;          linelength = endmatch - ptr - ellength;
# Line 1064  while (ptr < endptr) Line 1129  while (ptr < endptr)
1129      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1130      }      }
1131    
1132      /* For a match in multiline inverted mode (which of course did not cause
1133      anything to be printed), we have to move on to the end of the match before
1134      proceeding. */
1135    
1136      if (multiline && invert && match)
1137        {
1138        int ellength;
1139        char *endmatch = ptr + offsets[1];
1140        t = ptr;
1141        while (t < endmatch)
1142          {
1143          t = end_of_line(t, endptr, &ellength);
1144          if (t <= endmatch) linenumber++; else break;
1145          }
1146        endmatch = end_of_line(endmatch, endptr, &ellength);
1147        linelength = endmatch - ptr - ellength;
1148        }
1149    
1150    /* Advance to after the newline and increment the line number. */    /* Advance to after the newline and increment the line number. */
1151    
1152    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
# Line 1328  switch(letter) Line 1411  switch(letter)
1411    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1412    
1413    case 'V':    case 'V':
1414    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1415    exit(0);    exit(0);
1416    break;    break;
1417    
# Line 1405  sprintf(buffer, "%s%.*s%s", prefix[proce Line 1487  sprintf(buffer, "%s%.*s%s", prefix[proce
1487    suffix[process_options]);    suffix[process_options]);
1488  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1489    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
1490  if (pattern_list[pattern_count++] != NULL) return TRUE;  if (pattern_list[pattern_count] != NULL)
1491      {
1492      pattern_count++;
1493      return TRUE;
1494      }
1495    
1496  /* Handle compile errors */  /* Handle compile errors */
1497    
# Line 1463  if ((process_options & PO_FIXED_STRINGS) Line 1549  if ((process_options & PO_FIXED_STRINGS)
1549      char *p = end_of_line(pattern, eop, &ellength);      char *p = end_of_line(pattern, eop, &ellength);
1550      if (ellength == 0)      if (ellength == 0)
1551        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1552      sprintf(buffer, "%.*s", p - pattern - ellength, pattern);      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1553      pattern = p;      pattern = p;
1554      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1555        return FALSE;        return FALSE;
# Line 1487  int i, j; Line 1573  int i, j;
1573  int rc = 1;  int rc = 1;
1574  int pcre_options = 0;  int pcre_options = 0;
1575  int cmd_pattern_count = 0;  int cmd_pattern_count = 0;
1576    int hint_count = 0;
1577  int errptr;  int errptr;
1578  BOOL only_one_at_top;  BOOL only_one_at_top;
1579  char *patterns[MAX_PATTERN_COUNT];  char *patterns[MAX_PATTERN_COUNT];
# Line 1504  switch(i) Line 1591  switch(i)
1591    case '\r':               newline = (char *)"cr"; break;    case '\r':               newline = (char *)"cr"; break;
1592    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1593    case -1:                 newline = (char *)"any"; break;    case -1:                 newline = (char *)"any"; break;
1594      case -2:                 newline = (char *)"anycrlf"; break;
1595    }    }
1596    
1597  /* Process the options */  /* Process the options */
# Line 1561  for (i = 1; i < argc; i++) Line 1649  for (i = 1; i < argc; i++)
1649          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
1650            {            {
1651            int oplen = equals - op->long_name;            int oplen = equals - op->long_name;
1652            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1653            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1654              {              {
1655              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1580  for (i = 1; i < argc; i++) Line 1668  for (i = 1; i < argc; i++)
1668          char buff2[24];          char buff2[24];
1669          int baselen = opbra - op->long_name;          int baselen = opbra - op->long_name;
1670          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
1671          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1,
1672            opbra + 1);            (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1673          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1674            break;            break;
1675          }          }
# Line 1811  else if (strcmp(newline, "any") == 0 || Line 1899  else if (strcmp(newline, "any") == 0 ||
1899    pcre_options |= PCRE_NEWLINE_ANY;    pcre_options |= PCRE_NEWLINE_ANY;
1900    endlinetype = EL_ANY;    endlinetype = EL_ANY;
1901    }    }
1902    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1903      {
1904      pcre_options |= PCRE_NEWLINE_ANYCRLF;
1905      endlinetype = EL_ANYCRLF;
1906      }
1907  else  else
1908    {    {
1909    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
# Line 1865  hints_list = (pcre_extra **)malloc(MAX_P Line 1958  hints_list = (pcre_extra **)malloc(MAX_P
1958  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
1959    {    {
1960    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
1961    return 2;    goto EXIT2;
1962    }    }
1963    
1964  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there is no file provided by -f,
# Line 1884  for (j = 0; j < cmd_pattern_count; j++) Line 1977  for (j = 0; j < cmd_pattern_count; j++)
1977    {    {
1978    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(patterns[j], pcre_options, NULL,
1979         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1980      return 2;      goto EXIT2;
1981    }    }
1982    
1983  /* Compile the regular expressions that are provided in a file. */  /* Compile the regular expressions that are provided in a file. */
# Line 1908  if (pattern_filename != NULL) Line 2001  if (pattern_filename != NULL)
2001        {        {
2002        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2003          strerror(errno));          strerror(errno));
2004        return 2;        goto EXIT2;
2005        }        }
2006      filename = pattern_filename;      filename = pattern_filename;
2007      }      }
# Line 1921  if (pattern_filename != NULL) Line 2014  if (pattern_filename != NULL)
2014      linenumber++;      linenumber++;
2015      if (buffer[0] == 0) continue;   /* Skip blank lines */      if (buffer[0] == 0) continue;   /* Skip blank lines */
2016      if (!compile_pattern(buffer, pcre_options, filename, linenumber))      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2017        return 2;        goto EXIT2;
2018      }      }
2019    
2020    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
# Line 1937  for (j = 0; j < pattern_count; j++) Line 2030  for (j = 0; j < pattern_count; j++)
2030      char s[16];      char s[16];
2031      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2032      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2033      return 2;      goto EXIT2;
2034      }      }
2035      hint_count++;
2036    }    }
2037    
2038  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
# Line 1951  if (exclude_pattern != NULL) Line 2045  if (exclude_pattern != NULL)
2045      {      {
2046      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2047        errptr, error);        errptr, error);
2048      return 2;      goto EXIT2;
2049      }      }
2050    }    }
2051    
# Line 1963  if (include_pattern != NULL) Line 2057  if (include_pattern != NULL)
2057      {      {
2058      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2059        errptr, error);        errptr, error);
2060      return 2;      goto EXIT2;
2061      }      }
2062    }    }
2063    
2064  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2065    
2066  if (i >= argc)  if (i >= argc)
2067    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
2068      rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2069      goto EXIT;
2070      }
2071    
2072  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2073  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
# Line 1987  for (; i < argc; i++) Line 2084  for (; i < argc; i++)
2084      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2085    }    }
2086    
2087    EXIT:
2088    if (pattern_list != NULL)
2089      {
2090      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2091      free(pattern_list);
2092      }
2093    if (hints_list != NULL)
2094      {
2095      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2096      free(hints_list);
2097      }
2098  return rc;  return rc;
2099    
2100    EXIT2:
2101    rc = 2;
2102    goto EXIT;
2103  }  }
2104    
2105  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.96  
changed lines
  Added in v.236

  ViewVC Help
Powered by ViewVC 1.1.5