/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

Comparing revision 93 (by nigel, Sat Feb 24 21:41:42 2007 UTC) with revision 152 (by ph10, Tue Apr 17 15:55:53 2007 UTC)
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #  include <config.h>
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <locale.h>  #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53  #include <unistd.h>  #ifdef HAVE_UNISTD_H
54    #  include <unistd.h>
55    #endif
56    
57  #include "config.h"  #include <pcre.h>
 #include "pcre.h"  
58    
59  #define FALSE 0  #define FALSE 0
60  #define TRUE 1  #define TRUE 1
61    
62  typedef int BOOL;  typedef int BOOL;
63    
 #define VERSION "4.4 29-Nov-2006"  
64  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
65    
66  #if BUFSIZ > 8192  #if BUFSIZ > 8192
# Line 84  enum { DEE_READ, DEE_SKIP }; Line 88  enum { DEE_READ, DEE_SKIP };
88    
89  /* Line ending types */  /* Line ending types */
90    
91  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
92    
93    
94    
# Line 117  static char *locale = NULL; Line 121  static char *locale = NULL;
121  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
122    
123  static int  pattern_count = 0;  static int  pattern_count = 0;
124  static pcre **pattern_list;  static pcre **pattern_list = NULL;
125  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
126    
127  static char *include_pattern = NULL;  static char *include_pattern = NULL;
128  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
# Line 192  static option_item optionlist[] = { Line 196  static option_item optionlist[] = {
196    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
197    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
198    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
199    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LR, CRLF)" },    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
200    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
201    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
202    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
# Line 222  static const char *prefix[] = { Line 226  static const char *prefix[] = {
226  static const char *suffix[] = {  static const char *suffix[] = {
227    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
228    
229  /* UTF-8 tables - used only when the newline setting is "all". */  /* UTF-8 tables - used only when the newline setting is "any". */
230    
231  const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
232    
# Line 244  although at present the only ones are fo Line 248  although at present the only ones are fo
248    
249  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
250    
251  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
252  #include <sys/types.h>  #include <sys/types.h>
253  #include <sys/stat.h>  #include <sys/stat.h>
254  #include <dirent.h>  #include <dirent.h>
# Line 276  for (;;) Line 280  for (;;)
280    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
281      return dent->d_name;      return dent->d_name;
282    }    }
283  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
284  }  }
285    
286  static void  static void
# Line 314  Lionel Fourquaux. David Burgess added a Line 318  Lionel Fourquaux. David Burgess added a
318  when it did not exist. */  when it did not exist. */
319    
320    
321  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
322    
323  #ifndef STRICT  #ifndef STRICT
324  # define STRICT  # define STRICT
# Line 436  FALSE; Line 440  FALSE;
440  typedef void directory_type;  typedef void directory_type;
441    
442  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
443  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
444  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
445  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
446    
447    
# Line 461  return FALSE; Line 465  return FALSE;
465    
466    
467    
468  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
469  /*************************************************  /*************************************************
470  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
471  *************************************************/  *************************************************/
# Line 541  switch(endlinetype) Line 545  switch(endlinetype)
545      }      }
546    break;    break;
547    
548      case EL_ANYCRLF:
549      while (p < endptr)
550        {
551        int extra = 0;
552        register int c = *((unsigned char *)p);
553    
554        if (utf8 && c >= 0xc0)
555          {
556          int gcii, gcss;
557          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
558          gcss = 6*extra;
559          c = (c & utf8_table3[extra]) << gcss;
560          for (gcii = 1; gcii <= extra; gcii++)
561            {
562            gcss -= 6;
563            c |= (p[gcii] & 0x3f) << gcss;
564            }
565          }
566    
567        p += 1 + extra;
568    
569        switch (c)
570          {
571          case 0x0a:    /* LF */
572          *lenptr = 1;
573          return p;
574    
575          case 0x0d:    /* CR */
576          if (p < endptr && *p == 0x0a)
577            {
578            *lenptr = 2;
579            p++;
580            }
581          else *lenptr = 1;
582          return p;
583    
584          default:
585          break;
586          }
587        }   /* End of loop for ANYCRLF case */
588    
589      *lenptr = 0;  /* Must have hit the end */
590      return endptr;
591    
592    case EL_ANY:    case EL_ANY:
593    while (p < endptr)    while (p < endptr)
594      {      {
# Line 639  switch(endlinetype) Line 687  switch(endlinetype)
687    return p;   /* But control should never get here */    return p;   /* But control should never get here */
688    
689    case EL_ANY:    case EL_ANY:
690      case EL_ANYCRLF:
691    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
692    if (utf8) while ((*p & 0xc0) == 0x80) p--;    if (utf8) while ((*p & 0xc0) == 0x80) p--;
693    
# Line 667  switch(endlinetype) Line 716  switch(endlinetype)
716        }        }
717      else c = *((unsigned char *)pp);      else c = *((unsigned char *)pp);
718    
719      switch (c)      if (endlinetype == EL_ANYCRLF) switch (c)
720          {
721          case 0x0a:    /* LF */
722          case 0x0d:    /* CR */
723          return p;
724    
725          default:
726          break;
727          }
728    
729        else switch (c)
730        {        {
731        case 0x0a:    /* LF */        case 0x0a:    /* LF */
732        case 0x0b:    /* VT */        case 0x0b:    /* VT */
# Line 1328  switch(letter) Line 1387  switch(letter)
1387    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1388    
1389    case 'V':    case 'V':
1390    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1391    exit(0);    exit(0);
1392    break;    break;
1393    
# Line 1405  sprintf(buffer, "%s%.*s%s", prefix[proce Line 1463  sprintf(buffer, "%s%.*s%s", prefix[proce
1463    suffix[process_options]);    suffix[process_options]);
1464  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1465    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
1466  if (pattern_list[pattern_count++] != NULL) return TRUE;  if (pattern_list[pattern_count] != NULL)
1467      {
1468      pattern_count++;
1469      return TRUE;
1470      }
1471    
1472  /* Handle compile errors */  /* Handle compile errors */
1473    
# Line 1463  if ((process_options & PO_FIXED_STRINGS) Line 1525  if ((process_options & PO_FIXED_STRINGS)
1525      char *p = end_of_line(pattern, eop, &ellength);      char *p = end_of_line(pattern, eop, &ellength);
1526      if (ellength == 0)      if (ellength == 0)
1527        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1528      sprintf(buffer, "%.*s", p - pattern - ellength, pattern);      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1529      pattern = p;      pattern = p;
1530      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1531        return FALSE;        return FALSE;
# Line 1487  int i, j; Line 1549  int i, j;
1549  int rc = 1;  int rc = 1;
1550  int pcre_options = 0;  int pcre_options = 0;
1551  int cmd_pattern_count = 0;  int cmd_pattern_count = 0;
1552    int hint_count = 0;
1553  int errptr;  int errptr;
1554  BOOL only_one_at_top;  BOOL only_one_at_top;
1555  char *patterns[MAX_PATTERN_COUNT];  char *patterns[MAX_PATTERN_COUNT];
# Line 1504  switch(i) Line 1567  switch(i)
1567    case '\r':               newline = (char *)"cr"; break;    case '\r':               newline = (char *)"cr"; break;
1568    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1569    case -1:                 newline = (char *)"any"; break;    case -1:                 newline = (char *)"any"; break;
1570      case -2:                 newline = (char *)"anycrlf"; break;
1571    }    }
1572    
1573  /* Process the options */  /* Process the options */
# Line 1580  for (i = 1; i < argc; i++) Line 1644  for (i = 1; i < argc; i++)
1644          char buff2[24];          char buff2[24];
1645          int baselen = opbra - op->long_name;          int baselen = opbra - op->long_name;
1646          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
1647          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1,
1648            opbra + 1);            (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1649          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1650            break;            break;
1651          }          }
# Line 1811  else if (strcmp(newline, "any") == 0 || Line 1875  else if (strcmp(newline, "any") == 0 ||
1875    pcre_options |= PCRE_NEWLINE_ANY;    pcre_options |= PCRE_NEWLINE_ANY;
1876    endlinetype = EL_ANY;    endlinetype = EL_ANY;
1877    }    }
1878    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1879      {
1880      pcre_options |= PCRE_NEWLINE_ANYCRLF;
1881      endlinetype = EL_ANYCRLF;
1882      }
1883  else  else
1884    {    {
1885    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
# Line 1865  hints_list = (pcre_extra **)malloc(MAX_P Line 1934  hints_list = (pcre_extra **)malloc(MAX_P
1934  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
1935    {    {
1936    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
1937    return 2;    goto EXIT2;
1938    }    }
1939    
1940  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there is no file provided by -f,
# Line 1884  for (j = 0; j < cmd_pattern_count; j++) Line 1953  for (j = 0; j < cmd_pattern_count; j++)
1953    {    {
1954    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(patterns[j], pcre_options, NULL,
1955         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1956      return 2;      goto EXIT2;
1957    }    }
1958    
1959  /* Compile the regular expressions that are provided in a file. */  /* Compile the regular expressions that are provided in a file. */
# Line 1908  if (pattern_filename != NULL) Line 1977  if (pattern_filename != NULL)
1977        {        {
1978        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1979          strerror(errno));          strerror(errno));
1980        return 2;        goto EXIT2;
1981        }        }
1982      filename = pattern_filename;      filename = pattern_filename;
1983      }      }
# Line 1921  if (pattern_filename != NULL) Line 1990  if (pattern_filename != NULL)
1990      linenumber++;      linenumber++;
1991      if (buffer[0] == 0) continue;   /* Skip blank lines */      if (buffer[0] == 0) continue;   /* Skip blank lines */
1992      if (!compile_pattern(buffer, pcre_options, filename, linenumber))      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1993        return 2;        goto EXIT2;
1994      }      }
1995    
1996    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
# Line 1937  for (j = 0; j < pattern_count; j++) Line 2006  for (j = 0; j < pattern_count; j++)
2006      char s[16];      char s[16];
2007      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2008      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2009      return 2;      goto EXIT2;
2010      }      }
2011      hint_count++;
2012    }    }
2013    
2014  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
# Line 1951  if (exclude_pattern != NULL) Line 2021  if (exclude_pattern != NULL)
2021      {      {
2022      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2023        errptr, error);        errptr, error);
2024      return 2;      goto EXIT2;
2025      }      }
2026    }    }
2027    
# Line 1963  if (include_pattern != NULL) Line 2033  if (include_pattern != NULL)
2033      {      {
2034      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2035        errptr, error);        errptr, error);
2036      return 2;      goto EXIT2;
2037      }      }
2038    }    }
2039    
2040  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2041    
2042  if (i >= argc)  if (i >= argc)
2043    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
2044      rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2045      goto EXIT;
2046      }
2047    
2048  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2049  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
# Line 1987  for (; i < argc; i++) Line 2060  for (; i < argc; i++)
2060      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2061    }    }
2062    
2063    EXIT:
2064    if (pattern_list != NULL)
2065      {
2066      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2067      free(pattern_list);
2068      }
2069    if (hints_list != NULL)
2070      {
2071      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2072      free(hints_list);
2073      }
2074  return rc;  return rc;
2075    
2076    EXIT2:
2077    rc = 2;
2078    goto EXIT;
2079  }  }
2080    
2081  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.93  
changed lines
  Added in v.152

  ViewVC Help
Powered by ViewVC 1.1.5