/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 97 by ph10, Mon Mar 5 12:36:47 2007 UTC revision 199 by ph10, Tue Jul 31 14:39:09 2007 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  */  */
39    
40  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
41  #  include <config.h>  #include <config.h>
42  #endif  #endif
43    
44  #include <ctype.h>  #include <ctype.h>
# Line 50  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58  #include "pcre.h"  #include <pcre.h>
59    
60  #define FALSE 0  #define FALSE 0
61  #define TRUE 1  #define TRUE 1
# Line 86  enum { DEE_READ, DEE_SKIP }; Line 89  enum { DEE_READ, DEE_SKIP };
89    
90  /* Line ending types */  /* Line ending types */
91    
92  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93    
94    
95    
# Line 119  static char *locale = NULL; Line 122  static char *locale = NULL;
122  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
123    
124  static int  pattern_count = 0;  static int  pattern_count = 0;
125  static pcre **pattern_list;  static pcre **pattern_list = NULL;
126  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
127    
128  static char *include_pattern = NULL;  static char *include_pattern = NULL;
129  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
# Line 194  static option_item optionlist[] = { Line 197  static option_item optionlist[] = {
197    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
198    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
199    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
200    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LR, CRLF)" },    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
201    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
202    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
203    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
# Line 224  static const char *prefix[] = { Line 227  static const char *prefix[] = {
227  static const char *suffix[] = {  static const char *suffix[] = {
228    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
229    
230  /* UTF-8 tables - used only when the newline setting is "all". */  /* UTF-8 tables - used only when the newline setting is "any". */
231    
232  const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
233    
# Line 278  for (;;) Line 281  for (;;)
281    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
282      return dent->d_name;      return dent->d_name;
283    }    }
284  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
285  }  }
286    
287  static void  static void
# Line 463  return FALSE; Line 466  return FALSE;
466    
467    
468    
469  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
470  /*************************************************  /*************************************************
471  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
472  *************************************************/  *************************************************/
# Line 543  switch(endlinetype) Line 546  switch(endlinetype)
546      }      }
547    break;    break;
548    
549      case EL_ANYCRLF:
550      while (p < endptr)
551        {
552        int extra = 0;
553        register int c = *((unsigned char *)p);
554    
555        if (utf8 && c >= 0xc0)
556          {
557          int gcii, gcss;
558          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
559          gcss = 6*extra;
560          c = (c & utf8_table3[extra]) << gcss;
561          for (gcii = 1; gcii <= extra; gcii++)
562            {
563            gcss -= 6;
564            c |= (p[gcii] & 0x3f) << gcss;
565            }
566          }
567    
568        p += 1 + extra;
569    
570        switch (c)
571          {
572          case 0x0a:    /* LF */
573          *lenptr = 1;
574          return p;
575    
576          case 0x0d:    /* CR */
577          if (p < endptr && *p == 0x0a)
578            {
579            *lenptr = 2;
580            p++;
581            }
582          else *lenptr = 1;
583          return p;
584    
585          default:
586          break;
587          }
588        }   /* End of loop for ANYCRLF case */
589    
590      *lenptr = 0;  /* Must have hit the end */
591      return endptr;
592    
593    case EL_ANY:    case EL_ANY:
594    while (p < endptr)    while (p < endptr)
595      {      {
# Line 641  switch(endlinetype) Line 688  switch(endlinetype)
688    return p;   /* But control should never get here */    return p;   /* But control should never get here */
689    
690    case EL_ANY:    case EL_ANY:
691      case EL_ANYCRLF:
692    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
693    if (utf8) while ((*p & 0xc0) == 0x80) p--;    if (utf8) while ((*p & 0xc0) == 0x80) p--;
694    
# Line 669  switch(endlinetype) Line 717  switch(endlinetype)
717        }        }
718      else c = *((unsigned char *)pp);      else c = *((unsigned char *)pp);
719    
720      switch (c)      if (endlinetype == EL_ANYCRLF) switch (c)
721          {
722          case 0x0a:    /* LF */
723          case 0x0d:    /* CR */
724          return p;
725    
726          default:
727          break;
728          }
729    
730        else switch (c)
731        {        {
732        case 0x0a:    /* LF */        case 0x0a:    /* LF */
733        case 0x0b:    /* VT */        case 0x0b:    /* VT */
# Line 798  while (ptr < endptr) Line 856  while (ptr < endptr)
856    
857    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
858    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
859    length = multiline? endptr - ptr : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
860    
861    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
862    
# Line 1406  sprintf(buffer, "%s%.*s%s", prefix[proce Line 1464  sprintf(buffer, "%s%.*s%s", prefix[proce
1464    suffix[process_options]);    suffix[process_options]);
1465  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1466    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
1467  if (pattern_list[pattern_count++] != NULL) return TRUE;  if (pattern_list[pattern_count] != NULL)
1468      {
1469      pattern_count++;
1470      return TRUE;
1471      }
1472    
1473  /* Handle compile errors */  /* Handle compile errors */
1474    
# Line 1464  if ((process_options & PO_FIXED_STRINGS) Line 1526  if ((process_options & PO_FIXED_STRINGS)
1526      char *p = end_of_line(pattern, eop, &ellength);      char *p = end_of_line(pattern, eop, &ellength);
1527      if (ellength == 0)      if (ellength == 0)
1528        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1529      sprintf(buffer, "%.*s", p - pattern - ellength, pattern);      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1530      pattern = p;      pattern = p;
1531      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1532        return FALSE;        return FALSE;
# Line 1488  int i, j; Line 1550  int i, j;
1550  int rc = 1;  int rc = 1;
1551  int pcre_options = 0;  int pcre_options = 0;
1552  int cmd_pattern_count = 0;  int cmd_pattern_count = 0;
1553    int hint_count = 0;
1554  int errptr;  int errptr;
1555  BOOL only_one_at_top;  BOOL only_one_at_top;
1556  char *patterns[MAX_PATTERN_COUNT];  char *patterns[MAX_PATTERN_COUNT];
# Line 1505  switch(i) Line 1568  switch(i)
1568    case '\r':               newline = (char *)"cr"; break;    case '\r':               newline = (char *)"cr"; break;
1569    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1570    case -1:                 newline = (char *)"any"; break;    case -1:                 newline = (char *)"any"; break;
1571      case -2:                 newline = (char *)"anycrlf"; break;
1572    }    }
1573    
1574  /* Process the options */  /* Process the options */
# Line 1562  for (i = 1; i < argc; i++) Line 1626  for (i = 1; i < argc; i++)
1626          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
1627            {            {
1628            int oplen = equals - op->long_name;            int oplen = equals - op->long_name;
1629            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1630            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1631              {              {
1632              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1581  for (i = 1; i < argc; i++) Line 1645  for (i = 1; i < argc; i++)
1645          char buff2[24];          char buff2[24];
1646          int baselen = opbra - op->long_name;          int baselen = opbra - op->long_name;
1647          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
1648          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1,
1649            opbra + 1);            (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1650          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1651            break;            break;
1652          }          }
# Line 1812  else if (strcmp(newline, "any") == 0 || Line 1876  else if (strcmp(newline, "any") == 0 ||
1876    pcre_options |= PCRE_NEWLINE_ANY;    pcre_options |= PCRE_NEWLINE_ANY;
1877    endlinetype = EL_ANY;    endlinetype = EL_ANY;
1878    }    }
1879    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1880      {
1881      pcre_options |= PCRE_NEWLINE_ANYCRLF;
1882      endlinetype = EL_ANYCRLF;
1883      }
1884  else  else
1885    {    {
1886    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
# Line 1866  hints_list = (pcre_extra **)malloc(MAX_P Line 1935  hints_list = (pcre_extra **)malloc(MAX_P
1935  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
1936    {    {
1937    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
1938    return 2;    goto EXIT2;
1939    }    }
1940    
1941  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there is no file provided by -f,
# Line 1885  for (j = 0; j < cmd_pattern_count; j++) Line 1954  for (j = 0; j < cmd_pattern_count; j++)
1954    {    {
1955    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(patterns[j], pcre_options, NULL,
1956         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1957      return 2;      goto EXIT2;
1958    }    }
1959    
1960  /* Compile the regular expressions that are provided in a file. */  /* Compile the regular expressions that are provided in a file. */
# Line 1909  if (pattern_filename != NULL) Line 1978  if (pattern_filename != NULL)
1978        {        {
1979        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1980          strerror(errno));          strerror(errno));
1981        return 2;        goto EXIT2;
1982        }        }
1983      filename = pattern_filename;      filename = pattern_filename;
1984      }      }
# Line 1922  if (pattern_filename != NULL) Line 1991  if (pattern_filename != NULL)
1991      linenumber++;      linenumber++;
1992      if (buffer[0] == 0) continue;   /* Skip blank lines */      if (buffer[0] == 0) continue;   /* Skip blank lines */
1993      if (!compile_pattern(buffer, pcre_options, filename, linenumber))      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1994        return 2;        goto EXIT2;
1995      }      }
1996    
1997    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
# Line 1938  for (j = 0; j < pattern_count; j++) Line 2007  for (j = 0; j < pattern_count; j++)
2007      char s[16];      char s[16];
2008      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2009      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2010      return 2;      goto EXIT2;
2011      }      }
2012      hint_count++;
2013    }    }
2014    
2015  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
# Line 1952  if (exclude_pattern != NULL) Line 2022  if (exclude_pattern != NULL)
2022      {      {
2023      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2024        errptr, error);        errptr, error);
2025      return 2;      goto EXIT2;
2026      }      }
2027    }    }
2028    
# Line 1964  if (include_pattern != NULL) Line 2034  if (include_pattern != NULL)
2034      {      {
2035      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2036        errptr, error);        errptr, error);
2037      return 2;      goto EXIT2;
2038      }      }
2039    }    }
2040    
2041  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2042    
2043  if (i >= argc)  if (i >= argc)
2044    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
2045      rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2046      goto EXIT;
2047      }
2048    
2049  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2050  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
# Line 1988  for (; i < argc; i++) Line 2061  for (; i < argc; i++)
2061      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2062    }    }
2063    
2064    EXIT:
2065    if (pattern_list != NULL)
2066      {
2067      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2068      free(pattern_list);
2069      }
2070    if (hints_list != NULL)
2071      {
2072      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2073      free(hints_list);
2074      }
2075  return rc;  return rc;
2076    
2077    EXIT2:
2078    rc = 2;
2079    goto EXIT;
2080  }  }
2081    
2082  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.97  
changed lines
  Added in v.199

  ViewVC Help
Powered by ViewVC 1.1.5