/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 117 by ph10, Fri Mar 9 15:59:06 2007 UTC | revision 243 by ph10, Thu Sep 13 09:28:14 2007 UTC
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  */  */
39    
40  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
41  #  include <config.h>  #include "config.h"
42  #endif  #endif
43    
44  #include <ctype.h>  #include <ctype.h>
# Line 50  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58  #include "pcre.h"  #include "pcre.h"
59    
# Line 86  enum { DEE_READ, DEE_SKIP }; Line 89  enum { DEE_READ, DEE_SKIP };
89    
90  /* Line ending types */  /* Line ending types */
91    
92  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93    
94    
95    
# Line 119  static char *locale = NULL; Line 122  static char *locale = NULL;
122  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
123    
124  static int  pattern_count = 0;  static int  pattern_count = 0;
125  static pcre **pattern_list;  static pcre **pattern_list = NULL;
126  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
127    
128  static char *include_pattern = NULL;  static char *include_pattern = NULL;
129  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
# Line 194  static option_item optionlist[] = { Line 197  static option_item optionlist[] = {
197    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
198    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
199    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
200    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LR, CRLF)" },    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
201    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
202    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
203    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
# Line 224  static const char *prefix[] = { Line 227  static const char *prefix[] = {
227  static const char *suffix[] = {  static const char *suffix[] = {
228    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
229    
230  /* UTF-8 tables - used only when the newline setting is "all". */  /* UTF-8 tables - used only when the newline setting is "any". */
231    
232  const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
233    
# Line 278  for (;;) Line 281  for (;;)
281    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
282      return dent->d_name;      return dent->d_name;
283    }    }
284  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
285  }  }
286    
287  static void  static void
# Line 463  return FALSE; Line 466  return FALSE;
466    
467    
468    
469  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
470  /*************************************************  /*************************************************
471  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
472  *************************************************/  *************************************************/
# Line 543  switch(endlinetype) Line 546  switch(endlinetype)
546      }      }
547    break;    break;
548    
549      case EL_ANYCRLF:
550      while (p < endptr)
551        {
552        int extra = 0;
553        register int c = *((unsigned char *)p);
554    
555        if (utf8 && c >= 0xc0)
556          {
557          int gcii, gcss;
558          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
559          gcss = 6*extra;
560          c = (c & utf8_table3[extra]) << gcss;
561          for (gcii = 1; gcii <= extra; gcii++)
562            {
563            gcss -= 6;
564            c |= (p[gcii] & 0x3f) << gcss;
565            }
566          }
567    
568        p += 1 + extra;
569    
570        switch (c)
571          {
572          case 0x0a:    /* LF */
573          *lenptr = 1;
574          return p;
575    
576          case 0x0d:    /* CR */
577          if (p < endptr && *p == 0x0a)
578            {
579            *lenptr = 2;
580            p++;
581            }
582          else *lenptr = 1;
583          return p;
584    
585          default:
586          break;
587          }
588        }   /* End of loop for ANYCRLF case */
589    
590      *lenptr = 0;  /* Must have hit the end */
591      return endptr;
592    
593    case EL_ANY:    case EL_ANY:
594    while (p < endptr)    while (p < endptr)
595      {      {
# Line 641  switch(endlinetype) Line 688  switch(endlinetype)
688    return p;   /* But control should never get here */    return p;   /* But control should never get here */
689    
690    case EL_ANY:    case EL_ANY:
691      case EL_ANYCRLF:
692    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
693    if (utf8) while ((*p & 0xc0) == 0x80) p--;    if (utf8) while ((*p & 0xc0) == 0x80) p--;
694    
# Line 669  switch(endlinetype) Line 717  switch(endlinetype)
717        }        }
718      else c = *((unsigned char *)pp);      else c = *((unsigned char *)pp);
719    
720      switch (c)      if (endlinetype == EL_ANYCRLF) switch (c)
721          {
722          case 0x0a:    /* LF */
723          case 0x0d:    /* CR */
724          return p;
725    
726          default:
727          break;
728          }
729    
730        else switch (c)
731        {        {
732        case 0x0a:    /* LF */        case 0x0a:    /* LF */
733        case 0x0b:    /* VT */        case 0x0b:    /* VT */
# Line 798  while (ptr < endptr) Line 856  while (ptr < endptr)
856    
857    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
858    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
859    length = multiline? endptr - ptr : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
860    
861    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
862    
# Line 1006  while (ptr < endptr) Line 1064  while (ptr < endptr)
1064    
1065        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1066        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1067        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1068        start of the match will always be before the first newline sequence. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1069          the match will always be before the first newline sequence. */
1070    
1071        if (multiline)        if (multiline)
1072          {          {
1073          int ellength;          int ellength;
1074          char *endmatch = ptr + offsets[1];          char *endmatch = ptr;
1075          t = ptr;          if (!invert)
         while (t < endmatch)  
1076            {            {
1077            t = end_of_line(t, endptr, &ellength);            endmatch += offsets[1];
1078            if (t <= endmatch) linenumber++; else break;            t = ptr;
1079              while (t < endmatch)
1080                {
1081                t = end_of_line(t, endptr, &ellength);
1082                if (t <= endmatch) linenumber++; else break;
1083                }
1084            }            }
1085          endmatch = end_of_line(endmatch, endptr, &ellength);          endmatch = end_of_line(endmatch, endptr, &ellength);
1086          linelength = endmatch - ptr - ellength;          linelength = endmatch - ptr - ellength;
# Line 1050  while (ptr < endptr) Line 1113  while (ptr < endptr)
1113          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1114          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1115          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1116          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1117              stdout);
1118          }          }
1119        else fwrite(ptr, 1, linelength + endlinelength, stdout);        else fwrite(ptr, 1, linelength + endlinelength, stdout);
1120        }        }
# Line 1066  while (ptr < endptr) Line 1130  while (ptr < endptr)
1130      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1131      }      }
1132    
1133      /* For a match in multiline inverted mode (which of course did not cause
1134      anything to be printed), we have to move on to the end of the match before
1135      proceeding. */
1136    
1137      if (multiline && invert && match)
1138        {
1139        int ellength;
1140        char *endmatch = ptr + offsets[1];
1141        t = ptr;
1142        while (t < endmatch)
1143          {
1144          t = end_of_line(t, endptr, &ellength);
1145          if (t <= endmatch) linenumber++; else break;
1146          }
1147        endmatch = end_of_line(endmatch, endptr, &ellength);
1148        linelength = endmatch - ptr - ellength;
1149        }
1150    
1151    /* Advance to after the newline and increment the line number. */    /* Advance to after the newline and increment the line number. */
1152    
1153    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
# Line 1406  sprintf(buffer, "%s%.*s%s", prefix[proce Line 1488  sprintf(buffer, "%s%.*s%s", prefix[proce
1488    suffix[process_options]);    suffix[process_options]);
1489  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1490    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
1491  if (pattern_list[pattern_count++] != NULL) return TRUE;  if (pattern_list[pattern_count] != NULL)
1492      {
1493      pattern_count++;
1494      return TRUE;
1495      }
1496    
1497  /* Handle compile errors */  /* Handle compile errors */
1498    
# Line 1464  if ((process_options & PO_FIXED_STRINGS) Line 1550  if ((process_options & PO_FIXED_STRINGS)
1550      char *p = end_of_line(pattern, eop, &ellength);      char *p = end_of_line(pattern, eop, &ellength);
1551      if (ellength == 0)      if (ellength == 0)
1552        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1553      sprintf(buffer, "%.*s", p - pattern - ellength, pattern);      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1554      pattern = p;      pattern = p;
1555      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1556        return FALSE;        return FALSE;
# Line 1488  int i, j; Line 1574  int i, j;
1574  int rc = 1;  int rc = 1;
1575  int pcre_options = 0;  int pcre_options = 0;
1576  int cmd_pattern_count = 0;  int cmd_pattern_count = 0;
1577    int hint_count = 0;
1578  int errptr;  int errptr;
1579  BOOL only_one_at_top;  BOOL only_one_at_top;
1580  char *patterns[MAX_PATTERN_COUNT];  char *patterns[MAX_PATTERN_COUNT];
# Line 1505  switch(i) Line 1592  switch(i)
1592    case '\r':               newline = (char *)"cr"; break;    case '\r':               newline = (char *)"cr"; break;
1593    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1594    case -1:                 newline = (char *)"any"; break;    case -1:                 newline = (char *)"any"; break;
1595      case -2:                 newline = (char *)"anycrlf"; break;
1596    }    }
1597    
1598  /* Process the options */  /* Process the options */
# Line 1562  for (i = 1; i < argc; i++) Line 1650  for (i = 1; i < argc; i++)
1650          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
1651            {            {
1652            int oplen = equals - op->long_name;            int oplen = equals - op->long_name;
1653            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1654            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1655              {              {
1656              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1581  for (i = 1; i < argc; i++) Line 1669  for (i = 1; i < argc; i++)
1669          char buff2[24];          char buff2[24];
1670          int baselen = opbra - op->long_name;          int baselen = opbra - op->long_name;
1671          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
1672          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1,
1673            opbra + 1);            (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1674          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1675            break;            break;
1676          }          }
# Line 1812  else if (strcmp(newline, "any") == 0 || Line 1900  else if (strcmp(newline, "any") == 0 ||
1900    pcre_options |= PCRE_NEWLINE_ANY;    pcre_options |= PCRE_NEWLINE_ANY;
1901    endlinetype = EL_ANY;    endlinetype = EL_ANY;
1902    }    }
1903    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1904      {
1905      pcre_options |= PCRE_NEWLINE_ANYCRLF;
1906      endlinetype = EL_ANYCRLF;
1907      }
1908  else  else
1909    {    {
1910    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
# Line 1866  hints_list = (pcre_extra **)malloc(MAX_P Line 1959  hints_list = (pcre_extra **)malloc(MAX_P
1959  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
1960    {    {
1961    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
1962    return 2;    goto EXIT2;
1963    }    }
1964    
1965  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there is no file provided by -f,
# Line 1885  for (j = 0; j < cmd_pattern_count; j++) Line 1978  for (j = 0; j < cmd_pattern_count; j++)
1978    {    {
1979    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(patterns[j], pcre_options, NULL,
1980         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1981      return 2;      goto EXIT2;
1982    }    }
1983    
1984  /* Compile the regular expressions that are provided in a file. */  /* Compile the regular expressions that are provided in a file. */
# Line 1909  if (pattern_filename != NULL) Line 2002  if (pattern_filename != NULL)
2002        {        {
2003        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2004          strerror(errno));          strerror(errno));
2005        return 2;        goto EXIT2;
2006        }        }
2007      filename = pattern_filename;      filename = pattern_filename;
2008      }      }
# Line 1922  if (pattern_filename != NULL) Line 2015  if (pattern_filename != NULL)
2015      linenumber++;      linenumber++;
2016      if (buffer[0] == 0) continue;   /* Skip blank lines */      if (buffer[0] == 0) continue;   /* Skip blank lines */
2017      if (!compile_pattern(buffer, pcre_options, filename, linenumber))      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2018        return 2;        goto EXIT2;
2019      }      }
2020    
2021    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
# Line 1938  for (j = 0; j < pattern_count; j++) Line 2031  for (j = 0; j < pattern_count; j++)
2031      char s[16];      char s[16];
2032      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2033      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2034      return 2;      goto EXIT2;
2035      }      }
2036      hint_count++;
2037    }    }
2038    
2039  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
# Line 1952  if (exclude_pattern != NULL) Line 2046  if (exclude_pattern != NULL)
2046      {      {
2047      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2048        errptr, error);        errptr, error);
2049      return 2;      goto EXIT2;
2050      }      }
2051    }    }
2052    
# Line 1964  if (include_pattern != NULL) Line 2058  if (include_pattern != NULL)
2058      {      {
2059      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2060        errptr, error);        errptr, error);
2061      return 2;      goto EXIT2;
2062      }      }
2063    }    }
2064    
2065  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2066    
2067  if (i >= argc)  if (i >= argc)
2068    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
2069      rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2070      goto EXIT;
2071      }
2072    
2073  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2074  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
# Line 1988  for (; i < argc; i++) Line 2085  for (; i < argc; i++)
2085      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2086    }    }
2087    
2088    EXIT:
2089    if (pattern_list != NULL)
2090      {
2091      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2092      free(pattern_list);
2093      }
2094    if (hints_list != NULL)
2095      {
2096      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2097      free(hints_list);
2098      }
2099  return rc;  return rc;
2100    
2101    EXIT2:
2102    rc = 2;
2103    goto EXIT;
2104  }  }
2105    
2106  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.117  
changed lines
  Added in v.243

  ViewVC Help
Powered by ViewVC 1.1.5