/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 121 by ph10, Mon Mar 12 12:12:47 2007 UTC revision 236 by ph10, Tue Sep 11 12:57:06 2007 UTC
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  */  */
39    
40  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
41  #  include <config.h>  #include "config.h"
42  #endif  #endif
43    
44  #include <ctype.h>  #include <ctype.h>
# Line 50  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58  #include "pcre.h"  #include "pcre.h"
59    
# Line 86  enum { DEE_READ, DEE_SKIP }; Line 89  enum { DEE_READ, DEE_SKIP };
89    
90  /* Line ending types */  /* Line ending types */
91    
92  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93    
94    
95    
# Line 194  static option_item optionlist[] = { Line 197  static option_item optionlist[] = {
197    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
198    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
199    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
200    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LR, CRLF)" },    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
201    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
202    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
203    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
# Line 224  static const char *prefix[] = { Line 227  static const char *prefix[] = {
227  static const char *suffix[] = {  static const char *suffix[] = {
228    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
229    
230  /* UTF-8 tables - used only when the newline setting is "all". */  /* UTF-8 tables - used only when the newline setting is "any". */
231    
232  const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
233    
# Line 278  for (;;) Line 281  for (;;)
281    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
282      return dent->d_name;      return dent->d_name;
283    }    }
284  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
285  }  }
286    
287  static void  static void
# Line 463  return FALSE; Line 466  return FALSE;
466    
467    
468    
469  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
470  /*************************************************  /*************************************************
471  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
472  *************************************************/  *************************************************/
# Line 543  switch(endlinetype) Line 546  switch(endlinetype)
546      }      }
547    break;    break;
548    
549      case EL_ANYCRLF:
550      while (p < endptr)
551        {
552        int extra = 0;
553        register int c = *((unsigned char *)p);
554    
555        if (utf8 && c >= 0xc0)
556          {
557          int gcii, gcss;
558          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
559          gcss = 6*extra;
560          c = (c & utf8_table3[extra]) << gcss;
561          for (gcii = 1; gcii <= extra; gcii++)
562            {
563            gcss -= 6;
564            c |= (p[gcii] & 0x3f) << gcss;
565            }
566          }
567    
568        p += 1 + extra;
569    
570        switch (c)
571          {
572          case 0x0a:    /* LF */
573          *lenptr = 1;
574          return p;
575    
576          case 0x0d:    /* CR */
577          if (p < endptr && *p == 0x0a)
578            {
579            *lenptr = 2;
580            p++;
581            }
582          else *lenptr = 1;
583          return p;
584    
585          default:
586          break;
587          }
588        }   /* End of loop for ANYCRLF case */
589    
590      *lenptr = 0;  /* Must have hit the end */
591      return endptr;
592    
593    case EL_ANY:    case EL_ANY:
594    while (p < endptr)    while (p < endptr)
595      {      {
# Line 641  switch(endlinetype) Line 688  switch(endlinetype)
688    return p;   /* But control should never get here */    return p;   /* But control should never get here */
689    
690    case EL_ANY:    case EL_ANY:
691      case EL_ANYCRLF:
692    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
693    if (utf8) while ((*p & 0xc0) == 0x80) p--;    if (utf8) while ((*p & 0xc0) == 0x80) p--;
694    
# Line 669  switch(endlinetype) Line 717  switch(endlinetype)
717        }        }
718      else c = *((unsigned char *)pp);      else c = *((unsigned char *)pp);
719    
720      switch (c)      if (endlinetype == EL_ANYCRLF) switch (c)
721          {
722          case 0x0a:    /* LF */
723          case 0x0d:    /* CR */
724          return p;
725    
726          default:
727          break;
728          }
729    
730        else switch (c)
731        {        {
732        case 0x0a:    /* LF */        case 0x0a:    /* LF */
733        case 0x0b:    /* VT */        case 0x0b:    /* VT */
# Line 798  while (ptr < endptr) Line 856  while (ptr < endptr)
856    
857    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
858    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
859    length = multiline? endptr - ptr : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
860    
861    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
862    
# Line 1006  while (ptr < endptr) Line 1064  while (ptr < endptr)
1064    
1065        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1066        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1067        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1068        start of the match will always be before the first newline sequence. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1069          the match will always be before the first newline sequence. */
1070    
1071        if (multiline)        if (multiline)
1072          {          {
1073          int ellength;          int ellength;
1074          char *endmatch = ptr + offsets[1];          char *endmatch = ptr;
1075          t = ptr;          if (!invert)
         while (t < endmatch)  
1076            {            {
1077            t = end_of_line(t, endptr, &ellength);            endmatch += offsets[1];
1078            if (t <= endmatch) linenumber++; else break;            t = ptr;
1079              while (t < endmatch)
1080                {
1081                t = end_of_line(t, endptr, &ellength);
1082                if (t <= endmatch) linenumber++; else break;
1083                }
1084            }            }
1085          endmatch = end_of_line(endmatch, endptr, &ellength);          endmatch = end_of_line(endmatch, endptr, &ellength);
1086          linelength = endmatch - ptr - ellength;          linelength = endmatch - ptr - ellength;
# Line 1066  while (ptr < endptr) Line 1129  while (ptr < endptr)
1129      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1130      }      }
1131    
1132      /* For a match in multiline inverted mode (which of course did not cause
1133      anything to be printed), we have to move on to the end of the match before
1134      proceeding. */
1135    
1136      if (multiline && invert && match)
1137        {
1138        int ellength;
1139        char *endmatch = ptr + offsets[1];
1140        t = ptr;
1141        while (t < endmatch)
1142          {
1143          t = end_of_line(t, endptr, &ellength);
1144          if (t <= endmatch) linenumber++; else break;
1145          }
1146        endmatch = end_of_line(endmatch, endptr, &ellength);
1147        linelength = endmatch - ptr - ellength;
1148        }
1149    
1150    /* Advance to after the newline and increment the line number. */    /* Advance to after the newline and increment the line number. */
1151    
1152    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
# Line 1406  sprintf(buffer, "%s%.*s%s", prefix[proce Line 1487  sprintf(buffer, "%s%.*s%s", prefix[proce
1487    suffix[process_options]);    suffix[process_options]);
1488  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1489    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
1490  if (pattern_list[pattern_count++] != NULL) return TRUE;  if (pattern_list[pattern_count] != NULL)
1491      {
1492      pattern_count++;
1493      return TRUE;
1494      }
1495    
1496  /* Handle compile errors */  /* Handle compile errors */
1497    
# Line 1464  if ((process_options & PO_FIXED_STRINGS) Line 1549  if ((process_options & PO_FIXED_STRINGS)
1549      char *p = end_of_line(pattern, eop, &ellength);      char *p = end_of_line(pattern, eop, &ellength);
1550      if (ellength == 0)      if (ellength == 0)
1551        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1552      sprintf(buffer, "%.*s", p - pattern - ellength, pattern);      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1553      pattern = p;      pattern = p;
1554      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1555        return FALSE;        return FALSE;
# Line 1488  int i, j; Line 1573  int i, j;
1573  int rc = 1;  int rc = 1;
1574  int pcre_options = 0;  int pcre_options = 0;
1575  int cmd_pattern_count = 0;  int cmd_pattern_count = 0;
1576    int hint_count = 0;
1577  int errptr;  int errptr;
1578  BOOL only_one_at_top;  BOOL only_one_at_top;
1579  char *patterns[MAX_PATTERN_COUNT];  char *patterns[MAX_PATTERN_COUNT];
# Line 1505  switch(i) Line 1591  switch(i)
1591    case '\r':               newline = (char *)"cr"; break;    case '\r':               newline = (char *)"cr"; break;
1592    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1593    case -1:                 newline = (char *)"any"; break;    case -1:                 newline = (char *)"any"; break;
1594      case -2:                 newline = (char *)"anycrlf"; break;
1595    }    }
1596    
1597  /* Process the options */  /* Process the options */
# Line 1562  for (i = 1; i < argc; i++) Line 1649  for (i = 1; i < argc; i++)
1649          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
1650            {            {
1651            int oplen = equals - op->long_name;            int oplen = equals - op->long_name;
1652            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1653            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1654              {              {
1655              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1581  for (i = 1; i < argc; i++) Line 1668  for (i = 1; i < argc; i++)
1668          char buff2[24];          char buff2[24];
1669          int baselen = opbra - op->long_name;          int baselen = opbra - op->long_name;
1670          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
1671          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1,
1672            opbra + 1);            (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1673          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1674            break;            break;
1675          }          }
# Line 1812  else if (strcmp(newline, "any") == 0 || Line 1899  else if (strcmp(newline, "any") == 0 ||
1899    pcre_options |= PCRE_NEWLINE_ANY;    pcre_options |= PCRE_NEWLINE_ANY;
1900    endlinetype = EL_ANY;    endlinetype = EL_ANY;
1901    }    }
1902    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1903      {
1904      pcre_options |= PCRE_NEWLINE_ANYCRLF;
1905      endlinetype = EL_ANYCRLF;
1906      }
1907  else  else
1908    {    {
1909    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
# Line 1866  hints_list = (pcre_extra **)malloc(MAX_P Line 1958  hints_list = (pcre_extra **)malloc(MAX_P
1958  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
1959    {    {
1960    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
1961    goto EXIT2;    goto EXIT2;
1962    }    }
1963    
1964  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there is no file provided by -f,
# Line 1885  for (j = 0; j < cmd_pattern_count; j++) Line 1977  for (j = 0; j < cmd_pattern_count; j++)
1977    {    {
1978    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(patterns[j], pcre_options, NULL,
1979         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1980      goto EXIT2;      goto EXIT2;
1981    }    }
1982    
1983  /* Compile the regular expressions that are provided in a file. */  /* Compile the regular expressions that are provided in a file. */
# Line 1909  if (pattern_filename != NULL) Line 2001  if (pattern_filename != NULL)
2001        {        {
2002        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2003          strerror(errno));          strerror(errno));
2004        goto EXIT2;        goto EXIT2;
2005        }        }
2006      filename = pattern_filename;      filename = pattern_filename;
2007      }      }
# Line 1940  for (j = 0; j < pattern_count; j++) Line 2032  for (j = 0; j < pattern_count; j++)
2032      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2033      goto EXIT2;      goto EXIT2;
2034      }      }
2035      hint_count++;
2036    }    }
2037    
2038  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
# Line 1974  if (i >= argc) Line 2067  if (i >= argc)
2067    {    {
2068    rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2069    goto EXIT;    goto EXIT;
2070    }    }
2071    
2072  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2073  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
# Line 1994  for (; i < argc; i++) Line 2087  for (; i < argc; i++)
2087  EXIT:  EXIT:
2088  if (pattern_list != NULL)  if (pattern_list != NULL)
2089    {    {
2090    for (i = 0; i < pattern_count; i++) free(pattern_list[i]);    for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2091    free(pattern_list);    free(pattern_list);
2092    }    }
2093  if (hints_list != NULL)  if (hints_list != NULL)
2094    {    {
2095    for (i = 0; i < pattern_count; i++) free(hints_list[i]);    for (i = 0; i < hint_count; i++) free(hints_list[i]);
2096    free(hints_list);    free(hints_list);
2097    }    }
2098  return rc;  return rc;
2099    
2100  EXIT2:  EXIT2:

Legend:
Removed from v.121  
changed lines
  Added in v.236

  ViewVC Help
Powered by ViewVC 1.1.5