/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 151 by ph10, Tue Apr 17 15:07:29 2007 UTC | revision 280 by ph10, Wed Dec 5 20:56:03 2007 UTC
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  */  */
39    
40  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
41  #  include <config.h>  #include "config.h"
42  #endif  #endif
43    
44  #include <ctype.h>  #include <ctype.h>
# Line 50  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54  #ifdef HAVE_UNISTD_H  #ifdef HAVE_UNISTD_H
55  #  include <unistd.h>  #include <unistd.h>
56  #endif  #endif
57    
58  #include <pcre.h>  #include "pcre.h"
59    
60  #define FALSE 0  #define FALSE 0
61  #define TRUE 1  #define TRUE 1
# Line 141  static int process_options = 0; Line 142  static int process_options = 0;
142    
143  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
144  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
145    static BOOL file_offsets = FALSE;
146  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
147  static BOOL invert = FALSE;  static BOOL invert = FALSE;
148    static BOOL line_offsets = FALSE;
149  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
150  static BOOL number = FALSE;  static BOOL number = FALSE;
151  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
# Line 173  used to identify them. */ Line 176  used to identify them. */
176  #define N_LABEL     (-5)  #define N_LABEL     (-5)
177  #define N_LOCALE    (-6)  #define N_LOCALE    (-6)
178  #define N_NULL      (-7)  #define N_NULL      (-7)
179    #define N_LOFFSETS  (-8)
180    #define N_FOFFSETS  (-9)
181    
182  static option_item optionlist[] = {  static option_item optionlist[] = {
183    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
# Line 188  static option_item optionlist[] = { Line 193  static option_item optionlist[] = {
193    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
194    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
195    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
196      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
197    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
198    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
199    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
200    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
201    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
202    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
203      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
204    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
205    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
206    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
207    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
208    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
209    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
# Line 819  int rc = 1; Line 826  int rc = 1;
826  int linenumber = 1;  int linenumber = 1;
827  int lastmatchnumber = 0;  int lastmatchnumber = 0;
828  int count = 0;  int count = 0;
829    int filepos = 0;
830  int offsets[99];  int offsets[99];
831  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
832  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
# Line 843  while (ptr < endptr) Line 851  while (ptr < endptr)
851    int i, endlinelength;    int i, endlinelength;
852    int mrc = 0;    int mrc = 0;
853    BOOL match = FALSE;    BOOL match = FALSE;
854      char *matchptr = ptr;
855    char *t = ptr;    char *t = ptr;
856    size_t length, linelength;    size_t length, linelength;
857    
# Line 855  while (ptr < endptr) Line 864  while (ptr < endptr)
864    
865    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
866    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
867    length = multiline? endptr - ptr : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
868    
869    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
870    
# Line 905  while (ptr < endptr) Line 914  while (ptr < endptr)
914    }    }
915  #endif  #endif
916    
917      /* We come back here after a match when the -o option (only_matching) is set,
918      in order to find any further matches in the same line. */
919    
920      ONLY_MATCHING_RESTART:
921    
922    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches. Note that we don't include
923    the final newline in the subject string. */    the final newline in the subject string. */
924    
925    for (i = 0; i < pattern_count; i++)    for (i = 0; i < pattern_count; i++)
926      {      {
927      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,      mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
928        offsets, 99);        offsets, 99);
929      if (mrc >= 0) { match = TRUE; break; }      if (mrc >= 0) { match = TRUE; break; }
930      if (mrc != PCRE_ERROR_NOMATCH)      if (mrc != PCRE_ERROR_NOMATCH)
# Line 919  while (ptr < endptr) Line 932  while (ptr < endptr)
932        fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);        fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
933        if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);        if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
934        fprintf(stderr, "this line:\n");        fprintf(stderr, "this line:\n");
935        fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */        fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */
936        fprintf(stderr, "\n");        fprintf(stderr, "\n");
937        if (error_count == 0 &&        if (error_count == 0 &&
938            (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))            (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
# Line 966  while (ptr < endptr) Line 979  while (ptr < endptr)
979      else if (quiet) return 0;      else if (quiet) return 0;
980    
981      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, and
982      does not pring any context. */      the --file-offsets and --line-offsets options output offsets for the
983        matching substring (they both force --only-matching). None of these options
984        prints any context. Afterwards, adjust the start and length, and then jump
985        back to look for further matches in the same line. If we are in invert
986        mode, however, nothing is printed - this could be still useful because the
987        return code is set. */
988    
989      else if (only_matching)      else if (only_matching)
990        {        {
991        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (!invert)
992        if (number) fprintf(stdout, "%d:", linenumber);          {
993        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          if (printname != NULL) fprintf(stdout, "%s:", printname);
994        fprintf(stdout, "\n");          if (number) fprintf(stdout, "%d:", linenumber);
995            if (line_offsets)
996              fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,
997                offsets[1] - offsets[0]);
998            else if (file_offsets)
999              fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,
1000                offsets[1] - offsets[0]);
1001            else
1002              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1003            fprintf(stdout, "\n");
1004            matchptr += offsets[1];
1005            length -= offsets[1];
1006            match = FALSE;
1007            goto ONLY_MATCHING_RESTART;
1008            }
1009        }        }
1010    
1011      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 1063  while (ptr < endptr) Line 1095  while (ptr < endptr)
1095    
1096        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1097        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1098        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1099        start of the match will always be before the first newline sequence. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1100          the match will always be before the first newline sequence. */
1101    
1102        if (multiline)        if (multiline)
1103          {          {
1104          int ellength;          int ellength;
1105          char *endmatch = ptr + offsets[1];          char *endmatch = ptr;
1106          t = ptr;          if (!invert)
         while (t < endmatch)  
1107            {            {
1108            t = end_of_line(t, endptr, &ellength);            endmatch += offsets[1];
1109            if (t <= endmatch) linenumber++; else break;            t = ptr;
1110              while (t < endmatch)
1111                {
1112                t = end_of_line(t, endptr, &ellength);
1113                if (t <= endmatch) linenumber++; else break;
1114                }
1115            }            }
1116          endmatch = end_of_line(endmatch, endptr, &ellength);          endmatch = end_of_line(endmatch, endptr, &ellength);
1117          linelength = endmatch - ptr - ellength;          linelength = endmatch - ptr - ellength;
# Line 1107  while (ptr < endptr) Line 1144  while (ptr < endptr)
1144          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1145          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1146          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1147          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1148              stdout);
1149          }          }
1150        else fwrite(ptr, 1, linelength + endlinelength, stdout);        else fwrite(ptr, 1, linelength + endlinelength, stdout);
1151        }        }
# Line 1123  while (ptr < endptr) Line 1161  while (ptr < endptr)
1161      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1162      }      }
1163    
1164    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1165      anything to be printed), we have to move on to the end of the match before
1166      proceeding. */
1167    
1168      if (multiline && invert && match)
1169        {
1170        int ellength;
1171        char *endmatch = ptr + offsets[1];
1172        t = ptr;
1173        while (t < endmatch)
1174          {
1175          t = end_of_line(t, endptr, &ellength);
1176          if (t <= endmatch) linenumber++; else break;
1177          }
1178        endmatch = end_of_line(endmatch, endptr, &ellength);
1179        linelength = endmatch - ptr - ellength;
1180        }
1181    
1182      /* Advance to after the newline and increment the line number. The file
1183      offset to the current line is maintained in filepos. */
1184    
1185    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
1186      filepos += linelength + endlinelength;
1187    linenumber++;    linenumber++;
1188    
1189    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
# Line 1312  for (op = optionlist; op->one_char != 0; Line 1370  for (op = optionlist; op->one_char != 0;
1370    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1371    }    }
1372  fprintf(stderr, "] [long options] [pattern] [files]\n");  fprintf(stderr, "] [long options] [pattern] [files]\n");
1373  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1374      "options.\n");
1375  return rc;  return rc;
1376  }  }
1377    
# Line 1367  handle_option(int letter, int options) Line 1426  handle_option(int letter, int options)
1426  {  {
1427  switch(letter)  switch(letter)
1428    {    {
1429      case N_FOFFSETS: file_offsets = TRUE; break;
1430    case N_HELP: help(); exit(0);    case N_HELP: help(); exit(0);
1431      case N_LOFFSETS: line_offsets = number = TRUE; break;
1432    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1433    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1434    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
# Line 1625  for (i = 1; i < argc; i++) Line 1686  for (i = 1; i < argc; i++)
1686          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
1687            {            {
1688            int oplen = equals - op->long_name;            int oplen = equals - op->long_name;
1689            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1690            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1691              {              {
1692              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1644  for (i = 1; i < argc; i++) Line 1705  for (i = 1; i < argc; i++)
1705          char buff2[24];          char buff2[24];
1706          int baselen = opbra - op->long_name;          int baselen = opbra - op->long_name;
1707          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
1708          sprintf(buff2, "%s%.*s", buff1,          sprintf(buff2, "%s%.*s", buff1,
1709            (int)strlen(op->long_name) - baselen - 2, opbra + 1);            (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1710          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1711            break;            break;
# Line 1803  if (both_context > 0) Line 1864  if (both_context > 0)
1864    if (after_context == 0) after_context = both_context;    if (after_context == 0) after_context = both_context;
1865    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
1866    }    }
1867    
1868    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
1869    However, the latter two set the only_matching flag. */
1870    
1871    if ((only_matching && (file_offsets || line_offsets)) ||
1872        (file_offsets && line_offsets))
1873      {
1874      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
1875        "and/or --line-offsets\n");
1876      exit(usage(2));
1877      }
1878    
1879    if (file_offsets || line_offsets) only_matching = TRUE;
1880    
1881  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
1882  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */

Legend:
Removed from v.151  
changed lines
  Added in v.280

  ViewVC Help
Powered by ViewVC 1.1.5