/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 280 by ph10, Wed Dec 5 20:56:03 2007 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <locale.h>  #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
 #include "config.h"  
58  #include "pcre.h"  #include "pcre.h"
59    
60  #define FALSE 0  #define FALSE 0
# Line 56  POSSIBILITY OF SUCH DAMAGE. Line 62  POSSIBILITY OF SUCH DAMAGE.
62    
63  typedef int BOOL;  typedef int BOOL;
64    
 #define VERSION "4.3 01-Jun-2006"  
65  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
66    
67  #if BUFSIZ > 8192  #if BUFSIZ > 8192
# Line 65  typedef int BOOL; Line 70  typedef int BOOL;
70  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
71  #endif  #endif
72    
   
73  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
74  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
75  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
# Line 83  enum { DEE_READ, DEE_SKIP }; Line 87  enum { DEE_READ, DEE_SKIP };
87  #define PO_LINE_MATCH     0x0002  #define PO_LINE_MATCH     0x0002
88  #define PO_FIXED_STRINGS  0x0004  #define PO_FIXED_STRINGS  0x0004
89    
90    /* Line ending types */
91    
92    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93    
94    
95    
96  /*************************************************  /*************************************************
# Line 100  static const char *jfriedl_prefix = ""; Line 108  static const char *jfriedl_prefix = "";
108  static const char *jfriedl_postfix = "";  static const char *jfriedl_postfix = "";
109  #endif  #endif
110    
111  static int  endlinebyte = '\n';     /* Last byte of endline sequence */  static int  endlinetype;
 static int  endlineextra = 0;       /* Extra bytes for endline sequence */  
112    
113  static char *colour_string = (char *)"1;31";  static char *colour_string = (char *)"1;31";
114  static char *colour_option = NULL;  static char *colour_option = NULL;
# Line 115  static char *locale = NULL; Line 122  static char *locale = NULL;
122  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
123    
124  static int  pattern_count = 0;  static int  pattern_count = 0;
125  static pcre **pattern_list;  static pcre **pattern_list = NULL;
126  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
127    
128  static char *include_pattern = NULL;  static char *include_pattern = NULL;
129  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
# Line 135  static int process_options = 0; Line 142  static int process_options = 0;
142    
143  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
144  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
145    static BOOL file_offsets = FALSE;
146  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
147  static BOOL invert = FALSE;  static BOOL invert = FALSE;
148    static BOOL line_offsets = FALSE;
149  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
150  static BOOL number = FALSE;  static BOOL number = FALSE;
151  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
152  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
153  static BOOL silent = FALSE;  static BOOL silent = FALSE;
154    static BOOL utf8 = FALSE;
155    
156  /* Structure for options and list of them */  /* Structure for options and list of them */
157    
# Line 166  used to identify them. */ Line 176  used to identify them. */
176  #define N_LABEL     (-5)  #define N_LABEL     (-5)
177  #define N_LOCALE    (-6)  #define N_LOCALE    (-6)
178  #define N_NULL      (-7)  #define N_NULL      (-7)
179    #define N_LOFFSETS  (-8)
180    #define N_FOFFSETS  (-9)
181    
182  static option_item optionlist[] = {  static option_item optionlist[] = {
183    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
# Line 181  static option_item optionlist[] = { Line 193  static option_item optionlist[] = {
193    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
194    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
195    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
196      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
197    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
198    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
199    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
200    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
201    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
202    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
203      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
204    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
205    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
206    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LR, CRLF)" },    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
207    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
208    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
209    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
# Line 219  static const char *prefix[] = { Line 233  static const char *prefix[] = {
233  static const char *suffix[] = {  static const char *suffix[] = {
234    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
235    
236    /* UTF-8 tables - used only when the newline setting is "any". */
237    
238    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
239    
240    const char utf8_table4[] = {
241      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
242      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
243      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
244      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
245    
246    
247    
248  /*************************************************  /*************************************************
# Line 231  although at present the only ones are fo Line 255  although at present the only ones are fo
255    
256  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
257    
258  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
259  #include <sys/types.h>  #include <sys/types.h>
260  #include <sys/stat.h>  #include <sys/stat.h>
261  #include <dirent.h>  #include <dirent.h>
# Line 263  for (;;) Line 287  for (;;)
287    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
288      return dent->d_name;      return dent->d_name;
289    }    }
290  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
291  }  }
292    
293  static void  static void
# Line 301  Lionel Fourquaux. David Burgess added a Line 325  Lionel Fourquaux. David Burgess added a
325  when it did not exist. */  when it did not exist. */
326    
327    
328  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
329    
330  #ifndef STRICT  #ifndef STRICT
331  # define STRICT  # define STRICT
# Line 423  FALSE; Line 447  FALSE;
447  typedef void directory_type;  typedef void directory_type;
448    
449  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
450  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
451  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
452  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
453    
454    
# Line 448  return FALSE; Line 472  return FALSE;
472    
473    
474    
475  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
476  /*************************************************  /*************************************************
477  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
478  *************************************************/  *************************************************/
# Line 471  return sys_errlist[n]; Line 495  return sys_errlist[n];
495    
496    
497  /*************************************************  /*************************************************
498    *             Find end of line                   *
499    *************************************************/
500    
501    /* The length of the endline sequence that is found is set via lenptr. This may
502    be zero at the very end of the file if there is no line-ending sequence there.
503    
504    Arguments:
505      p         current position in line
506      endptr    end of available data
507      lenptr    where to put the length of the eol sequence
508    
509    Returns:    pointer to the last byte of the line
510    */
511    
512    static char *
513    end_of_line(char *p, char *endptr, int *lenptr)
514    {
515    switch(endlinetype)
516      {
517      default:      /* Just in case */
518      case EL_LF:
519      while (p < endptr && *p != '\n') p++;
520      if (p < endptr)
521        {
522        *lenptr = 1;
523        return p + 1;
524        }
525      *lenptr = 0;
526      return endptr;
527    
528      case EL_CR:
529      while (p < endptr && *p != '\r') p++;
530      if (p < endptr)
531        {
532        *lenptr = 1;
533        return p + 1;
534        }
535      *lenptr = 0;
536      return endptr;
537    
538      case EL_CRLF:
539      for (;;)
540        {
541        while (p < endptr && *p != '\r') p++;
542        if (++p >= endptr)
543          {
544          *lenptr = 0;
545          return endptr;
546          }
547        if (*p == '\n')
548          {
549          *lenptr = 2;
550          return p + 1;
551          }
552        }
553      break;
554    
555      case EL_ANYCRLF:
556      while (p < endptr)
557        {
558        int extra = 0;
559        register int c = *((unsigned char *)p);
560    
561        if (utf8 && c >= 0xc0)
562          {
563          int gcii, gcss;
564          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
565          gcss = 6*extra;
566          c = (c & utf8_table3[extra]) << gcss;
567          for (gcii = 1; gcii <= extra; gcii++)
568            {
569            gcss -= 6;
570            c |= (p[gcii] & 0x3f) << gcss;
571            }
572          }
573    
574        p += 1 + extra;
575    
576        switch (c)
577          {
578          case 0x0a:    /* LF */
579          *lenptr = 1;
580          return p;
581    
582          case 0x0d:    /* CR */
583          if (p < endptr && *p == 0x0a)
584            {
585            *lenptr = 2;
586            p++;
587            }
588          else *lenptr = 1;
589          return p;
590    
591          default:
592          break;
593          }
594        }   /* End of loop for ANYCRLF case */
595    
596      *lenptr = 0;  /* Must have hit the end */
597      return endptr;
598    
599      case EL_ANY:
600      while (p < endptr)
601        {
602        int extra = 0;
603        register int c = *((unsigned char *)p);
604    
605        if (utf8 && c >= 0xc0)
606          {
607          int gcii, gcss;
608          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
609          gcss = 6*extra;
610          c = (c & utf8_table3[extra]) << gcss;
611          for (gcii = 1; gcii <= extra; gcii++)
612            {
613            gcss -= 6;
614            c |= (p[gcii] & 0x3f) << gcss;
615            }
616          }
617    
618        p += 1 + extra;
619    
620        switch (c)
621          {
622          case 0x0a:    /* LF */
623          case 0x0b:    /* VT */
624          case 0x0c:    /* FF */
625          *lenptr = 1;
626          return p;
627    
628          case 0x0d:    /* CR */
629          if (p < endptr && *p == 0x0a)
630            {
631            *lenptr = 2;
632            p++;
633            }
634          else *lenptr = 1;
635          return p;
636    
637          case 0x85:    /* NEL */
638          *lenptr = utf8? 2 : 1;
639          return p;
640    
641          case 0x2028:  /* LS */
642          case 0x2029:  /* PS */
643          *lenptr = 3;
644          return p;
645    
646          default:
647          break;
648          }
649        }   /* End of loop for ANY case */
650    
651      *lenptr = 0;  /* Must have hit the end */
652      return endptr;
653      }     /* End of overall switch */
654    }
655    
656    
657    
658    /*************************************************
659    *         Find start of previous line            *
660    *************************************************/
661    
662    /* This is called when looking back for before lines to print.
663    
664    Arguments:
665      p         start of the subsequent line
666      startptr  start of available data
667    
668    Returns:    pointer to the start of the previous line
669    */
670    
671    static char *
672    previous_line(char *p, char *startptr)
673    {
674    switch(endlinetype)
675      {
676      default:      /* Just in case */
677      case EL_LF:
678      p--;
679      while (p > startptr && p[-1] != '\n') p--;
680      return p;
681    
682      case EL_CR:
683      p--;
684      while (p > startptr && p[-1] != '\n') p--;
685      return p;
686    
687      case EL_CRLF:
688      for (;;)
689        {
690        p -= 2;
691        while (p > startptr && p[-1] != '\n') p--;
692        if (p <= startptr + 1 || p[-2] == '\r') return p;
693        }
694      return p;   /* But control should never get here */
695    
696      case EL_ANY:
697      case EL_ANYCRLF:
698      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
699      if (utf8) while ((*p & 0xc0) == 0x80) p--;
700    
701      while (p > startptr)
702        {
703        register int c;
704        char *pp = p - 1;
705    
706        if (utf8)
707          {
708          int extra = 0;
709          while ((*pp & 0xc0) == 0x80) pp--;
710          c = *((unsigned char *)pp);
711          if (c >= 0xc0)
712            {
713            int gcii, gcss;
714            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
715            gcss = 6*extra;
716            c = (c & utf8_table3[extra]) << gcss;
717            for (gcii = 1; gcii <= extra; gcii++)
718              {
719              gcss -= 6;
720              c |= (pp[gcii] & 0x3f) << gcss;
721              }
722            }
723          }
724        else c = *((unsigned char *)pp);
725    
726        if (endlinetype == EL_ANYCRLF) switch (c)
727          {
728          case 0x0a:    /* LF */
729          case 0x0d:    /* CR */
730          return p;
731    
732          default:
733          break;
734          }
735    
736        else switch (c)
737          {
738          case 0x0a:    /* LF */
739          case 0x0b:    /* VT */
740          case 0x0c:    /* FF */
741          case 0x0d:    /* CR */
742          case 0x85:    /* NEL */
743          case 0x2028:  /* LS */
744          case 0x2029:  /* PS */
745          return p;
746    
747          default:
748          break;
749          }
750    
751        p = pp;  /* Back one character */
752        }        /* End of loop for ANY case */
753    
754      return startptr;  /* Hit start of data */
755      }     /* End of overall switch */
756    }
757    
758    
759    
760    
761    
762    /*************************************************
763  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
764  *************************************************/  *************************************************/
765    
# Line 495  if (after_context > 0 && lastmatchnumber Line 784  if (after_context > 0 && lastmatchnumber
784    int count = 0;    int count = 0;
785    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
786      {      {
787        int ellength;
788      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
789      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
790      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
791      while (*pp != endlinebyte) pp++;      pp = end_of_line(pp, endptr, &ellength);
792      fwrite(lastmatchrestart, 1, pp - lastmatchrestart + (1 + endlineextra),      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
793        stdout);      lastmatchrestart = pp;
     lastmatchrestart = pp + 1;  
794      }      }
795    hyphenpending = TRUE;    hyphenpending = TRUE;
796    }    }
# Line 537  int rc = 1; Line 826  int rc = 1;
826  int linenumber = 1;  int linenumber = 1;
827  int lastmatchnumber = 0;  int lastmatchnumber = 0;
828  int count = 0;  int count = 0;
829    int filepos = 0;
830  int offsets[99];  int offsets[99];
831  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
832  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
# Line 558  way, the buffer is shifted left and re-f Line 848  way, the buffer is shifted left and re-f
848    
849  while (ptr < endptr)  while (ptr < endptr)
850    {    {
851    int i;    int i, endlinelength;
852    int mrc = 0;    int mrc = 0;
853    BOOL match = FALSE;    BOOL match = FALSE;
854      char *matchptr = ptr;
855    char *t = ptr;    char *t = ptr;
856    size_t length, linelength;    size_t length, linelength;
857    
# Line 571  while (ptr < endptr) Line 862  while (ptr < endptr)
862    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
863    that any match is constrained to be in the first line. */    that any match is constrained to be in the first line. */
864    
865    linelength = 0;    t = end_of_line(t, endptr, &endlinelength);
866    while (t < endptr && *t++ != endlinebyte) linelength++;    linelength = t - ptr - endlinelength;
867    length = multiline? endptr - ptr : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
   
868    
869    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
870    
# Line 624  while (ptr < endptr) Line 914  while (ptr < endptr)
914    }    }
915  #endif  #endif
916    
917      /* We come back here after a match when the -o option (only_matching) is set,
918      in order to find any further matches in the same line. */
919    
920      ONLY_MATCHING_RESTART:
921    
922    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches. Note that we don't include
923    the final newline in the subject string. */    the final newline in the subject string. */
924    
925    for (i = 0; i < pattern_count; i++)    for (i = 0; i < pattern_count; i++)
926      {      {
927      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,      mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
928        offsets, 99);        offsets, 99);
929      if (mrc >= 0) { match = TRUE; break; }      if (mrc >= 0) { match = TRUE; break; }
930      if (mrc != PCRE_ERROR_NOMATCH)      if (mrc != PCRE_ERROR_NOMATCH)
# Line 638  while (ptr < endptr) Line 932  while (ptr < endptr)
932        fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);        fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
933        if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);        if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
934        fprintf(stderr, "this line:\n");        fprintf(stderr, "this line:\n");
935        fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */        fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */
936        fprintf(stderr, "\n");        fprintf(stderr, "\n");
937        if (error_count == 0 &&        if (error_count == 0 &&
938            (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))            (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
# Line 685  while (ptr < endptr) Line 979  while (ptr < endptr)
979      else if (quiet) return 0;      else if (quiet) return 0;
980    
981      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, and
982      does not pring any context. */      the --file-offsets and --line-offsets options output offsets for the
983        matching substring (they both force --only-matching). None of these options
984        prints any context. Afterwards, adjust the start and length, and then jump
985        back to look for further matches in the same line. If we are in invert
986        mode, however, nothing is printed - this could be still useful because the
987        return code is set. */
988    
989      else if (only_matching)      else if (only_matching)
990        {        {
991        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (!invert)
992        if (number) fprintf(stdout, "%d:", linenumber);          {
993        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          if (printname != NULL) fprintf(stdout, "%s:", printname);
994        fprintf(stdout, "\n");          if (number) fprintf(stdout, "%d:", linenumber);
995            if (line_offsets)
996              fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,
997                offsets[1] - offsets[0]);
998            else if (file_offsets)
999              fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,
1000                offsets[1] - offsets[0]);
1001            else
1002              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1003            fprintf(stdout, "\n");
1004            matchptr += offsets[1];
1005            length -= offsets[1];
1006            match = FALSE;
1007            goto ONLY_MATCHING_RESTART;
1008            }
1009        }        }
1010    
1011      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 706  while (ptr < endptr) Line 1019  while (ptr < endptr)
1019    
1020        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
1021          {          {
1022            int ellength;
1023          int linecount = 0;          int linecount = 0;
1024          char *p = lastmatchrestart;          char *p = lastmatchrestart;
1025    
1026          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
1027            {            {
1028            while (*p != endlinebyte) p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
1029            linecount++;            linecount++;
1030            }            }
1031    
# Line 725  while (ptr < endptr) Line 1038  while (ptr < endptr)
1038            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1039            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1040            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1041            while (*pp != endlinebyte) pp++;            pp = end_of_line(pp, endptr, &ellength);
1042            fwrite(lastmatchrestart, 1, pp - lastmatchrestart +            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1043              (1 + endlineextra), stdout);            lastmatchrestart = pp;
           lastmatchrestart = pp + 1;  
1044            }            }
1045          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1046          }          }
# Line 754  while (ptr < endptr) Line 1066  while (ptr < endptr)
1066                 linecount < before_context)                 linecount < before_context)
1067            {            {
1068            linecount++;            linecount++;
1069            p--;            p = previous_line(p, buffer);
           while (p > buffer && p[-1] != endlinebyte) p--;  
1070            }            }
1071    
1072          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 763  while (ptr < endptr) Line 1074  while (ptr < endptr)
1074    
1075          while (p < ptr)          while (p < ptr)
1076            {            {
1077              int ellength;
1078            char *pp = p;            char *pp = p;
1079            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1080            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1081            while (*pp != endlinebyte) pp++;            pp = end_of_line(pp, endptr, &ellength);
1082            fwrite(p, 1, pp - p + (1 + endlineextra), stdout);            fwrite(p, 1, pp - p, stdout);
1083            p = pp + 1;            p = pp;
1084            }            }
1085          }          }
1086    
# Line 783  while (ptr < endptr) Line 1095  while (ptr < endptr)
1095    
1096        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1097        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1098        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1099        start of the match will always be before the first newline sequence. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1100          the match will always be before the first newline sequence. */
1101    
1102        if (multiline)        if (multiline)
1103          {          {
1104          char *endmatch = ptr + offsets[1];          int ellength;
1105          t = ptr;          char *endmatch = ptr;
1106          while (t < endmatch) { if (*t++ == endlinebyte) linenumber++; }          if (!invert)
1107          while (endmatch < endptr && *endmatch != endlinebyte) endmatch++;            {
1108          linelength = endmatch - ptr;            endmatch += offsets[1];
1109              t = ptr;
1110              while (t < endmatch)
1111                {
1112                t = end_of_line(t, endptr, &ellength);
1113                if (t <= endmatch) linenumber++; else break;
1114                }
1115              }
1116            endmatch = end_of_line(endmatch, endptr, &ellength);
1117            linelength = endmatch - ptr - ellength;
1118          }          }
1119    
1120        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 822  while (ptr < endptr) Line 1144  while (ptr < endptr)
1144          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1145          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1146          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1147          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1148              stdout);
1149          }          }
1150        else fwrite(ptr, 1, linelength, stdout);        else fwrite(ptr, 1, linelength + endlinelength, stdout);
   
       fprintf(stdout, "\n");  
1151        }        }
1152    
1153      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match */
# Line 836  while (ptr < endptr) Line 1157  while (ptr < endptr)
1157      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1158      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1159    
1160      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1161      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1162      }      }
1163    
1164    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1165      anything to be printed), we have to move on to the end of the match before
1166      proceeding. */
1167    
1168      if (multiline && invert && match)
1169        {
1170        int ellength;
1171        char *endmatch = ptr + offsets[1];
1172        t = ptr;
1173        while (t < endmatch)
1174          {
1175          t = end_of_line(t, endptr, &ellength);
1176          if (t <= endmatch) linenumber++; else break;
1177          }
1178        endmatch = end_of_line(endmatch, endptr, &ellength);
1179        linelength = endmatch - ptr - ellength;
1180        }
1181    
1182    ptr += linelength + 1;    /* Advance to after the newline and increment the line number. The file
1183      offset to the current line is maintained in filepos. */
1184    
1185      ptr += linelength + endlinelength;
1186      filepos += linelength + endlinelength;
1187    linenumber++;    linenumber++;
1188    
1189    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
# Line 1029  for (op = optionlist; op->one_char != 0; Line 1370  for (op = optionlist; op->one_char != 0;
1370    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1371    }    }
1372  fprintf(stderr, "] [long options] [pattern] [files]\n");  fprintf(stderr, "] [long options] [pattern] [files]\n");
1373  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1374      "options.\n");
1375  return rc;  return rc;
1376  }  }
1377    
# Line 1084  handle_option(int letter, int options) Line 1426  handle_option(int letter, int options)
1426  {  {
1427  switch(letter)  switch(letter)
1428    {    {
1429      case N_FOFFSETS: file_offsets = TRUE; break;
1430    case N_HELP: help(); exit(0);    case N_HELP: help(); exit(0);
1431      case N_LOFFSETS: line_offsets = number = TRUE; break;
1432    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1433    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1434    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
# Line 1098  switch(letter) Line 1442  switch(letter)
1442    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1443    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1444    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1445    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1446    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1447    case 'w': process_options |= PO_WORD_MATCH; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1448    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1449    
1450    case 'V':    case 'V':
1451    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1452    exit(0);    exit(0);
1453    break;    break;
1454    
# Line 1181  sprintf(buffer, "%s%.*s%s", prefix[proce Line 1524  sprintf(buffer, "%s%.*s%s", prefix[proce
1524    suffix[process_options]);    suffix[process_options]);
1525  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1526    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
1527  if (pattern_list[pattern_count++] != NULL) return TRUE;  if (pattern_list[pattern_count] != NULL)
1528      {
1529      pattern_count++;
1530      return TRUE;
1531      }
1532    
1533  /* Handle compile errors */  /* Handle compile errors */
1534    
# Line 1231  compile_pattern(char *pattern, int optio Line 1578  compile_pattern(char *pattern, int optio
1578  {  {
1579  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
1580    {    {
1581      char *eop = pattern + strlen(pattern);
1582    char buffer[MBUFTHIRD];    char buffer[MBUFTHIRD];
1583    for(;;)    for(;;)
1584      {      {
1585      char *p = strchr(pattern, endlinebyte);      int ellength;
1586      if (p == NULL)      char *p = end_of_line(pattern, eop, &ellength);
1587        if (ellength == 0)
1588        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1589      sprintf(buffer, "%.*s", p - pattern - endlineextra, pattern);      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1590      pattern = p + 1;      pattern = p;
1591      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1592        return FALSE;        return FALSE;
1593      }      }
# Line 1261  int i, j; Line 1610  int i, j;
1610  int rc = 1;  int rc = 1;
1611  int pcre_options = 0;  int pcre_options = 0;
1612  int cmd_pattern_count = 0;  int cmd_pattern_count = 0;
1613    int hint_count = 0;
1614  int errptr;  int errptr;
1615  BOOL only_one_at_top;  BOOL only_one_at_top;
1616  char *patterns[MAX_PATTERN_COUNT];  char *patterns[MAX_PATTERN_COUNT];
1617  const char *locale_from = "--locale";  const char *locale_from = "--locale";
1618  const char *error;  const char *error;
1619    
1620  /* Set the default line ending value from the default in the PCRE library. */  /* Set the default line ending value from the default in the PCRE library;
1621    "lf", "cr", "crlf", "any", and "anycrlf" are supported. Anything else is treated as "lf".
1622    */
1623    
1624  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1625  switch(i)  switch(i)
# Line 1275  switch(i) Line 1627  switch(i)
1627    default:                 newline = (char *)"lf"; break;    default:                 newline = (char *)"lf"; break;
1628    case '\r':               newline = (char *)"cr"; break;    case '\r':               newline = (char *)"cr"; break;
1629    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1630      case -1:                 newline = (char *)"any"; break;
1631      case -2:                 newline = (char *)"anycrlf"; break;
1632    }    }
1633    
1634  /* Process the options */  /* Process the options */
# Line 1332  for (i = 1; i < argc; i++) Line 1686  for (i = 1; i < argc; i++)
1686          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
1687            {            {
1688            int oplen = equals - op->long_name;            int oplen = equals - op->long_name;
1689            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1690            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1691              {              {
1692              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1351  for (i = 1; i < argc; i++) Line 1705  for (i = 1; i < argc; i++)
1705          char buff2[24];          char buff2[24];
1706          int baselen = opbra - op->long_name;          int baselen = opbra - op->long_name;
1707          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
1708          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1,
1709            opbra + 1);            (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1710          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1711            break;            break;
1712          }          }
# Line 1510  if (both_context > 0) Line 1864  if (both_context > 0)
1864    if (after_context == 0) after_context = both_context;    if (after_context == 0) after_context = both_context;
1865    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
1866    }    }
1867    
1868    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
1869    However, the latter two set the only_matching flag. */
1870    
1871    if ((only_matching && (file_offsets || line_offsets)) ||
1872        (file_offsets && line_offsets))
1873      {
1874      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
1875        "and/or --line-offsets\n");
1876      exit(usage(2));
1877      }
1878    
1879    if (file_offsets || line_offsets) only_matching = TRUE;
1880    
1881  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
1882  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
# Line 1565  if (colour_option != NULL && strcmp(colo Line 1932  if (colour_option != NULL && strcmp(colo
1932  if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)  if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1933    {    {
1934    pcre_options |= PCRE_NEWLINE_CR;    pcre_options |= PCRE_NEWLINE_CR;
1935    endlinebyte = '\r';    endlinetype = EL_CR;
1936    }    }
1937  else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)  else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1938    {    {
1939    pcre_options |= PCRE_NEWLINE_LF;    pcre_options |= PCRE_NEWLINE_LF;
1940      endlinetype = EL_LF;
1941    }    }
1942  else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)  else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1943    {    {
1944    pcre_options |= PCRE_NEWLINE_CRLF;    pcre_options |= PCRE_NEWLINE_CRLF;
1945    endlineextra = 1;    endlinetype = EL_CRLF;
1946      }
1947    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1948      {
1949      pcre_options |= PCRE_NEWLINE_ANY;
1950      endlinetype = EL_ANY;
1951      }
1952    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1953      {
1954      pcre_options |= PCRE_NEWLINE_ANYCRLF;
1955      endlinetype = EL_ANYCRLF;
1956    }    }
1957  else  else
1958    {    {
# Line 1630  hints_list = (pcre_extra **)malloc(MAX_P Line 2008  hints_list = (pcre_extra **)malloc(MAX_P
2008  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
2009    {    {
2010    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2011    return 2;    goto EXIT2;
2012    }    }
2013    
2014  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there is no file provided by -f,
# Line 1649  for (j = 0; j < cmd_pattern_count; j++) Line 2027  for (j = 0; j < cmd_pattern_count; j++)
2027    {    {
2028    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(patterns[j], pcre_options, NULL,
2029         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2030      return 2;      goto EXIT2;
2031    }    }
2032    
2033  /* Compile the regular expressions that are provided in a file. */  /* Compile the regular expressions that are provided in a file. */
# Line 1673  if (pattern_filename != NULL) Line 2051  if (pattern_filename != NULL)
2051        {        {
2052        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2053          strerror(errno));          strerror(errno));
2054        return 2;        goto EXIT2;
2055        }        }
2056      filename = pattern_filename;      filename = pattern_filename;
2057      }      }
# Line 1686  if (pattern_filename != NULL) Line 2064  if (pattern_filename != NULL)
2064      linenumber++;      linenumber++;
2065      if (buffer[0] == 0) continue;   /* Skip blank lines */      if (buffer[0] == 0) continue;   /* Skip blank lines */
2066      if (!compile_pattern(buffer, pcre_options, filename, linenumber))      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2067        return 2;        goto EXIT2;
2068      }      }
2069    
2070    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
# Line 1702  for (j = 0; j < pattern_count; j++) Line 2080  for (j = 0; j < pattern_count; j++)
2080      char s[16];      char s[16];
2081      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2082      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2083      return 2;      goto EXIT2;
2084      }      }
2085      hint_count++;
2086    }    }
2087    
2088  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
# Line 1716  if (exclude_pattern != NULL) Line 2095  if (exclude_pattern != NULL)
2095      {      {
2096      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2097        errptr, error);        errptr, error);
2098      return 2;      goto EXIT2;
2099      }      }
2100    }    }
2101    
# Line 1728  if (include_pattern != NULL) Line 2107  if (include_pattern != NULL)
2107      {      {
2108      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2109        errptr, error);        errptr, error);
2110      return 2;      goto EXIT2;
2111      }      }
2112    }    }
2113    
2114  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2115    
2116  if (i >= argc)  if (i >= argc)
2117    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
2118      rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2119      goto EXIT;
2120      }
2121    
2122  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2123  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
# Line 1752  for (; i < argc; i++) Line 2134  for (; i < argc; i++)
2134      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2135    }    }
2136    
2137    EXIT:
2138    if (pattern_list != NULL)
2139      {
2140      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2141      free(pattern_list);
2142      }
2143    if (hints_list != NULL)
2144      {
2145      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2146      free(hints_list);
2147      }
2148  return rc;  return rc;
2149    
2150    EXIT2:
2151    rc = 2;
2152    goto EXIT;
2153  }  }
2154    
2155  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.91  
changed lines
  Added in v.280

  ViewVC Help
Powered by ViewVC 1.1.5