/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 199 by ph10, Tue Jul 31 14:39:09 2007 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include <config.h>
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <locale.h>  #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58  #include "config.h"  #include <pcre.h>
 #include "pcre.h"  
59    
60  #define FALSE 0  #define FALSE 0
61  #define TRUE 1  #define TRUE 1
62    
63  typedef int BOOL;  typedef int BOOL;
64    
 #define VERSION "4.2 09-Jan-2006"  
65  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
66    
67  #if BUFSIZ > 8192  #if BUFSIZ > 8192
# Line 65  typedef int BOOL; Line 70  typedef int BOOL;
70  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
71  #endif  #endif
72    
   
73  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
74  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
75  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
# Line 83  enum { DEE_READ, DEE_SKIP }; Line 87  enum { DEE_READ, DEE_SKIP };
87  #define PO_LINE_MATCH     0x0002  #define PO_LINE_MATCH     0x0002
88  #define PO_FIXED_STRINGS  0x0004  #define PO_FIXED_STRINGS  0x0004
89    
90    /* Line ending types */
91    
92    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93    
94    
95    
96  /*************************************************  /*************************************************
# Line 94  regular code. */ Line 102  regular code. */
102    
103  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
104  static int S_arg = -1;  static int S_arg = -1;
105    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
106    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
107    static const char *jfriedl_prefix = "";
108    static const char *jfriedl_postfix = "";
109  #endif  #endif
110    
111    static int  endlinetype;
112    
113  static char *colour_string = (char *)"1;31";  static char *colour_string = (char *)"1;31";
114  static char *colour_option = NULL;  static char *colour_option = NULL;
115  static char *dee_option = NULL;  static char *dee_option = NULL;
116  static char *DEE_option = NULL;  static char *DEE_option = NULL;
117    static char *newline = NULL;
118  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
119  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
120  static char *locale = NULL;  static char *locale = NULL;
# Line 107  static char *locale = NULL; Line 122  static char *locale = NULL;
122  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
123    
124  static int  pattern_count = 0;  static int  pattern_count = 0;
125  static pcre **pattern_list;  static pcre **pattern_list = NULL;
126  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
127    
128  static char *include_pattern = NULL;  static char *include_pattern = NULL;
129  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
# Line 134  static BOOL number = FALSE; Line 149  static BOOL number = FALSE;
149  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
150  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
151  static BOOL silent = FALSE;  static BOOL silent = FALSE;
152    static BOOL utf8 = FALSE;
153    
154  /* Structure for options and list of them */  /* Structure for options and list of them */
155    
# Line 181  static option_item optionlist[] = { Line 197  static option_item optionlist[] = {
197    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
198    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
199    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
200      { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
201    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
202    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
203    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
# Line 210  static const char *prefix[] = { Line 227  static const char *prefix[] = {
227  static const char *suffix[] = {  static const char *suffix[] = {
228    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
229    
230    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
231    /* utf8_table3[n] masks the data bits in the first byte of a character that has n additional bytes. */
232    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
233    /* utf8_table4[b & 0x3f] is the number of additional bytes that follow a first byte b >= 0xc0. */
234    const char utf8_table4[] = {
235      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
236      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
237      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
238      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
239    
240    
241    
242  /*************************************************  /*************************************************
# Line 222  although at present the only ones are fo Line 249  although at present the only ones are fo
249    
250  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
251    
252  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
253  #include <sys/types.h>  #include <sys/types.h>
254  #include <sys/stat.h>  #include <sys/stat.h>
255  #include <dirent.h>  #include <dirent.h>
# Line 254  for (;;) Line 281  for (;;)
281    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
282      return dent->d_name;      return dent->d_name;
283    }    }
284  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
285  }  }
286    
287  static void  static void
# Line 292  Lionel Fourquaux. David Burgess added a Line 319  Lionel Fourquaux. David Burgess added a
319  when it did not exist. */  when it did not exist. */
320    
321    
322  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
323    
324  #ifndef STRICT  #ifndef STRICT
325  # define STRICT  # define STRICT
# Line 414  FALSE; Line 441  FALSE;
441  typedef void directory_type;  typedef void directory_type;
442    
443  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
444  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
445  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
446  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
447    
448    
# Line 439  return FALSE; Line 466  return FALSE;
466    
467    
468    
469  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
470  /*************************************************  /*************************************************
471  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
472  *************************************************/  *************************************************/
# Line 462  return sys_errlist[n]; Line 489  return sys_errlist[n];
489    
490    
491  /*************************************************  /*************************************************
492    *             Find end of line                   *
493    *************************************************/
494    
495    /* The length of the endline sequence that is found is set via lenptr. This may
496    be zero at the very end of the file if there is no line-ending sequence there.
497    
498    Arguments:
499      p         current position in line
500      endptr    end of available data
501      lenptr    where to put the length of the eol sequence
502    
503    Returns:    pointer after the last byte of the line, including the newline byte(s)
504    */
505    
506    static char *
507    end_of_line(char *p, char *endptr, int *lenptr)
508    {
509    switch(endlinetype)
510      {
511      default:      /* Just in case */
512      case EL_LF:
513      while (p < endptr && *p != '\n') p++;
514      if (p < endptr)
515        {
516        *lenptr = 1;
517        return p + 1;
518        }
519      *lenptr = 0;
520      return endptr;
521    
522      case EL_CR:
523      while (p < endptr && *p != '\r') p++;
524      if (p < endptr)
525        {
526        *lenptr = 1;
527        return p + 1;
528        }
529      *lenptr = 0;
530      return endptr;
531    
532      case EL_CRLF:
533      for (;;)
534        {
535        while (p < endptr && *p != '\r') p++;
536        if (++p >= endptr)
537          {
538          *lenptr = 0;
539          return endptr;
540          }
541        if (*p == '\n')
542          {
543          *lenptr = 2;
544          return p + 1;
545          }
546        }
547      break;
548    
549      case EL_ANYCRLF:
550      while (p < endptr)
551        {
552        int extra = 0;
553        register int c = *((unsigned char *)p);
554    
555        if (utf8 && c >= 0xc0)
556          {
557          int gcii, gcss;
558          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
559          gcss = 6*extra;
560          c = (c & utf8_table3[extra]) << gcss;
561          for (gcii = 1; gcii <= extra; gcii++)
562            {
563            gcss -= 6;
564            c |= (p[gcii] & 0x3f) << gcss;
565            }
566          }
567    
568        p += 1 + extra;
569    
570        switch (c)
571          {
572          case 0x0a:    /* LF */
573          *lenptr = 1;
574          return p;
575    
576          case 0x0d:    /* CR */
577          if (p < endptr && *p == 0x0a)
578            {
579            *lenptr = 2;
580            p++;
581            }
582          else *lenptr = 1;
583          return p;
584    
585          default:
586          break;
587          }
588        }   /* End of loop for ANYCRLF case */
589    
590      *lenptr = 0;  /* Must have hit the end */
591      return endptr;
592    
593      case EL_ANY:
594      while (p < endptr)
595        {
596        int extra = 0;
597        register int c = *((unsigned char *)p);
598    
599        if (utf8 && c >= 0xc0)
600          {
601          int gcii, gcss;
602          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
603          gcss = 6*extra;
604          c = (c & utf8_table3[extra]) << gcss;
605          for (gcii = 1; gcii <= extra; gcii++)
606            {
607            gcss -= 6;
608            c |= (p[gcii] & 0x3f) << gcss;
609            }
610          }
611    
612        p += 1 + extra;
613    
614        switch (c)
615          {
616          case 0x0a:    /* LF */
617          case 0x0b:    /* VT */
618          case 0x0c:    /* FF */
619          *lenptr = 1;
620          return p;
621    
622          case 0x0d:    /* CR */
623          if (p < endptr && *p == 0x0a)
624            {
625            *lenptr = 2;
626            p++;
627            }
628          else *lenptr = 1;
629          return p;
630    
631          case 0x85:    /* NEL */
632          *lenptr = utf8? 2 : 1;
633          return p;
634    
635          case 0x2028:  /* LS */
636          case 0x2029:  /* PS */
637          *lenptr = 3;
638          return p;
639    
640          default:
641          break;
642          }
643        }   /* End of loop for ANY case */
644    
645      *lenptr = 0;  /* Must have hit the end */
646      return endptr;
647      }     /* End of overall switch */
648    }
649    
650    
651    
652    /*************************************************
653    *         Find start of previous line            *
654    *************************************************/
655    
656    /* This is called when looking back for before lines to print.
657    
658    Arguments:
659      p         start of the subsequent line
660      startptr  start of available data
661    
662    Returns:    pointer to the start of the previous line
663    */
664    
665    static char *
666    previous_line(char *p, char *startptr)
667    {
668    switch(endlinetype)
669      {
670      default:      /* Just in case */
671      case EL_LF:
672      p--;
673      while (p > startptr && p[-1] != '\n') p--;
674      return p;
675    
676      case EL_CR:
677      p--;
678      while (p > startptr && p[-1] != '\n') p--;
679      return p;
680    
681      case EL_CRLF:
682      for (;;)
683        {
684        p -= 2;
685        while (p > startptr && p[-1] != '\n') p--;
686        if (p <= startptr + 1 || p[-2] == '\r') return p;
687        }
688      return p;   /* But control should never get here */
689    
690      case EL_ANY:
691      case EL_ANYCRLF:
692      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
693      if (utf8) while ((*p & 0xc0) == 0x80) p--;
694    
695      while (p > startptr)
696        {
697        register int c;
698        char *pp = p - 1;
699    
700        if (utf8)
701          {
702          int extra = 0;
703          while ((*pp & 0xc0) == 0x80) pp--;
704          c = *((unsigned char *)pp);
705          if (c >= 0xc0)
706            {
707            int gcii, gcss;
708            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
709            gcss = 6*extra;
710            c = (c & utf8_table3[extra]) << gcss;
711            for (gcii = 1; gcii <= extra; gcii++)
712              {
713              gcss -= 6;
714              c |= (pp[gcii] & 0x3f) << gcss;
715              }
716            }
717          }
718        else c = *((unsigned char *)pp);
719    
720        if (endlinetype == EL_ANYCRLF) switch (c)
721          {
722          case 0x0a:    /* LF */
723          case 0x0d:    /* CR */
724          return p;
725    
726          default:
727          break;
728          }
729    
730        else switch (c)
731          {
732          case 0x0a:    /* LF */
733          case 0x0b:    /* VT */
734          case 0x0c:    /* FF */
735          case 0x0d:    /* CR */
736          case 0x85:    /* NEL */
737          case 0x2028:  /* LS */
738          case 0x2029:  /* PS */
739          return p;
740    
741          default:
742          break;
743          }
744    
745        p = pp;  /* Back one character */
746        }        /* End of loop for ANY case */
747    
748      return startptr;  /* Hit start of data */
749      }     /* End of overall switch */
750    }
751    
752    
753    
754    
755    
756    /*************************************************
757  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
758  *************************************************/  *************************************************/
759    
# Line 486  if (after_context > 0 && lastmatchnumber Line 778  if (after_context > 0 && lastmatchnumber
778    int count = 0;    int count = 0;
779    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
780      {      {
781        int ellength;
782      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
783      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
784      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
785      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
786      fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
787      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
788      }      }
789    hyphenpending = TRUE;    hyphenpending = TRUE;
790    }    }
# Line 548  way, the buffer is shifted left and re-f Line 841  way, the buffer is shifted left and re-f
841    
842  while (ptr < endptr)  while (ptr < endptr)
843    {    {
844    int i;    int i, endlinelength;
845    int mrc = 0;    int mrc = 0;
846    BOOL match = FALSE;    BOOL match = FALSE;
847    char *t = ptr;    char *t = ptr;
# Line 561  while (ptr < endptr) Line 854  while (ptr < endptr)
854    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
855    that any match is constrained to be in the first line. */    that any match is constrained to be in the first line. */
856    
857    linelength = 0;    t = end_of_line(t, endptr, &endlinelength);
858    while (t < endptr && *t++ != '\n') linelength++;    linelength = t - ptr - endlinelength;
859    length = multiline? endptr - ptr : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
860    
861      /* Extra processing for Jeffrey Friedl's debugging. */
862    
863    #ifdef JFRIEDL_DEBUG
864      if (jfriedl_XT || jfriedl_XR)
865      {
866          #include <sys/time.h>
867          #include <time.h>
868          struct timeval start_time, end_time;
869          struct timezone dummy;
870    
871          if (jfriedl_XT)
872          {
873              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
874              const char *orig = ptr;
875              ptr = malloc(newlen + 1);
876              if (!ptr) {
877                      printf("out of memory");
878                      exit(2);
879              }
880              endptr = ptr;
881              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
882              for (i = 0; i < jfriedl_XT; i++) {
883                      strncpy(endptr, orig,  length);
884                      endptr += length;
885              }
886              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
887              length = newlen;
888          }
889    
890          if (gettimeofday(&start_time, &dummy) != 0)
891                  perror("bad gettimeofday");
892    
893    
894          for (i = 0; i < jfriedl_XR; i++)
895              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
896    
897          if (gettimeofday(&end_time, &dummy) != 0)
898                  perror("bad gettimeofday");
899    
900          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
901                          -
902                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
903    
904          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
905          return 0;
906      }
907    #endif
908    
909    
910    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches. Note that we don't include
911    the final newline in the subject string. */    the final newline in the subject string. */
# Line 646  while (ptr < endptr) Line 988  while (ptr < endptr)
988    
989        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
990          {          {
991            int ellength;
992          int linecount = 0;          int linecount = 0;
993          char *p = lastmatchrestart;          char *p = lastmatchrestart;
994    
995          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
996            {            {
997            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
998            linecount++;            linecount++;
999            }            }
1000    
# Line 665  while (ptr < endptr) Line 1007  while (ptr < endptr)
1007            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1008            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1009            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1010            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1011            fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1012            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1013            }            }
1014          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1015          }          }
# Line 693  while (ptr < endptr) Line 1035  while (ptr < endptr)
1035                 linecount < before_context)                 linecount < before_context)
1036            {            {
1037            linecount++;            linecount++;
1038            p--;            p = previous_line(p, buffer);
           while (p > buffer && p[-1] != '\n') p--;  
1039            }            }
1040    
1041          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 702  while (ptr < endptr) Line 1043  while (ptr < endptr)
1043    
1044          while (p < ptr)          while (p < ptr)
1045            {            {
1046              int ellength;
1047            char *pp = p;            char *pp = p;
1048            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1049            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1050            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1051            fwrite(p, 1, pp - p + 1, stdout);   /* In case binary zero */            fwrite(p, 1, pp - p, stdout);
1052            p = pp + 1;            p = pp;
1053            }            }
1054          }          }
1055    
# Line 723  while (ptr < endptr) Line 1065  while (ptr < endptr)
1065        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1066        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1067        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately. Because the PCRE_FIRSTLINE option is set, the
1068        start of the match will always be before the first \n character. */        start of the match will always be before the first newline sequence. */
1069    
1070        if (multiline)        if (multiline)
1071          {          {
1072            int ellength;
1073          char *endmatch = ptr + offsets[1];          char *endmatch = ptr + offsets[1];
1074          t = ptr;          t = ptr;
1075          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          while (t < endmatch)
1076          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1077          linelength = endmatch - ptr;            t = end_of_line(t, endptr, &ellength);
1078              if (t <= endmatch) linenumber++; else break;
1079              }
1080            endmatch = end_of_line(endmatch, endptr, &ellength);
1081            linelength = endmatch - ptr - ellength;
1082          }          }
1083    
1084        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 763  while (ptr < endptr) Line 1110  while (ptr < endptr)
1110          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1111          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1112          }          }
1113        else fwrite(ptr, 1, linelength, stdout);        else fwrite(ptr, 1, linelength + endlinelength, stdout);
   
       fprintf(stdout, "\n");  
1114        }        }
1115    
1116      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match */
# Line 775  while (ptr < endptr) Line 1120  while (ptr < endptr)
1120      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1121      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1122    
1123      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1124      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1125      }      }
1126    
1127    /* Advance to after the newline and increment the line number. */    /* Advance to after the newline and increment the line number. */
1128    
1129    ptr += linelength + 1;    ptr += linelength + endlinelength;
1130    linenumber++;    linenumber++;
1131    
1132    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
# Line 1037  switch(letter) Line 1382  switch(letter)
1382    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1383    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1384    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1385    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1386    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1387    case 'w': process_options |= PO_WORD_MATCH; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1388    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1389    
1390    case 'V':    case 'V':
1391    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1392    exit(0);    exit(0);
1393    break;    break;
1394    
# Line 1120  sprintf(buffer, "%s%.*s%s", prefix[proce Line 1464  sprintf(buffer, "%s%.*s%s", prefix[proce
1464    suffix[process_options]);    suffix[process_options]);
1465  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1466    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
1467  if (pattern_list[pattern_count++] != NULL) return TRUE;  if (pattern_list[pattern_count] != NULL)
1468      {
1469      pattern_count++;
1470      return TRUE;
1471      }
1472    
1473  /* Handle compile errors */  /* Handle compile errors */
1474    
# Line 1152  return FALSE; Line 1500  return FALSE;
1500  *************************************************/  *************************************************/
1501    
1502  /* When the -F option has been used, each string may be a list of strings,  /* When the -F option has been used, each string may be a list of strings,
1503  separated by newlines. They will be matched literally.  separated by line breaks. They will be matched literally.
1504    
1505  Arguments:  Arguments:
1506    pattern        the pattern string    pattern        the pattern string
# Line 1170  compile_pattern(char *pattern, int optio Line 1518  compile_pattern(char *pattern, int optio
1518  {  {
1519  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
1520    {    {
1521      char *eop = pattern + strlen(pattern);
1522    char buffer[MBUFTHIRD];    char buffer[MBUFTHIRD];
1523    for(;;)    for(;;)
1524      {      {
1525      char *p = strchr(pattern, '\n');      int ellength;
1526      if (p == NULL)      char *p = end_of_line(pattern, eop, &ellength);
1527        if (ellength == 0)
1528        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1529      sprintf(buffer, "%.*s", p - pattern, pattern);      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1530      pattern = p + 1;      pattern = p;
1531      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1532        return FALSE;        return FALSE;
1533      }      }
# Line 1200  int i, j; Line 1550  int i, j;
1550  int rc = 1;  int rc = 1;
1551  int pcre_options = 0;  int pcre_options = 0;
1552  int cmd_pattern_count = 0;  int cmd_pattern_count = 0;
1553    int hint_count = 0;
1554  int errptr;  int errptr;
1555  BOOL only_one_at_top;  BOOL only_one_at_top;
1556  char *patterns[MAX_PATTERN_COUNT];  char *patterns[MAX_PATTERN_COUNT];
1557  const char *locale_from = "--locale";  const char *locale_from = "--locale";
1558  const char *error;  const char *error;
1559    
1560    /* Set the default line ending value from the default in the PCRE library;
1561    "lf", "cr", "crlf", "any", and "anycrlf" are supported. Anything else is treated as "lf".
1562    */
1563    
1564    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1565    switch(i)
1566      {
1567      default:                 newline = (char *)"lf"; break;
1568      case '\r':               newline = (char *)"cr"; break;
1569      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1570      case -1:                 newline = (char *)"any"; break;
1571      case -2:                 newline = (char *)"anycrlf"; break;
1572      }
1573    
1574  /* Process the options */  /* Process the options */
1575    
1576  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
# Line 1261  for (i = 1; i < argc; i++) Line 1626  for (i = 1; i < argc; i++)
1626          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
1627            {            {
1628            int oplen = equals - op->long_name;            int oplen = equals - op->long_name;
1629            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1630            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1631              {              {
1632              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1280  for (i = 1; i < argc; i++) Line 1645  for (i = 1; i < argc; i++)
1645          char buff2[24];          char buff2[24];
1646          int baselen = opbra - op->long_name;          int baselen = opbra - op->long_name;
1647          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
1648          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1,
1649            opbra + 1);            (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1650          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1651            break;            break;
1652          }          }
# Line 1294  for (i = 1; i < argc; i++) Line 1659  for (i = 1; i < argc; i++)
1659        }        }
1660      }      }
1661    
1662    
1663      /* Jeffrey Friedl's debugging harness uses these additional options which
1664      are not in the right form for putting in the option table because they use
1665      only one hyphen, yet are more than one character long. By putting them
1666      separately here, they will not get displayed as part of the help() output,
1667      but I don't think Jeffrey will care about that. */
1668    
1669    #ifdef JFRIEDL_DEBUG
1670      else if (strcmp(argv[i], "-pre") == 0) {
1671              jfriedl_prefix = argv[++i];
1672              continue;
1673      } else if (strcmp(argv[i], "-post") == 0) {
1674              jfriedl_postfix = argv[++i];
1675              continue;
1676      } else if (strcmp(argv[i], "-XT") == 0) {
1677              sscanf(argv[++i], "%d", &jfriedl_XT);
1678              continue;
1679      } else if (strcmp(argv[i], "-XR") == 0) {
1680              sscanf(argv[++i], "%d", &jfriedl_XR);
1681              continue;
1682      }
1683    #endif
1684    
1685    
1686    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
1687    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
1688    
# Line 1333  for (i = 1; i < argc; i++) Line 1722  for (i = 1; i < argc; i++)
1722    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1723    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
1724    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r" and
1725    Jeffrey Friedl's special debugging option. */    Jeffrey Friedl's special -S debugging option. */
1726    
1727    if (*option_data == 0 &&    if (*option_data == 0 &&
1728        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
# Line 1465  if (colour_option != NULL && strcmp(colo Line 1854  if (colour_option != NULL && strcmp(colo
1854      }      }
1855    }    }
1856    
1857    /* Interpret the newline type; the default settings are Unix-like. */
1858    
1859    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1860      {
1861      pcre_options |= PCRE_NEWLINE_CR;
1862      endlinetype = EL_CR;
1863      }
1864    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1865      {
1866      pcre_options |= PCRE_NEWLINE_LF;
1867      endlinetype = EL_LF;
1868      }
1869    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1870      {
1871      pcre_options |= PCRE_NEWLINE_CRLF;
1872      endlinetype = EL_CRLF;
1873      }
1874    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1875      {
1876      pcre_options |= PCRE_NEWLINE_ANY;
1877      endlinetype = EL_ANY;
1878      }
1879    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1880      {
1881      pcre_options |= PCRE_NEWLINE_ANYCRLF;
1882      endlinetype = EL_ANYCRLF;
1883      }
1884    else
1885      {
1886      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1887      return 2;
1888      }
1889    
1890  /* Interpret the text values for -d and -D */  /* Interpret the text values for -d and -D */
1891    
1892  if (dee_option != NULL)  if (dee_option != NULL)
# Line 1490  if (DEE_option != NULL) Line 1912  if (DEE_option != NULL)
1912      }      }
1913    }    }
1914    
1915  /* Check the value for Jeff Friedl's debugging option. */  /* Check the values for Jeffrey Friedl's debugging options. */
1916    
1917  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
1918  if (S_arg > 9)  if (S_arg > 9)
# Line 1498  if (S_arg > 9) Line 1920  if (S_arg > 9)
1920    fprintf(stderr, "pcregrep: bad value for -S option\n");    fprintf(stderr, "pcregrep: bad value for -S option\n");
1921    return 2;    return 2;
1922    }    }
1923    if (jfriedl_XT != 0 || jfriedl_XR != 0)
1924      {
1925      if (jfriedl_XT == 0) jfriedl_XT = 1;
1926      if (jfriedl_XR == 0) jfriedl_XR = 1;
1927      }
1928  #endif  #endif
1929    
1930  /* Get memory to store the pattern and hints lists. */  /* Get memory to store the pattern and hints lists. */
# Line 1508  hints_list = (pcre_extra **)malloc(MAX_P Line 1935  hints_list = (pcre_extra **)malloc(MAX_P
1935  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
1936    {    {
1937    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
1938    return 2;    goto EXIT2;
1939    }    }
1940    
1941  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there is no file provided by -f,
# Line 1527  for (j = 0; j < cmd_pattern_count; j++) Line 1954  for (j = 0; j < cmd_pattern_count; j++)
1954    {    {
1955    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(patterns[j], pcre_options, NULL,
1956         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1957      return 2;      goto EXIT2;
1958    }    }
1959    
1960  /* Compile the regular expressions that are provided in a file. */  /* Compile the regular expressions that are provided in a file. */
# Line 1551  if (pattern_filename != NULL) Line 1978  if (pattern_filename != NULL)
1978        {        {
1979        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1980          strerror(errno));          strerror(errno));
1981        return 2;        goto EXIT2;
1982        }        }
1983      filename = pattern_filename;      filename = pattern_filename;
1984      }      }
# Line 1564  if (pattern_filename != NULL) Line 1991  if (pattern_filename != NULL)
1991      linenumber++;      linenumber++;
1992      if (buffer[0] == 0) continue;   /* Skip blank lines */      if (buffer[0] == 0) continue;   /* Skip blank lines */
1993      if (!compile_pattern(buffer, pcre_options, filename, linenumber))      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1994        return 2;        goto EXIT2;
1995      }      }
1996    
1997    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
# Line 1580  for (j = 0; j < pattern_count; j++) Line 2007  for (j = 0; j < pattern_count; j++)
2007      char s[16];      char s[16];
2008      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2009      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2010      return 2;      goto EXIT2;
2011      }      }
2012      hint_count++;
2013    }    }
2014    
2015  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
# Line 1594  if (exclude_pattern != NULL) Line 2022  if (exclude_pattern != NULL)
2022      {      {
2023      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2024        errptr, error);        errptr, error);
2025      return 2;      goto EXIT2;
2026      }      }
2027    }    }
2028    
# Line 1606  if (include_pattern != NULL) Line 2034  if (include_pattern != NULL)
2034      {      {
2035      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2036        errptr, error);        errptr, error);
2037      return 2;      goto EXIT2;
2038      }      }
2039    }    }
2040    
2041  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2042    
2043  if (i >= argc)  if (i >= argc)
2044    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
2045      rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2046      goto EXIT;
2047      }
2048    
2049  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2050  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
# Line 1630  for (; i < argc; i++) Line 2061  for (; i < argc; i++)
2061      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2062    }    }
2063    
2064    EXIT:
2065    if (pattern_list != NULL)
2066      {
2067      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2068      free(pattern_list);
2069      }
2070    if (hints_list != NULL)
2071      {
2072      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2073      free(hints_list);
2074      }
2075  return rc;  return rc;
2076    
2077    EXIT2:
2078    rc = 2;
2079    goto EXIT;
2080  }  }
2081    
2082  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.87  
changed lines
  Added in v.199

  ViewVC Help
Powered by ViewVC 1.1.5