/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 564 by ph10, Sun Oct 31 16:07:24 2010 UTC | revision 779 by ph10, Fri Dec 2 10:39:32 2011 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2010 University of Cambridge             Copyright (c) 1997-2011 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 74  typedef int BOOL; Line 74  typedef int BOOL;
74  #define OFFSET_SIZE 99  #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define PATBUFSIZE BUFSIZ
78  #else  #else
79  #define MBUFTHIRD 8192  #define PATBUFSIZE 8192
80  #endif  #endif
81    
82  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
# Line 135  static char *colour_string = (char *)"1; Line 135  static char *colour_string = (char *)"1;
135  static char *colour_option = NULL;  static char *colour_option = NULL;
136  static char *dee_option = NULL;  static char *dee_option = NULL;
137  static char *DEE_option = NULL;  static char *DEE_option = NULL;
138    static char *main_buffer = NULL;
139  static char *newline = NULL;  static char *newline = NULL;
140  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
141  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
# Line 159  static pcre *exclude_dir_compiled = NULL Line 160  static pcre *exclude_dir_compiled = NULL
160  static int after_context = 0;  static int after_context = 0;
161  static int before_context = 0;  static int before_context = 0;
162  static int both_context = 0;  static int both_context = 0;
163    static int bufthird = PCREGREP_BUFSIZE;
164    static int bufsize = 3*PCREGREP_BUFSIZE;
165  static int dee_action = dee_READ;  static int dee_action = dee_READ;
166  static int DEE_action = DEE_READ;  static int DEE_action = DEE_READ;
167  static int error_count = 0;  static int error_count = 0;
168  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
169    static int only_matching = -1;
170  static int process_options = 0;  static int process_options = 0;
171    
172    #ifdef SUPPORT_PCREGREP_JIT
173    static int study_options = PCRE_STUDY_JIT_COMPILE;
174    #else
175    static int study_options = 0;
176    #endif
177    
178  static unsigned long int match_limit = 0;  static unsigned long int match_limit = 0;
179  static unsigned long int match_limit_recursion = 0;  static unsigned long int match_limit_recursion = 0;
180    
# Line 178  static BOOL line_offsets = FALSE; Line 188  static BOOL line_offsets = FALSE;
188  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
189  static BOOL number = FALSE;  static BOOL number = FALSE;
190  static BOOL omit_zero_count = FALSE;  static BOOL omit_zero_count = FALSE;
 static BOOL only_matching = FALSE;  
191  static BOOL resource_error = FALSE;  static BOOL resource_error = FALSE;
192  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
193  static BOOL silent = FALSE;  static BOOL silent = FALSE;
# Line 186  static BOOL utf8 = FALSE; Line 195  static BOOL utf8 = FALSE;
195    
196  /* Structure for options and list of them */  /* Structure for options and list of them */
197    
198  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
199         OP_PATLIST };         OP_OP_NUMBER, OP_PATLIST };
200    
201  typedef struct option_item {  typedef struct option_item {
202    int type;    int type;
# Line 214  used to identify them. */ Line 223  used to identify them. */
223  #define N_LBUFFER      (-12)  #define N_LBUFFER      (-12)
224  #define N_M_LIMIT      (-13)  #define N_M_LIMIT      (-13)
225  #define N_M_LIMIT_REC  (-14)  #define N_M_LIMIT_REC  (-14)
226    #define N_BUFSIZE      (-15)
227    #define N_NOJIT        (-16)
228    
229  static option_item optionlist[] = {  static option_item optionlist[] = {
230    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "  terminate options" },
231    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
232    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
233    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
234    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
235    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
236    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
237    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
238    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
239    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
240    { OP_PATLIST,   'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
241    { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },    { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
242    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
243    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },    { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },
244    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
245    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
246    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
247    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
248    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },  #ifdef SUPPORT_PCREGREP_JIT
249    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
250    { OP_NODATA,    N_LBUFFER, NULL,             "line-buffered", "use line buffering" },  #else
251    { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
252    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },  #endif
253    { OP_NUMBER,    N_M_LIMIT,&match_limit,      "match-limit=number", "set PCRE match limit option" },    { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
254    { OP_NUMBER,    N_M_LIMIT_REC,&match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },    { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
255    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
256    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },    { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
257    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
258    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
259    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
260    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
261    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
262    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
263      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
264      { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
265      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
266      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
267      { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
268      { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
269      { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
270      { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
271    
272      /* These two were accidentally implemented with underscores instead of
273      hyphens in the option names. As this was not discovered for several releases,
274      the incorrect versions are left in the table for compatibility. However, the
275      --help function misses out any option that has an underscore in its name. */
276    
277    { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },    { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
278    { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },    { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
279    
280  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
281    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
282  #endif  #endif
# Line 287  const char utf8_table4[] = { Line 313  const char utf8_table4[] = {
313    
314    
315  /*************************************************  /*************************************************
316    *         Exit from the program                  *
317    *************************************************/
318    
319    /* If there has been a resource error, give a suitable message.
320    
321    Argument:  the return code
322    Returns:   does not return
323    */
324    
325    static void
326    pcregrep_exit(int rc)
327    {
328    if (resource_error)
329      {
330      fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
331        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
332        PCRE_ERROR_JIT_STACKLIMIT);
333      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
334      }
335    
336    exit(rc);
337    }
338    
339    
340    /*************************************************
341  *            OS-specific functions               *  *            OS-specific functions               *
342  *************************************************/  *************************************************/
343    
# Line 556  return sys_errlist[n]; Line 607  return sys_errlist[n];
607    
608    
609  /*************************************************  /*************************************************
 *         Exit from the program                  *  
 *************************************************/  
   
 /* If there has been a resource error, give a suitable message.  
   
 Argument:  the return code  
 Returns:   does not return  
 */  
   
 static void  
 pcregrep_exit(int rc)  
 {  
 if (resource_error)  
   {  
   fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "  
     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);  
   fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");  
   }  
   
 exit(rc);  
 }  
   
   
   
 /*************************************************  
610  *            Read one line of input              *  *            Read one line of input              *
611  *************************************************/  *************************************************/
612    
# Line 626  Arguments: Line 652  Arguments:
652    endptr    end of available data    endptr    end of available data
653    lenptr    where to put the length of the eol sequence    lenptr    where to put the length of the eol sequence
654    
655  Returns:    pointer to the last byte of the line  Returns:    pointer after the last byte of the line,
656                including the newline byte(s)
657  */  */
658    
659  static char *  static char *
# Line 927  is used multiple times for the same subj Line 954  is used multiple times for the same subj
954  to find all possible matches.  to find all possible matches.
955    
956  Arguments:  Arguments:
957    matchptr    the start of the subject    matchptr     the start of the subject
958    length      the length of the subject to match    length       the length of the subject to match
959    offsets     the offsets vector to fill in    startoffset  where to start matching
960    mrc         address of where to put the result of pcre_exec()    offsets      the offsets vector to fill in
961      mrc          address of where to put the result of pcre_exec()
962    
963  Returns:      TRUE if there was a match  Returns:      TRUE if there was a match
964                FALSE if there was no match                FALSE if there was no match
# Line 938  Returns:      TRUE if there was a match Line 966  Returns:      TRUE if there was a match
966  */  */
967    
968  static BOOL  static BOOL
969  match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)  match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
970      int *mrc)
971  {  {
972  int i;  int i;
973  size_t slen = length;  size_t slen = length;
# Line 947  if (slen > 200) Line 976  if (slen > 200)
976    {    {
977    slen = 200;    slen = 200;
978    msg = "text that starts:\n\n";    msg = "text that starts:\n\n";
979    }    }
980  for (i = 0; i < pattern_count; i++)  for (i = 0; i < pattern_count; i++)
981    {    {
982    *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,    *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
983      PCRE_NOTEMPTY, offsets, OFFSET_SIZE);      startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
984    if (*mrc >= 0) return TRUE;    if (*mrc >= 0) return TRUE;
985    if (*mrc == PCRE_ERROR_NOMATCH) continue;    if (*mrc == PCRE_ERROR_NOMATCH) continue;
986    fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);    fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
# Line 959  for (i = 0; i < pattern_count; i++) Line 988  for (i = 0; i < pattern_count; i++)
988    fprintf(stderr, "%s", msg);    fprintf(stderr, "%s", msg);
989    FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */    FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
990    fprintf(stderr, "\n\n");    fprintf(stderr, "\n\n");
991    if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)    if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
992          *mrc == PCRE_ERROR_JIT_STACKLIMIT)
993      resource_error = TRUE;      resource_error = TRUE;
994    if (error_count++ > 20)    if (error_count++ > 20)
995      {      {
# Line 979  return FALSE;  /* No match, no errors */ Line 1009  return FALSE;  /* No match, no errors */
1009  *************************************************/  *************************************************/
1010    
1011  /* This is called from grep_or_recurse() below. It uses a buffer that is three  /* This is called from grep_or_recurse() below. It uses a buffer that is three
1012  times the value of MBUFTHIRD. The matching point is never allowed to stray into  times the value of bufthird. The matching point is never allowed to stray into
1013  the top third of the buffer, thus keeping more of the file available for  the top third of the buffer, thus keeping more of the file available for
1014  context printing or for multiline scanning. For large files, the pointer will  context printing or for multiline scanning. For large files, the pointer will
1015  be in the middle third most of the time, so the bottom third is available for  be in the middle third most of the time, so the bottom third is available for
# Line 990  Arguments: Line 1020  Arguments:
1020                 the gzFile pointer when reading is via libz                 the gzFile pointer when reading is via libz
1021                 the BZFILE pointer when reading is via libbz2                 the BZFILE pointer when reading is via libbz2
1022    frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2    frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1023      filename     the file name or NULL (for errors)
1024    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
1025                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
1026                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
1027    
1028  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
1029                 1 otherwise (no matches)                 1 otherwise (no matches)
1030                 2 if there is a read error on a .bz2 file                 2 if an overlong line is encountered
1031                   3 if there is a read error on a .bz2 file
1032  */  */
1033    
1034  static int  static int
1035  pcregrep(void *handle, int frtype, char *printname)  pcregrep(void *handle, int frtype, char *filename, char *printname)
1036  {  {
1037  int rc = 1;  int rc = 1;
1038  int linenumber = 1;  int linenumber = 1;
# Line 1009  int count = 0; Line 1041  int count = 0;
1041  int filepos = 0;  int filepos = 0;
1042  int offsets[OFFSET_SIZE];  int offsets[OFFSET_SIZE];
1043  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
1044  char buffer[3*MBUFTHIRD];  char *ptr = main_buffer;
 char *ptr = buffer;  
1045  char *endptr;  char *endptr;
1046  size_t bufflength;  size_t bufflength;
1047  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
# Line 1035  fail. */ Line 1066  fail. */
1066  if (frtype == FR_LIBZ)  if (frtype == FR_LIBZ)
1067    {    {
1068    ingz = (gzFile)handle;    ingz = (gzFile)handle;
1069    bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);    bufflength = gzread (ingz, main_buffer, bufsize);
1070    }    }
1071  else  else
1072  #endif  #endif
# Line 1044  else Line 1075  else
1075  if (frtype == FR_LIBBZ2)  if (frtype == FR_LIBBZ2)
1076    {    {
1077    inbz2 = (BZFILE *)handle;    inbz2 = (BZFILE *)handle;
1078    bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);    bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1079    if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */    if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1080    }                                    /* without the cast it is unsigned. */    }                                    /* without the cast it is unsigned. */
1081  else  else
# Line 1054  else Line 1085  else
1085    in = (FILE *)handle;    in = (FILE *)handle;
1086    if (is_file_tty(in)) input_line_buffered = TRUE;    if (is_file_tty(in)) input_line_buffered = TRUE;
1087    bufflength = input_line_buffered?    bufflength = input_line_buffered?
1088      read_one_line(buffer, 3*MBUFTHIRD, in) :      read_one_line(main_buffer, bufsize, in) :
1089      fread(buffer, 1, 3*MBUFTHIRD, in);      fread(main_buffer, 1, bufsize, in);
1090    }    }
1091    
1092  endptr = buffer + bufflength;  endptr = main_buffer + bufflength;
1093    
1094  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
1095  files, endptr will be at the end of the buffer when we are in the middle of the  files, endptr will be at the end of the buffer when we are in the middle of the
# Line 1069  while (ptr < endptr) Line 1100  while (ptr < endptr)
1100    {    {
1101    int endlinelength;    int endlinelength;
1102    int mrc = 0;    int mrc = 0;
1103      int startoffset = 0;
1104    BOOL match;    BOOL match;
1105    char *matchptr = ptr;    char *matchptr = ptr;
1106    char *t = ptr;    char *t = ptr;
# Line 1086  while (ptr < endptr) Line 1118  while (ptr < endptr)
1118    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
1119    length = multiline? (size_t)(endptr - ptr) : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
1120    
1121      /* Check to see if the line we are looking at extends right to the very end
1122      of the buffer without a line terminator. This means the line is too long to
1123      handle. */
1124    
1125      if (endlinelength == 0 && t == main_buffer + bufsize)
1126        {
1127        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1128                        "pcregrep: check the --buffer-size option\n",
1129                        linenumber,
1130                        (filename == NULL)? "" : " of file ",
1131                        (filename == NULL)? "" : filename);
1132        return 2;
1133        }
1134    
1135    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
1136    
1137  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
# Line 1145  while (ptr < endptr) Line 1191  while (ptr < endptr)
1191    than NOMATCH. This code is in a subroutine so that it can be re-used for    than NOMATCH. This code is in a subroutine so that it can be re-used for
1192    finding subsequent matches when colouring matched lines. */    finding subsequent matches when colouring matched lines. */
1193    
1194    match = match_patterns(matchptr, length, offsets, &mrc);    match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1195    
1196    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1197    
# Line 1174  while (ptr < endptr) Line 1220  while (ptr < endptr)
1220    
1221      else if (quiet) return 0;      else if (quiet) return 0;
1222    
1223      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, or a
1224      the --file-offsets and --line-offsets options output offsets for the      captured portion of it, as long as this string is not empty, and the
1225      matching substring (they both force --only-matching). None of these options      --file-offsets and --line-offsets options output offsets for the matching
1226      prints any context. Afterwards, adjust the start and length, and then jump      substring (they both force --only-matching = 0). None of these options
1227      back to look for further matches in the same line. If we are in invert      prints any context. Afterwards, adjust the start and then jump back to look
1228      mode, however, nothing is printed - this could be still useful because the      for further matches in the same line. If we are in invert mode, however,
1229      return code is set. */      nothing is printed and we do not restart - this could still be useful
1230        because the return code is set. */
1231    
1232      else if (only_matching)      else if (only_matching >= 0)
1233        {        {
1234        if (!invert)        if (!invert)
1235          {          {
1236          if (printname != NULL) fprintf(stdout, "%s:", printname);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1237          if (number) fprintf(stdout, "%d:", linenumber);          if (number) fprintf(stdout, "%d:", linenumber);
1238          if (line_offsets)          if (line_offsets)
1239            fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),            fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1240              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1241          else if (file_offsets)          else if (file_offsets)
1242            fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),            fprintf(stdout, "%d,%d\n",
1243                (int)(filepos + matchptr + offsets[0] - ptr),
1244              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1245          else          else if (only_matching < mrc)
1246            {            {
1247            if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);            int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1248            FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);            if (plen > 0)
1249            if (do_colour) fprintf(stdout, "%c[00m", 0x1b);              {
1250                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1251                FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1252                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1253                fprintf(stdout, "\n");
1254                }
1255            }            }
1256          fprintf(stdout, "\n");          else if (printname != NULL || number) fprintf(stdout, "\n");
         matchptr += offsets[1];  
         length -= offsets[1];  
1257          match = FALSE;          match = FALSE;
1258          if (line_buffered) fflush(stdout);          if (line_buffered) fflush(stdout);
1259          rc = 0;    /* Had some success */          rc = 0;                      /* Had some success */
1260            startoffset = offsets[1];    /* Restart after the match */
1261          goto ONLY_MATCHING_RESTART;          goto ONLY_MATCHING_RESTART;
1262          }          }
1263        }        }
# Line 1264  while (ptr < endptr) Line 1316  while (ptr < endptr)
1316          int linecount = 0;          int linecount = 0;
1317          char *p = ptr;          char *p = ptr;
1318    
1319          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1320                 linecount < before_context)                 linecount < before_context)
1321            {            {
1322            linecount++;            linecount++;
1323            p = previous_line(p, buffer);            p = previous_line(p, main_buffer);
1324            }            }
1325    
1326          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 1301  while (ptr < endptr) Line 1353  while (ptr < endptr)
1353        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1354        the match will always be before the first newline sequence. */        the match will always be before the first newline sequence. */
1355    
1356        if (multiline)        if (multiline & !invert)
1357          {          {
1358          int ellength;          char *endmatch = ptr + offsets[1];
1359          char *endmatch = ptr;          t = ptr;
1360          if (!invert)          while (t < endmatch)
1361            {            {
1362            endmatch += offsets[1];            t = end_of_line(t, endptr, &endlinelength);
1363            t = ptr;            if (t < endmatch) linenumber++; else break;
           while (t < endmatch)  
             {  
             t = end_of_line(t, endptr, &ellength);  
             if (t <= endmatch) linenumber++; else break;  
             }  
1364            }            }
1365          endmatch = end_of_line(endmatch, endptr, &ellength);          linelength = t - ptr - endlinelength;
         linelength = endmatch - ptr - ellength;  
1366          }          }
1367    
1368        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 1339  while (ptr < endptr) Line 1385  while (ptr < endptr)
1385  #endif  #endif
1386    
1387        /* We have to split the line(s) up if colouring, and search for further        /* We have to split the line(s) up if colouring, and search for further
1388        matches. */        matches, but not of course if the line is a non-match. */
1389    
1390        if (do_colour)        if (do_colour && !invert)
1391          {          {
1392          int last_offset = 0;          int plength;
1393          FWRITE(ptr, 1, offsets[0], stdout);          FWRITE(ptr, 1, offsets[0], stdout);
1394          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1395          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1396          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1397          for (;;)          for (;;)
1398            {            {
1399            last_offset += offsets[1];            startoffset = offsets[1];
1400            matchptr += offsets[1];            if (startoffset >= (int)linelength + endlinelength ||
1401            length -= offsets[1];                !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1402            if (!match_patterns(matchptr, length, offsets, &mrc)) break;              break;
1403            FWRITE(matchptr, 1, offsets[0], stdout);            FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1404            fprintf(stdout, "%c[%sm", 0x1b, colour_string);            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1405            FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);            FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1406            fprintf(stdout, "%c[00m", 0x1b);            fprintf(stdout, "%c[00m", 0x1b);
1407            }            }
1408          FWRITE(ptr + last_offset, 1,  
1409            (linelength + endlinelength) - last_offset, stdout);          /* In multiline mode, we may have already printed the complete line
1410            and its line-ending characters (if they matched the pattern), so there
1411            may be no more to print. */
1412    
1413            plength = (int)((linelength + endlinelength) - startoffset);
1414            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1415          }          }
1416    
1417        /* Not colouring; no need to search for further matches */        /* Not colouring; no need to search for further matches */
# Line 1409  while (ptr < endptr) Line 1460  while (ptr < endptr)
1460    /* If input is line buffered, and the buffer is not yet full, read another    /* If input is line buffered, and the buffer is not yet full, read another
1461    line and add it into the buffer. */    line and add it into the buffer. */
1462    
1463    if (input_line_buffered && bufflength < sizeof(buffer))    if (input_line_buffered && bufflength < (size_t)bufsize)
1464      {      {
1465      int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);      int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1466      bufflength += add;      bufflength += add;
1467      endptr += add;      endptr += add;
1468      }      }
# Line 1421  while (ptr < endptr) Line 1472  while (ptr < endptr)
1472    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
1473    about to be lost, print them. */    about to be lost, print them. */
1474    
1475    if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)    if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1476      {      {
1477      if (after_context > 0 &&      if (after_context > 0 &&
1478          lastmatchnumber > 0 &&          lastmatchnumber > 0 &&
1479          lastmatchrestart < buffer + MBUFTHIRD)          lastmatchrestart < main_buffer + bufthird)
1480        {        {
1481        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1482        lastmatchnumber = 0;        lastmatchnumber = 0;
# Line 1433  while (ptr < endptr) Line 1484  while (ptr < endptr)
1484    
1485      /* Now do the shuffle */      /* Now do the shuffle */
1486    
1487      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1488      ptr -= MBUFTHIRD;      ptr -= bufthird;
1489    
1490  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
1491      if (frtype == FR_LIBZ)      if (frtype == FR_LIBZ)
1492        bufflength = 2*MBUFTHIRD +        bufflength = 2*bufthird +
1493          gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);          gzread (ingz, main_buffer + 2*bufthird, bufthird);
1494      else      else
1495  #endif  #endif
1496    
1497  #ifdef SUPPORT_LIBBZ2  #ifdef SUPPORT_LIBBZ2
1498      if (frtype == FR_LIBBZ2)      if (frtype == FR_LIBBZ2)
1499        bufflength = 2*MBUFTHIRD +        bufflength = 2*bufthird +
1500          BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);          BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1501      else      else
1502  #endif  #endif
1503    
1504      bufflength = 2*MBUFTHIRD +      bufflength = 2*bufthird +
1505        (input_line_buffered?        (input_line_buffered?
1506         read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :         read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1507         fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));         fread(main_buffer + 2*bufthird, 1, bufthird, in));
1508      endptr = buffer + bufflength;      endptr = main_buffer + bufflength;
1509    
1510      /* Adjust any last match point */      /* Adjust any last match point */
1511    
1512      if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;      if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1513      }      }
1514    }     /* Loop through the whole file */    }     /* Loop through the whole file */
1515    
1516  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1517  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1518    
1519  if (!only_matching && !count_only)  if (only_matching < 0 && !count_only)
1520    {    {
1521    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1522    hyphenpending |= endhyphenpending;    hyphenpending |= endhyphenpending;
# Line 1538  BZFILE *inbz2 = NULL; Line 1589  BZFILE *inbz2 = NULL;
1589    
1590  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1591    {    {
1592    return pcregrep(stdin, FR_PLAIN,    return pcregrep(stdin, FR_PLAIN, stdin_name,
1593      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1594        stdin_name : NULL);        stdin_name : NULL);
1595    }    }
# Line 1670  if (handle == NULL) Line 1721  if (handle == NULL)
1721    
1722  /* Now grep the file */  /* Now grep the file */
1723    
1724  rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||  rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1725    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1726    
1727  /* Close in an appropriate manner. */  /* Close in an appropriate manner. */
# Line 1681  if (frtype == FR_LIBZ) Line 1732  if (frtype == FR_LIBZ)
1732  else  else
1733  #endif  #endif
1734    
1735  /* If it is a .bz2 file and the result is 2, it means that the first attempt to  /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1736  read failed. If the error indicates that the file isn't in fact bzipped, try  read failed. If the error indicates that the file isn't in fact bzipped, try
1737  again as a normal file. */  again as a normal file. */
1738    
1739  #ifdef SUPPORT_LIBBZ2  #ifdef SUPPORT_LIBBZ2
1740  if (frtype == FR_LIBBZ2)  if (frtype == FR_LIBBZ2)
1741    {    {
1742    if (rc == 2)    if (rc == 3)
1743      {      {
1744      int errnum;      int errnum;
1745      const char *err = BZ2_bzerror(inbz2, &errnum);      const char *err = BZ2_bzerror(inbz2, &errnum);
# Line 1700  if (frtype == FR_LIBBZ2) Line 1751  if (frtype == FR_LIBBZ2)
1751      else if (!silent)      else if (!silent)
1752        fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",        fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1753          pathname, err);          pathname, err);
1754        rc = 2;    /* The normal "something went wrong" code */
1755      }      }
1756    BZ2_bzclose(inbz2);    BZ2_bzclose(inbz2);
1757    }    }
# Line 1775  for (op = optionlist; op->one_char != 0; Line 1827  for (op = optionlist; op->one_char != 0;
1827    {    {
1828    int n;    int n;
1829    char s[4];    char s[4];
1830    
1831      /* Two options were accidentally implemented and documented with underscores
1832      instead of hyphens in their names, something that was not noticed for quite a
1833      few releases. When fixing this, I left the underscored versions in the list
1834      in case people were using them. However, we don't want to display them in the
1835      help data. There are no other options that contain underscores, and we do not
1836      expect ever to implement such options. Therefore, just omit any option that
1837      contains an underscore. */
1838    
1839      if (strchr(op->long_name, '_') != NULL) continue;
1840    
1841    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1842    n = 30 - printf("  %s --%s", s, op->long_name);    n = 31 - printf("  %s --%s", s, op->long_name);
1843    if (n < 1) n = 1;    if (n < 1) n = 1;
1844    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                     ", op->help_text);
1845    }    }
1846    
1847  printf("\nWhen reading patterns from a file instead of using a command line option,\n");  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1848    printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1849    printf("When reading patterns from a file instead of using a command line option,\n");
1850  printf("trailing white space is removed and blank lines are ignored.\n");  printf("trailing white space is removed and blank lines are ignored.\n");
1851  printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1852      MAX_PATTERN_COUNT, PATBUFSIZE);
1853    
1854  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1855  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
# Line 1803  switch(letter) Line 1869  switch(letter)
1869    {    {
1870    case N_FOFFSETS: file_offsets = TRUE; break;    case N_FOFFSETS: file_offsets = TRUE; break;
1871    case N_HELP: help(); pcregrep_exit(0);    case N_HELP: help(); pcregrep_exit(0);
   case N_LOFFSETS: line_offsets = number = TRUE; break;  
1872    case N_LBUFFER: line_buffered = TRUE; break;    case N_LBUFFER: line_buffered = TRUE; break;
1873      case N_LOFFSETS: line_offsets = number = TRUE; break;
1874      case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
1875    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1876    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1877    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
# Line 1814  switch(letter) Line 1881  switch(letter)
1881    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1882    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1883    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1884    case 'o': only_matching = TRUE; break;    case 'o': only_matching = 0; break;
1885    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1886    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1887    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
# Line 1885  Returns:         TRUE on success, FALSE Line 1952  Returns:         TRUE on success, FALSE
1952  static BOOL  static BOOL
1953  compile_single_pattern(char *pattern, int options, char *filename, int count)  compile_single_pattern(char *pattern, int options, char *filename, int count)
1954  {  {
1955  char buffer[MBUFTHIRD + 16];  char buffer[PATBUFSIZE];
1956  const char *error;  const char *error;
1957  int errptr;  int errptr;
1958    
# Line 1896  if (pattern_count >= MAX_PATTERN_COUNT) Line 1963  if (pattern_count >= MAX_PATTERN_COUNT)
1963    return FALSE;    return FALSE;
1964    }    }
1965    
1966  sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,  sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
1967    suffix[process_options]);    suffix[process_options]);
1968  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1969    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
# Line 1955  compile_pattern(char *pattern, int optio Line 2022  compile_pattern(char *pattern, int optio
2022  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
2023    {    {
2024    char *eop = pattern + strlen(pattern);    char *eop = pattern + strlen(pattern);
2025    char buffer[MBUFTHIRD];    char buffer[PATBUFSIZE];
2026    for(;;)    for(;;)
2027      {      {
2028      int ellength;      int ellength;
# Line 1993  char *patterns[MAX_PATTERN_COUNT]; Line 2060  char *patterns[MAX_PATTERN_COUNT];
2060  const char *locale_from = "--locale";  const char *locale_from = "--locale";
2061  const char *error;  const char *error;
2062    
2063    #ifdef SUPPORT_PCREGREP_JIT
2064    pcre_jit_stack *jit_stack = NULL;
2065    #endif
2066    
2067  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
2068  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2069  Note that the return values from pcre_config(), though derived from the ASCII  Note that the return values from pcre_config(), though derived from the ASCII
# Line 2154  for (i = 1; i < argc; i++) Line 2225  for (i = 1; i < argc; i++)
2225      while (*s != 0)      while (*s != 0)
2226        {        {
2227        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2228          { if (*s == op->one_char) break; }          {
2229            if (*s == op->one_char) break;
2230            }
2231        if (op->one_char == 0)        if (op->one_char == 0)
2232          {          {
2233          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2234            *s, argv[i]);            *s, argv[i]);
2235          pcregrep_exit(usage(2));          pcregrep_exit(usage(2));
2236          }          }
2237        if (op->type != OP_NODATA || s[1] == 0)  
2238          /* Check for a single-character option that has data: OP_OP_NUMBER
2239          is used for one that either has a numerical number or defaults, i.e. the
2240          data is optional. If a digit follows, there is data; if not, carry on
2241          with other single-character options in the same string. */
2242    
2243          option_data = s+1;
2244          if (op->type == OP_OP_NUMBER)
2245            {
2246            if (isdigit((unsigned char)s[1])) break;
2247            }
2248          else   /* Check for end or a dataless option */
2249          {          {
2250          option_data = s+1;          if (op->type != OP_NODATA || s[1] == 0) break;
         break;  
2251          }          }
2252    
2253          /* Handle a single-character option with no data, then loop for the
2254          next character in the string. */
2255    
2256        pcre_options = handle_option(*s++, pcre_options);        pcre_options = handle_option(*s++, pcre_options);
2257        }        }
2258      }      }
# Line 2182  for (i = 1; i < argc; i++) Line 2269  for (i = 1; i < argc; i++)
2269    
2270    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2271    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2272    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r",
2273    Jeffrey Friedl's special -S debugging option. */    "only-matching", and Jeffrey Friedl's special -S debugging option. */
2274    
2275    if (*option_data == 0 &&    if (*option_data == 0 &&
2276        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
# Line 2193  for (i = 1; i < argc; i++) Line 2280  for (i = 1; i < argc; i++)
2280        case N_COLOUR:        case N_COLOUR:
2281        colour_option = (char *)"auto";        colour_option = (char *)"auto";
2282        break;        break;
2283    
2284          case 'o':
2285          only_matching = 0;
2286          break;
2287    
2288  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2289        case 'S':        case 'S':
2290        S_arg = 0;        S_arg = 0;
# Line 2230  for (i = 1; i < argc; i++) Line 2322  for (i = 1; i < argc; i++)
2322    
2323    /* Otherwise, deal with single string or numeric data values. */    /* Otherwise, deal with single string or numeric data values. */
2324    
2325    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2326               op->type != OP_OP_NUMBER)
2327      {      {
2328      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2329      }      }
# Line 2245  for (i = 1; i < argc; i++) Line 2338  for (i = 1; i < argc; i++)
2338      while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;      while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2339      while (isdigit((unsigned char)(*endptr)))      while (isdigit((unsigned char)(*endptr)))
2340        n = n * 10 + (int)(*endptr++ - '0');        n = n * 10 + (int)(*endptr++ - '0');
2341        if (toupper(*endptr) == 'K')
2342          {
2343          n *= 1024;
2344          endptr++;
2345          }
2346        else if (toupper(*endptr) == 'M')
2347          {
2348          n *= 1024*1024;
2349          endptr++;
2350          }
2351      if (*endptr != 0)      if (*endptr != 0)
2352        {        {
2353        if (longop)        if (longop)
# Line 2260  for (i = 1; i < argc; i++) Line 2363  for (i = 1; i < argc; i++)
2363            option_data, op->one_char);            option_data, op->one_char);
2364        pcregrep_exit(usage(2));        pcregrep_exit(usage(2));
2365        }        }
2366      *((int *)op->dataptr) = n;      if (op->type == OP_LONGNUMBER)
2367            *((unsigned long int *)op->dataptr) = n;
2368        else
2369            *((int *)op->dataptr) = n;
2370      }      }
2371    }    }
2372    
# Line 2274  if (both_context > 0) Line 2380  if (both_context > 0)
2380    }    }
2381    
2382  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2383  However, the latter two set the only_matching flag. */  However, the latter two set only_matching. */
2384    
2385  if ((only_matching && (file_offsets || line_offsets)) ||  if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2386      (file_offsets && line_offsets))      (file_offsets && line_offsets))
2387    {    {
2388    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
# Line 2284  if ((only_matching && (file_offsets || l Line 2390  if ((only_matching && (file_offsets || l
2390    pcregrep_exit(usage(2));    pcregrep_exit(usage(2));
2391    }    }
2392    
2393  if (file_offsets || line_offsets) only_matching = TRUE;  if (file_offsets || line_offsets) only_matching = 0;
2394    
2395  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2396  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
# Line 2408  if (jfriedl_XT != 0 || jfriedl_XR != 0) Line 2514  if (jfriedl_XT != 0 || jfriedl_XR != 0)
2514    }    }
2515  #endif  #endif
2516    
2517  /* Get memory to store the pattern and hints lists. */  /* Get memory for the main buffer, and to store the pattern and hints lists. */
2518    
2519    bufsize = 3*bufthird;
2520    main_buffer = (char *)malloc(bufsize);
2521  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2522  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2523    
2524  if (pattern_list == NULL || hints_list == NULL)  if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2525    {    {
2526    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2527    goto EXIT2;    goto EXIT2;
# Line 2445  if (pattern_filename != NULL) Line 2553  if (pattern_filename != NULL)
2553    int linenumber = 0;    int linenumber = 0;
2554    FILE *f;    FILE *f;
2555    char *filename;    char *filename;
2556    char buffer[MBUFTHIRD];    char buffer[PATBUFSIZE];
2557    
2558    if (strcmp(pattern_filename, "-") == 0)    if (strcmp(pattern_filename, "-") == 0)
2559      {      {
# Line 2464  if (pattern_filename != NULL) Line 2572  if (pattern_filename != NULL)
2572      filename = pattern_filename;      filename = pattern_filename;
2573      }      }
2574    
2575    while (fgets(buffer, MBUFTHIRD, f) != NULL)    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2576      {      {
2577      char *s = buffer + (int)strlen(buffer);      char *s = buffer + (int)strlen(buffer);
2578      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
# Line 2478  if (pattern_filename != NULL) Line 2586  if (pattern_filename != NULL)
2586    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
2587    }    }
2588    
2589  /* Study the regular expressions, as we will be running them many times */  /* Study the regular expressions, as we will be running them many times. Unless
2590    JIT has been explicitly disabled, arrange a stack for it to use. */
2591    
2592    #ifdef SUPPORT_PCREGREP_JIT
2593    if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2594      jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2595    #endif
2596    
2597  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2598    {    {
2599    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
2600    if (error != NULL)    if (error != NULL)
2601      {      {
2602      char s[16];      char s[16];
# Line 2491  for (j = 0; j < pattern_count; j++) Line 2605  for (j = 0; j < pattern_count; j++)
2605      goto EXIT2;      goto EXIT2;
2606      }      }
2607    hint_count++;    hint_count++;
2608    #ifdef SUPPORT_PCREGREP_JIT
2609      if (jit_stack != NULL && hints_list[j] != NULL)
2610        pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);
2611    #endif
2612    }    }
2613    
2614  /* If --match-limit or --recursion-limit was set, put the value(s) into the  /* If --match-limit or --recursion-limit was set, put the value(s) into the
2615  pcre_extra block for each pattern. */  pcre_extra block for each pattern. */
2616    
# Line 2503  if (match_limit > 0 || match_limit_recur Line 2621  if (match_limit > 0 || match_limit_recur
2621      if (hints_list[j] == NULL)      if (hints_list[j] == NULL)
2622        {        {
2623        hints_list[j] = malloc(sizeof(pcre_extra));        hints_list[j] = malloc(sizeof(pcre_extra));
2624        if (hints_list[j] == NULL)        if (hints_list[j] == NULL)
2625          {          {
2626          fprintf(stderr, "pcregrep: malloc failed\n");          fprintf(stderr, "pcregrep: malloc failed\n");
2627          pcregrep_exit(2);          pcregrep_exit(2);
2628          }          }
2629        }        }
2630      if (match_limit > 0)      if (match_limit > 0)
2631        {        {
2632        hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;        hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2633        hints_list[j]->match_limit = match_limit;        hints_list[j]->match_limit = match_limit;
2634        }        }
2635      if (match_limit_recursion > 0)      if (match_limit_recursion > 0)
2636        {        {
2637        hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;        hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2638        hints_list[j]->match_limit_recursion = match_limit_recursion;        hints_list[j]->match_limit_recursion = match_limit_recursion;
2639        }        }
2640      }      }
2641    }    }
2642    
2643  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
2644    
# Line 2576  if (include_dir_pattern != NULL) Line 2694  if (include_dir_pattern != NULL)
2694    
2695  if (i >= argc)  if (i >= argc)
2696    {    {
2697    rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);    rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2698        (filenames > FN_DEFAULT)? stdin_name : NULL);
2699    goto EXIT;    goto EXIT;
2700    }    }
2701    
# Line 2596  for (; i < argc; i++) Line 2715  for (; i < argc; i++)
2715    }    }
2716    
2717  EXIT:  EXIT:
2718    #ifdef SUPPORT_PCREGREP_JIT
2719    if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2720    #endif
2721    if (main_buffer != NULL) free(main_buffer);
2722  if (pattern_list != NULL)  if (pattern_list != NULL)
2723    {    {
2724    for (i = 0; i < pattern_count; i++) free(pattern_list[i]);    for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
# Line 2603  if (pattern_list != NULL) Line 2726  if (pattern_list != NULL)
2726    }    }
2727  if (hints_list != NULL)  if (hints_list != NULL)
2728    {    {
2729    for (i = 0; i < hint_count; i++)    for (i = 0; i < hint_count; i++)
2730      {      {
2731      if (hints_list[i] != NULL) free(hints_list[i]);      if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);
2732      }      }
2733    free(hints_list);    free(hints_list);
2734    }    }
2735  pcregrep_exit(rc);  pcregrep_exit(rc);

Legend:
Removed from v.564  
changed lines
  Added in v.779

  ViewVC Help
Powered by ViewVC 1.1.5