/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 571 by ph10, Tue Nov 16 17:51:37 2010 UTC | revision 586 by ph10, Wed Jan 12 17:36:47 2011 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2010 University of Cambridge             Copyright (c) 1997-2011 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 186  static BOOL utf8 = FALSE; Line 186  static BOOL utf8 = FALSE;
186    
187  /* Structure for options and list of them */  /* Structure for options and list of them */
188    
189  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
190         OP_PATLIST };         OP_OP_NUMBER, OP_PATLIST };
191    
192  typedef struct option_item {  typedef struct option_item {
193    int type;    int type;
# Line 216  used to identify them. */ Line 216  used to identify them. */
216  #define N_M_LIMIT_REC  (-14)  #define N_M_LIMIT_REC  (-14)
217    
218  static option_item optionlist[] = {  static option_item optionlist[] = {
219    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "  terminate options" },
220    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
221    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
222    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
223    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
224    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
225    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
226    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
227    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
228    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
229    { OP_PATLIST,   'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },    { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
230    { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
231    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },
232    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
233    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
234    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
235    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
236    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
237    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
238    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
239    { OP_NODATA,    N_LBUFFER, NULL,             "line-buffered", "use line buffering" },    { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
240    { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },    { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
241    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
242    { OP_NUMBER,    N_M_LIMIT,&match_limit,      "match-limit=number", "set PCRE match limit option" },    { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
243    { OP_NUMBER,    N_M_LIMIT_REC,&match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },    { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
245    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },    { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
247    { OP_OP_NUMBER, 'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },    { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
248    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
249    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
250    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
251    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
252    { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },    { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
253    { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },    { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
254    
255    /* These two were accidentally implemented with underscores instead of    /* These two were accidentally implemented with underscores instead of
256    hyphens in the option names. As this was not discovered for several releases,    hyphens in the option names. As this was not discovered for several releases,
257    the incorrect versions are left in the table for compatibility. However, the    the incorrect versions are left in the table for compatibility. However, the
258    --help function misses out any option that has an underscore in its name. */    --help function misses out any option that has an underscore in its name. */
259    
260    { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },    { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
261    { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },    { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
262    
# Line 296  const char utf8_table4[] = { Line 296  const char utf8_table4[] = {
296    
297    
298  /*************************************************  /*************************************************
299    *         Exit from the program                  *
300    *************************************************/
301    
302    /* If there has been a resource error, give a suitable message.
303    
304    Argument:  the return code
305    Returns:   does not return
306    */
307    
308    static void
309    pcregrep_exit(int rc)
310    {
311    if (resource_error)
312      {
313      fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
314        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
315      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
316      }
317    
318    exit(rc);
319    }
320    
321    
322    /*************************************************
323  *            OS-specific functions               *  *            OS-specific functions               *
324  *************************************************/  *************************************************/
325    
# Line 565  return sys_errlist[n]; Line 589  return sys_errlist[n];
589    
590    
591  /*************************************************  /*************************************************
 *         Exit from the program                  *  
 *************************************************/  
   
 /* If there has been a resource error, give a suitable message.  
   
 Argument:  the return code  
 Returns:   does not return  
 */  
   
 static void  
 pcregrep_exit(int rc)  
 {  
 if (resource_error)  
   {  
   fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "  
     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);  
   fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");  
   }  
   
 exit(rc);  
 }  
   
   
   
 /*************************************************  
592  *            Read one line of input              *  *            Read one line of input              *
593  *************************************************/  *************************************************/
594    
# Line 956  if (slen > 200) Line 955  if (slen > 200)
955    {    {
956    slen = 200;    slen = 200;
957    msg = "text that starts:\n\n";    msg = "text that starts:\n\n";
958    }    }
959  for (i = 0; i < pattern_count; i++)  for (i = 0; i < pattern_count; i++)
960    {    {
961    *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,    *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
# Line 1183  while (ptr < endptr) Line 1182  while (ptr < endptr)
1182    
1183      else if (quiet) return 0;      else if (quiet) return 0;
1184    
1185      /* The --only-matching option prints just the substring that matched, or a      /* The --only-matching option prints just the substring that matched, or a
1186      captured portion of it, as long as this string is not empty, and the      captured portion of it, as long as this string is not empty, and the
1187      --file-offsets and --line-offsets options output offsets for the matching      --file-offsets and --line-offsets options output offsets for the matching
1188      substring (they both force --only-matching = 0). None of these options      substring (they both force --only-matching = 0). None of these options
# Line 1202  while (ptr < endptr) Line 1201  while (ptr < endptr)
1201            fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),            fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1202              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1203          else if (file_offsets)          else if (file_offsets)
1204            fprintf(stdout, "%d,%d\n",            fprintf(stdout, "%d,%d\n",
1205              (int)(filepos + matchptr + offsets[0] - ptr),              (int)(filepos + matchptr + offsets[0] - ptr),
1206              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1207          else if (only_matching < mrc)          else if (only_matching < mrc)
1208            {            {
1209            int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];            int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1210            if (plen > 0)            if (plen > 0)
1211              {              {
1212              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1213              FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);              FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1214              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1215              fprintf(stdout, "\n");              fprintf(stdout, "\n");
1216              }              }
1217            }            }
1218          else if (printname != NULL || number) fprintf(stdout, "\n");          else if (printname != NULL || number) fprintf(stdout, "\n");
1219          matchptr += offsets[1];          matchptr += offsets[1];
# Line 1355  while (ptr < endptr) Line 1354  while (ptr < endptr)
1354  #endif  #endif
1355    
1356        /* We have to split the line(s) up if colouring, and search for further        /* We have to split the line(s) up if colouring, and search for further
1357        matches. */        matches, but not of course if the line is a non-match. */
1358    
1359        if (do_colour)        if (do_colour && !invert)
1360          {          {
1361          int last_offset = 0;          int last_offset = 0;
1362          FWRITE(ptr, 1, offsets[0], stdout);          FWRITE(ptr, 1, offsets[0], stdout);
# Line 1791  for (op = optionlist; op->one_char != 0; Line 1790  for (op = optionlist; op->one_char != 0;
1790    {    {
1791    int n;    int n;
1792    char s[4];    char s[4];
1793    
1794    /* Two options were accidentally implemented and documented with underscores    /* Two options were accidentally implemented and documented with underscores
1795    instead of hyphens in their names, something that was not noticed for quite a    instead of hyphens in their names, something that was not noticed for quite a
1796    few releases. When fixing this, I left the underscored versions in the list    few releases. When fixing this, I left the underscored versions in the list
1797    in case people were using them. However, we don't want to display them in the    in case people were using them. However, we don't want to display them in the
1798    help data. There are no other options that contain underscores, and we do not    help data. There are no other options that contain underscores, and we do not
1799    expect ever to implement such options. Therefore, just omit any option that    expect ever to implement such options. Therefore, just omit any option that
1800    contains an underscore. */    contains an underscore. */
1801    
1802    if (strchr(op->long_name, '_') != NULL) continue;    if (strchr(op->long_name, '_') != NULL) continue;
1803    
1804    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1805    n = 31 - printf("  %s --%s", s, op->long_name);    n = 31 - printf("  %s --%s", s, op->long_name);
1806    if (n < 1) n = 1;    if (n < 1) n = 1;
# Line 2181  for (i = 1; i < argc; i++) Line 2180  for (i = 1; i < argc; i++)
2180      while (*s != 0)      while (*s != 0)
2181        {        {
2182        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2183          {          {
2184          if (*s == op->one_char) break;          if (*s == op->one_char) break;
2185          }          }
2186        if (op->one_char == 0)        if (op->one_char == 0)
2187          {          {
# Line 2190  for (i = 1; i < argc; i++) Line 2189  for (i = 1; i < argc; i++)
2189            *s, argv[i]);            *s, argv[i]);
2190          pcregrep_exit(usage(2));          pcregrep_exit(usage(2));
2191          }          }
2192    
2193        /* Check for a single-character option that has data: OP_OP_NUMBER        /* Check for a single-character option that has data: OP_OP_NUMBER
2194        is used for one that either has a numerical number or defaults, i.e. the        is used for one that either has a numerical number or defaults, i.e. the
2195        data is optional. If a digit follows, there is data; if not, carry on        data is optional. If a digit follows, there is data; if not, carry on
2196        with other single-character options in the same string. */        with other single-character options in the same string. */
2197    
2198        option_data = s+1;        option_data = s+1;
2199        if (op->type == OP_OP_NUMBER)        if (op->type == OP_OP_NUMBER)
2200          {          {
2201          if (isdigit((unsigned char)s[1])) break;          if (isdigit((unsigned char)s[1])) break;
2202          }          }
2203        else   /* Check for end or a dataless option */        else   /* Check for end or a dataless option */
2204          {          {
2205          if (op->type != OP_NODATA || s[1] == 0) break;          if (op->type != OP_NODATA || s[1] == 0) break;
2206          }          }
2207    
2208        /* Handle a single-character option with no data, then loop for the        /* Handle a single-character option with no data, then loop for the
2209        next character in the string. */        next character in the string. */
2210    
2211        pcre_options = handle_option(*s++, pcre_options);        pcre_options = handle_option(*s++, pcre_options);
# Line 2225  for (i = 1; i < argc; i++) Line 2224  for (i = 1; i < argc; i++)
2224    
2225    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2226    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2227    separate item. At the moment, the only such options are "colo(u)r",    separate item. At the moment, the only such options are "colo(u)r",
2228    "only-matching", and Jeffrey Friedl's special -S debugging option. */    "only-matching", and Jeffrey Friedl's special -S debugging option. */
2229    
2230    if (*option_data == 0 &&    if (*option_data == 0 &&
# Line 2236  for (i = 1; i < argc; i++) Line 2235  for (i = 1; i < argc; i++)
2235        case N_COLOUR:        case N_COLOUR:
2236        colour_option = (char *)"auto";        colour_option = (char *)"auto";
2237        break;        break;
2238    
2239        case 'o':        case 'o':
2240        only_matching = 0;        only_matching = 0;
2241        break;        break;
2242    
2243  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2244        case 'S':        case 'S':
2245        S_arg = 0;        S_arg = 0;
# Line 2278  for (i = 1; i < argc; i++) Line 2277  for (i = 1; i < argc; i++)
2277    
2278    /* Otherwise, deal with single string or numeric data values. */    /* Otherwise, deal with single string or numeric data values. */
2279    
2280    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2281               op->type != OP_OP_NUMBER)
2282      {      {
2283      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2284      }      }
# Line 2308  for (i = 1; i < argc; i++) Line 2308  for (i = 1; i < argc; i++)
2308            option_data, op->one_char);            option_data, op->one_char);
2309        pcregrep_exit(usage(2));        pcregrep_exit(usage(2));
2310        }        }
2311      *((int *)op->dataptr) = n;      if (op->type == OP_LONGNUMBER)
2312            *((unsigned long int *)op->dataptr) = n;
2313        else
2314            *((int *)op->dataptr) = n;
2315      }      }
2316    }    }
2317    
# Line 2540  for (j = 0; j < pattern_count; j++) Line 2543  for (j = 0; j < pattern_count; j++)
2543      }      }
2544    hint_count++;    hint_count++;
2545    }    }
2546    
2547  /* If --match-limit or --recursion-limit was set, put the value(s) into the  /* If --match-limit or --recursion-limit was set, put the value(s) into the
2548  pcre_extra block for each pattern. */  pcre_extra block for each pattern. */
2549    
# Line 2551  if (match_limit > 0 || match_limit_recur Line 2554  if (match_limit > 0 || match_limit_recur
2554      if (hints_list[j] == NULL)      if (hints_list[j] == NULL)
2555        {        {
2556        hints_list[j] = malloc(sizeof(pcre_extra));        hints_list[j] = malloc(sizeof(pcre_extra));
2557        if (hints_list[j] == NULL)        if (hints_list[j] == NULL)
2558          {          {
2559          fprintf(stderr, "pcregrep: malloc failed\n");          fprintf(stderr, "pcregrep: malloc failed\n");
2560          pcregrep_exit(2);          pcregrep_exit(2);
2561          }          }
2562        }        }
2563      if (match_limit > 0)      if (match_limit > 0)
2564        {        {
2565        hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;        hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2566        hints_list[j]->match_limit = match_limit;        hints_list[j]->match_limit = match_limit;
2567        }        }
2568      if (match_limit_recursion > 0)      if (match_limit_recursion > 0)
2569        {        {
2570        hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;        hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2571        hints_list[j]->match_limit_recursion = match_limit_recursion;        hints_list[j]->match_limit_recursion = match_limit_recursion;
2572        }        }
2573      }      }
2574    }    }
2575    
2576  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
2577    
# Line 2651  if (pattern_list != NULL) Line 2654  if (pattern_list != NULL)
2654    }    }
2655  if (hints_list != NULL)  if (hints_list != NULL)
2656    {    {
2657    for (i = 0; i < hint_count; i++)    for (i = 0; i < hint_count; i++)
2658      {      {
2659      if (hints_list[i] != NULL) free(hints_list[i]);      if (hints_list[i] != NULL) free(hints_list[i]);
2660      }      }
2661    free(hints_list);    free(hints_list);
2662    }    }
2663  pcregrep_exit(rc);  pcregrep_exit(rc);

Legend:
Removed from v.571  
changed lines
  Added in v.586

  ViewVC Help
Powered by ViewVC 1.1.5