/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 93 by nigel, Sat Feb 24 21:41:42 2007 UTC revision 336 by ph10, Sat Apr 12 15:59:03 2008 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #include <unistd.h>
53    #include <readline/readline.h>
54    #include <readline/history.h>
55    #endif
56    
57    
58  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
59  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 67  input mode under Windows. */ Line 77  input mode under Windows. */
77  #endif  #endif
78    
79    
80  #define PCRE_SPY        /* For Win32 build, import data, not export */  /* We have to include pcre_internal.h because we need the internal info for
81    displaying the results of pcre_study() and we also need to know about the
82  /* We include pcre_internal.h because we need the internal info for displaying  internal macros, structures, and other internal data values; pcretest has
83  the results of pcre_study() and we also need to know about the internal  "inside information" compared to a program that strictly follows the PCRE API.
84  macros, structures, and other internal data values; pcretest has "inside  
85  information" compared to a program that strictly follows the PCRE API. */  Although pcre_internal.h does itself include pcre.h, we explicitly include it
86    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
87    appropriately for an application, not for building PCRE. */
88    
89    #include "pcre.h"
90  #include "pcre_internal.h"  #include "pcre_internal.h"
91    
92  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to the data tables that PCRE uses. So as not to have to keep
# Line 87  symbols to prevent clashes. */ Line 100  symbols to prevent clashes. */
100  #define _pcre_utf8_table4      utf8_table4  #define _pcre_utf8_table4      utf8_table4
101  #define _pcre_utt              utt  #define _pcre_utt              utt
102  #define _pcre_utt_size         utt_size  #define _pcre_utt_size         utt_size
103    #define _pcre_utt_names        utt_names
104  #define _pcre_OP_lengths       OP_lengths  #define _pcre_OP_lengths       OP_lengths
105    
106  #include "pcre_tables.c"  #include "pcre_tables.c"
# Line 114  Makefile. */ Line 128  Makefile. */
128  #include "pcreposix.h"  #include "pcreposix.h"
129  #endif  #endif
130    
131  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, for the benefit of the version currently imported into
132  build pcretest without support for UTF8 (define NOUTF8), without the interface  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
133  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  interface to the DFA matcher (NODFA), and without the doublecheck of the old
134  function (define NOINFOCHECK). */  "info" function (define NOINFOCHECK). In fact, we automatically cut out the
135    UTF8 support if PCRE is built without it. */
136    
137    #ifndef SUPPORT_UTF8
138    #ifndef NOUTF8
139    #define NOUTF8
140    #endif
141    #endif
142    
143    
144  /* Other parameters */  /* Other parameters */
# Line 142  static int callout_count; Line 163  static int callout_count;
163  static int callout_extra;  static int callout_extra;
164  static int callout_fail_count;  static int callout_fail_count;
165  static int callout_fail_id;  static int callout_fail_id;
166    static int debug_lengths;
167  static int first_callout;  static int first_callout;
168  static int locale_set = 0;  static int locale_set = 0;
169  static int show_malloc;  static int show_malloc;
# Line 173  optimal way of handling this, but hey, t Line 195  optimal way of handling this, but hey, t
195  Arguments:  Arguments:
196    f            the file to read    f            the file to read
197    start        where in buffer to start (this *must* be within buffer)    start        where in buffer to start (this *must* be within buffer)
198      prompt       for stdin or readline()
199    
200  Returns:       pointer to the start of new data  Returns:       pointer to the start of new data
201                 could be a copy of start, or could be moved                 could be a copy of start, or could be moved
# Line 180  Returns:       pointer to the start of n Line 203  Returns:       pointer to the start of n
203  */  */
204    
205  static uschar *  static uschar *
206  extend_inputline(FILE *f, uschar *start)  extend_inputline(FILE *f, uschar *start, const char *prompt)
207  {  {
208  uschar *here = start;  uschar *here = start;
209    
# Line 191  for (;;) Line 214  for (;;)
214    if (rlen > 1000)    if (rlen > 1000)
215      {      {
216      int dlen;      int dlen;
217      if (fgets((char *)here, rlen,  f) == NULL)  
218        return (here == start)? NULL : start;      /* If libreadline support is required, use readline() to read a line if the
219        input is a terminal. Note that readline() removes the trailing newline, so
220        we must put it back again, to be compatible with fgets(). */
221    
222    #ifdef SUPPORT_LIBREADLINE
223        if (isatty(fileno(f)))
224          {
225          size_t len;
226          char *s = readline(prompt);
227          if (s == NULL) return (here == start)? NULL : start;
228          len = strlen(s);
229          if (len > 0) add_history(s);
230          if (len > rlen - 1) len = rlen - 1;
231          memcpy(here, s, len);
232          here[len] = '\n';
233          here[len+1] = 0;
234          free(s);
235          }
236        else
237    #endif
238    
239        /* Read the next line by normal means, prompting if the file is stdin. */
240    
241          {
242          if (f == stdin) printf(prompt);
243          if (fgets((char *)here, rlen,  f) == NULL)
244            return (here == start)? NULL : start;
245          }
246    
247      dlen = (int)strlen((char *)here);      dlen = (int)strlen((char *)here);
248      if (dlen > 0 && here[dlen - 1] == '\n') return start;      if (dlen > 0 && here[dlen - 1] == '\n') return start;
249      here += dlen;      here += dlen;
# Line 649  return count; Line 700  return count;
700    
701    
702  /*************************************************  /*************************************************
703    *         Case-independent strncmp() function    *
704    *************************************************/
705    
706    /*
707    Arguments:
708      s         first string
709      t         second string
710      n         number of characters to compare
711    
712    Returns:    < 0, = 0, or > 0, according to the comparison
713    */
714    
715    static int
716    strncmpic(uschar *s, uschar *t, int n)
717    {
718    while (n--)
719      {
720      int c = tolower(*s++) - tolower(*t++);
721      if (c) return c;
722      }
723    return 0;
724    }
725    
726    
727    
728    /*************************************************
729  *         Check newline indicator                *  *         Check newline indicator                *
730  *************************************************/  *************************************************/
731    
732  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes, where
733  xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
734    no match.
735    
736  Arguments:  Arguments:
737    p           points after the leading '<'    p           points after the leading '<'
# Line 665  Returns:      appropriate PCRE_NEWLINE_x Line 743  Returns:      appropriate PCRE_NEWLINE_x
/* Match the text at p against the known "<xxx>" keywords and return the
corresponding option value. Each line below shows the older revision on the
left and the newer one on the right: the older code compares with strncmp()
(case-sensitive) against cr>, lf>, crlf> and any>; the newer code compares
with strncmpic() (case-independent) and additionally recognises anycrlf>,
bsr_anycrlf> and bsr_unicode>, the last two yielding PCRE_BSR_xxx values
rather than PCRE_NEWLINE_xxx values. If nothing matches, a message showing the
offending text is written to f and 0 is returned. */
743  static int  static int
744  check_newline(uschar *p, FILE *f)  check_newline(uschar *p, FILE *f)
745  {  {
746  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
747  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
748  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
749  if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
750    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
751    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
752    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
753  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
754  return 0;  return 0;
755  }  }
# Line 682  return 0; Line 763  return 0;
763  static void  static void
764  usage(void)  usage(void)
765  {  {
766  printf("Usage:     pcretest [options] [<input> [<output>]]\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
767    printf("Input and output default to stdin and stdout.\n");
768    #ifdef SUPPORT_LIBREADLINE
769    printf("If input is a terminal, readline() is used to read from it.\n");
770    #else
771    printf("This version of pcretest is not linked with readline().\n");
772    #endif
773    printf("\nOptions:\n");
774  printf("  -b       show compiled code (bytecode)\n");  printf("  -b       show compiled code (bytecode)\n");
775  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
776  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
# Line 840  while (argc > 1 && argv[op][0] == '-') Line 928  while (argc > 1 && argv[op][0] == '-')
928      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
929      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
930        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
931          (rc == -2)? "ANYCRLF" :
932        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
933        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
934        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
935                                         "all Unicode newlines");
936      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
937      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
938      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
# Line 851  while (argc > 1 && argv[op][0] == '-') Line 943  while (argc > 1 && argv[op][0] == '-')
943      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %d\n", rc);
944      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
945      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
946      exit(0);      goto EXIT;
947      }      }
948    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(argv[op], "-help") == 0 ||
949             strcmp(argv[op], "--help") == 0)             strcmp(argv[op], "--help") == 0)
# Line 877  offsets = (int *)malloc(size_offsets_max Line 969  offsets = (int *)malloc(size_offsets_max
969  if (offsets == NULL)  if (offsets == NULL)
970    {    {
971    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
972      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
973    yield = 1;    yield = 1;
974    goto EXIT;    goto EXIT;
975    }    }
# Line 945  while (!done) Line 1037  while (!done)
1037    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
1038    
1039    use_utf8 = 0;    use_utf8 = 0;
1040      debug_lengths = 1;
1041    
1042    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (extend_inputline(infile, buffer) == NULL) break;  
1043    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1044    fflush(outfile);    fflush(outfile);
1045    
# Line 1047  while (!done) Line 1139  while (!done)
1139    
1140    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1141      {      {
1142      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1143      goto SKIP_DATA;      goto SKIP_DATA;
1144      }      }
1145    
# Line 1063  while (!done) Line 1155  while (!done)
1155        pp++;        pp++;
1156        }        }
1157      if (*pp != 0) break;      if (*pp != 0) break;
1158      if (infile == stdin) printf("    > ");      if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     if ((pp = extend_inputline(infile, pp)) == NULL)  
1159        {        {
1160        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1161        done = 1;        done = 1;
# Line 1127  while (!done) Line 1218  while (!done)
1218        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1219        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1220        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1221          case 'Z': debug_lengths = 0; break;
1222        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1223        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1224    
# Line 1155  while (!done) Line 1247  while (!done)
1247    
1248        case '<':        case '<':
1249          {          {
1250          int x = check_newline(pp, outfile);          if (strncmp((char *)pp, "JS>", 3) == 0)
1251          if (x == 0) goto SKIP_DATA;            {
1252          options |= x;            options |= PCRE_JAVASCRIPT_COMPAT;
1253          while (*pp++ != '>');            pp += 3;
1254              }
1255            else
1256              {
1257              int x = check_newline(pp, outfile);
1258              if (x == 0) goto SKIP_DATA;
1259              options |= x;
1260              while (*pp++ != '>');
1261              }
1262          }          }
1263        break;        break;
1264    
# Line 1237  while (!done) Line 1337  while (!done)
1337          {          {
1338          for (;;)          for (;;)
1339            {            {
1340            if (extend_inputline(infile, buffer) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1341              {              {
1342              done = 1;              done = 1;
1343              goto CONTINUE;              goto CONTINUE;
# Line 1300  while (!done) Line 1400  while (!done)
1400      if (do_flip)      if (do_flip)
1401        {        {
1402        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
1403        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number =
1404            byteflip(rre->magic_number, sizeof(rre->magic_number));
1405        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
1406        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
1407        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1408        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_bracket =
1409        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1410        rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));        rre->top_backref =
1411        rre->name_table_offset = byteflip(rre->name_table_offset,          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1412          rre->first_byte =
1413            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1414          rre->req_byte =
1415            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1416          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1417          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
1418        rre->name_entry_size = byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1419          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
1420        rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1421            sizeof(rre->name_count));
1422    
1423        if (extra != NULL)        if (extra != NULL)
1424          {          {
# Line 1328  while (!done) Line 1435  while (!done)
1435      if (do_debug)      if (do_debug)
1436        {        {
1437        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
1438        pcre_printint(re, outfile);        pcre_printint(re, outfile, debug_lengths);
1439        }        }
1440    
1441      if (do_showinfo)      if (do_showinfo)
# Line 1337  while (!done) Line 1444  while (!done)
1444  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1445        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1446  #endif  #endif
1447        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
1448            hascrorlf;
1449        int nameentrysize, namecount;        int nameentrysize, namecount;
1450        const uschar *nametable;        const uschar *nametable;
1451    
# Line 1350  while (!done) Line 1458  while (!done)
1458        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1459        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1460        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1461          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1462          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1463          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1464    
1465  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1466        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
# Line 1391  while (!done) Line 1502  while (!done)
1502            }            }
1503          }          }
1504    
1505        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1506        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1507    
1508        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1509        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
          }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1510    
1511        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1512          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1513            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1514            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1515            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1516            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1517            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1518            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1519              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1520              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1521            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1522            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1523            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
# Line 1419  while (!done) Line 1526  while (!done)
1526            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1527            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1528    
1529          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1530    
1531        switch (get_options & PCRE_NEWLINE_BITS)        switch (get_options & PCRE_NEWLINE_BITS)
1532          {          {
1533          case PCRE_NEWLINE_CR:          case PCRE_NEWLINE_CR:
# Line 1433  while (!done) Line 1542  while (!done)
1542          fprintf(outfile, "Forced newline sequence: CRLF\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
1543          break;          break;
1544    
1545            case PCRE_NEWLINE_ANYCRLF:
1546            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1547            break;
1548    
1549          case PCRE_NEWLINE_ANY:          case PCRE_NEWLINE_ANY:
1550          fprintf(outfile, "Forced newline sequence: ANY\n");          fprintf(outfile, "Forced newline sequence: ANY\n");
1551          break;          break;
# Line 1537  while (!done) Line 1650  while (!done)
1650        else        else
1651          {          {
1652          uschar sbuf[8];          uschar sbuf[8];
1653          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (uschar)((true_size >> 24) & 255);
1654          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (uschar)((true_size >> 16) & 255);
1655          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (uschar)((true_size >>  8) & 255);
1656          sbuf[3] = (true_size)  & 255;          sbuf[3] = (uschar)((true_size) & 255);
1657    
1658          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1659          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1660          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (uschar)((true_study_size >>  8) & 255);
1661          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (uschar)((true_study_size) & 255);
1662    
1663          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
1664              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1582  while (!done) Line 1695  while (!done)
1695    for (;;)    for (;;)
1696      {      {
1697      uschar *q;      uschar *q;
1698      uschar *bptr = dbuffer;      uschar *bptr;
1699      int *use_offsets = offsets;      int *use_offsets = offsets;
1700      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1701      int callout_data = 0;      int callout_data = 0;
# Line 1619  while (!done) Line 1732  while (!done)
1732      len = 0;      len = 0;
1733      for (;;)      for (;;)
1734        {        {
1735        if (infile == stdin) printf("data> ");        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
       if (extend_inputline(infile, buffer + len) == NULL)  
1736          {          {
1737          if (len > 0) break;          if (len > 0) break;
1738          done = 1;          done = 1;
# Line 1638  while (!done) Line 1750  while (!done)
1750      p = buffer;      p = buffer;
1751      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1752    
1753      q = dbuffer;      bptr = q = dbuffer;
1754      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1755        {        {
1756        int i = 0;        int i = 0;
# Line 1833  while (!done) Line 1945  while (!done)
1945            if (offsets == NULL)            if (offsets == NULL)
1946              {              {
1947              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1948                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1949              yield = 1;              yield = 1;
1950              goto EXIT;              goto EXIT;
1951              }              }
# Line 2202  while (!done) Line 2314  while (!done)
2314          }          }
2315    
2316        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2317        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2318        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2319        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2320        offset values to achieve this. We won't be at the end of the string -  
2321        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2322          "anycrlf". If the previous match was at the end of a line terminated by
2323          CRLF, an advance of one character just passes the \r, whereas we should
2324          prefer the longer newline sequence, as does the code in pcre_exec().
2325          Fudge the offset value to achieve this.
2326    
2327          Otherwise, in the case of UTF-8 matching, the advance must be one
2328          character, not one byte. */
2329    
2330        else        else
2331          {          {
2332          if (g_notempty != 0)          if (g_notempty != 0)
2333            {            {
2334            int onechar = 1;            int onechar = 1;
2335              unsigned int obits = ((real_pcre *)re)->options;
2336            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2337            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2338                {
2339                int d;
2340                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2341                obits = (d == '\r')? PCRE_NEWLINE_CR :
2342                        (d == '\n')? PCRE_NEWLINE_LF :
2343                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2344                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2345                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2346                }
2347              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2348                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2349                  &&
2350                  start_offset < len - 1 &&
2351                  bptr[start_offset] == '\r' &&
2352                  bptr[start_offset+1] == '\n')
2353                onechar++;
2354              else if (use_utf8)
2355              {              {
2356              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2357                {                {
# Line 2249  while (!done) Line 2386  while (!done)
2386        character. */        character. */
2387    
2388        g_notempty = 0;        g_notempty = 0;
2389    
2390        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2391          {          {
2392          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;

Legend:
Removed from v.93  
changed lines
  Added in v.336

  ViewVC Help
Powered by ViewVC 1.1.5