/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 1087 by chpe, Tue Oct 16 15:55:38 2012 UTC revision 1090 by chpe, Tue Oct 16 15:55:48 2012 UTC
# Line 831  static const unsigned char *last_callout Line 831  static const unsigned char *last_callout
831    
832  static int buffer_size = 50000;  static int buffer_size = 50000;
833  static pcre_uint8 *buffer = NULL;  static pcre_uint8 *buffer = NULL;
 static pcre_uint8 *dbuffer = NULL;  
834  static pcre_uint8 *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
835    
836  /* Another buffer is needed for translation to 16/32-bit character strings. It will  /* Another buffer is needed for translation to 16/32-bit character strings. It will
# Line 1666  for (;;) Line 1665  for (;;)
1665      {      {
1666      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1667      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
     pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);  
1668      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1669    
1670      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_pbuffer == NULL)
1671        {        {
1672        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1673        exit(1);        exit(1);
# Line 1684  for (;;) Line 1682  for (;;)
1682      here = new_buffer + (here - buffer);      here = new_buffer + (here - buffer);
1683    
1684      free(buffer);      free(buffer);
     free(dbuffer);  
1685      free(pbuffer);      free(pbuffer);
1686    
1687      buffer = new_buffer;      buffer = new_buffer;
     dbuffer = new_dbuffer;  
1688      pbuffer = new_pbuffer;      pbuffer = new_pbuffer;
1689      }      }
1690    }    }
# Line 2719  int all_use_dfa = 0; Line 2715  int all_use_dfa = 0;
2715  int verify_jit = 0;  int verify_jit = 0;
2716  int yield = 0;  int yield = 0;
2717  int stack_size;  int stack_size;
2718    pcre_uint8 *dbuffer = NULL;
2719    size_t dbuffer_size = 1u << 14;
2720    
2721  #if !defined NOPOSIX  #if !defined NOPOSIX
2722  int posix = 0;  int posix = 0;
# Line 2762  debugging. They grow automatically when Line 2760  debugging. They grow automatically when
2760  and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */  and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2761    
2762  buffer = (pcre_uint8 *)malloc(buffer_size);  buffer = (pcre_uint8 *)malloc(buffer_size);
 dbuffer = (pcre_uint8 *)malloc(buffer_size);  
2763  pbuffer = (pcre_uint8 *)malloc(buffer_size);  pbuffer = (pcre_uint8 *)malloc(buffer_size);
2764    
2765  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
# Line 4060  while (!done) Line 4057  while (!done)
4057    
4058    for (;;)    for (;;)
4059      {      {
4060      pcre_uint8 *q;  #ifdef SUPPORT_PCRE8
4061        pcre_uint8 *q8;
4062    #endif
4063    #ifdef SUPPORT_PCRE16
4064        pcre_uint16 *q16;
4065    #endif
4066    #ifdef SUPPORT_PCRE32
4067        pcre_uint32 *q32;
4068    #endif
4069      pcre_uint8 *bptr;      pcre_uint8 *bptr;
4070      int *use_offsets = offsets;      int *use_offsets = offsets;
4071      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
# Line 4132  while (!done) Line 4137  while (!done)
4137      p = buffer;      p = buffer;
4138      while (isspace(*p)) p++;      while (isspace(*p)) p++;
4139    
4140      bptr = q = dbuffer;  #ifndef NOUTF
4141        /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4142           invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4143        if (use_utf)
4144          {
4145          char *q;
4146          pcre_uint32 c;
4147          int n = 1;
4148    
4149          for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &c);
4150          if (n <= 0)
4151            {
4152            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4153            goto NEXT_DATA;
4154            }
4155          }
4156    #endif
4157    
4158        /* Allocate a buffer to hold the data line. len+1 is an upper bound on
4159           the number of pcre_uchar units that will be needed. */
4160        if (dbuffer == NULL || len >= dbuffer_size)
4161          {
4162          dbuffer_size *= 2;
4163          dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4164          if (dbuffer == NULL)
4165            {
4166            fprintf(stderr, "pcretest: malloc(%d) failed\n", dbuffer_size);
4167            exit(1);
4168            }
4169          }
4170    
4171    #ifdef SUPPORT_PCRE8
4172        q8 = (pcre_uint8 *) dbuffer;
4173    #endif
4174    #ifdef SUPPORT_PCRE16
4175        q16 = (pcre_uint16 *) dbuffer;
4176    #endif
4177    #ifdef SUPPORT_PCRE32
4178        q32 = (pcre_uint32 *) dbuffer;
4179    #endif
4180    
4181      while ((c = *p++) != 0)      while ((c = *p++) != 0)
4182        {        {
4183        int i = 0;        int i = 0;
# Line 4145  while (!done) Line 4190  while (!done)
4190    
4191        if (c != '\\')        if (c != '\\')
4192          {          {
4193          if (use_utf)  #ifndef NOUTF
4194            {          if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4195            *q++ = c;  #endif
           continue;  
           }  
4196          }          }
4197    
4198        /* Handle backslash escapes */        /* Handle backslash escapes */
# Line 4210  while (!done) Line 4253  while (!done)
4253            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4254            p++;            p++;
4255            }            }
4256          if (use_utf)  #if !defined NOUTF && defined SUPPORT_PCRE8
4257            if (use_utf && (pcre_mode == PCRE8_MODE))
4258            {            {
4259            *q++ = c;            *q8++ = c;
4260            continue;            continue;
4261            }            }
4262    #endif
4263          break;          break;
4264    
4265          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 4427  while (!done) Line 4472  while (!done)
4472        than 127       in UTF mode must have come from \x{...} or octal constructs        than 127       in UTF mode must have come from \x{...} or octal constructs
4473        because values from \x.. get this far only in non-UTF mode. */        because values from \x.. get this far only in non-UTF mode. */
4474    
4475  #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32  #ifdef SUPPORT_PCRE8
4476        if (pcre_mode != PCRE8_MODE || use_utf)        if (pcre_mode == PCRE8_MODE)
4477          {          {
4478          pcre_uint8 buff8[8];  #ifndef NOUTF
4479          int ii, utn;          if (use_utf)
4480          utn = ord2utf8(c, buff8);            {
4481          for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];            q8 += ord2utf8(c, q8);
4482              }
4483            else
4484    #endif
4485              {
4486              if (c > 0xffu)
4487                {
4488                fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4489                  "and UTF-8 mode is not enabled.\n", c);
4490                fprintf(outfile, "** Truncation will probably give the wrong "
4491                  "result.\n");
4492                }
4493    
4494              *q8++ = c;
4495              }
4496          }          }
       else  
4497  #endif  #endif
4498    #ifdef SUPPORT_PCRE16
4499          if (pcre_mode == PCRE16_MODE)
4500          {          {
4501          if (c > 255)  #ifndef NOUTF
4502            if (use_utf)
4503              {
4504              if (c > 0x10ffffu)
4505                {
4506                fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4507                  "cannot be converted to UTF-16\n");
4508                goto NEXT_DATA;
4509                }
4510              else if (c >= 0x10000u)
4511                {
4512                c-= 0x10000u;
4513                *q16++ = 0xD800 | (c >> 10);
4514                *q16++ = 0xDC00 | (c & 0x3ff);
4515                }
4516              else
4517                *q16++ = c;
4518              }
4519            else
4520    #endif
4521            {            {
4522            fprintf(outfile, "** Character \\x{%x} is greater than 255 "            if (c > 0xffffu)
4523              "and UTF-8 mode is not enabled.\n", c);              {
4524            fprintf(outfile, "** Truncation will probably give the wrong "              fprintf(outfile, "** Character value is greater than 0xffff "
4525              "result.\n");                "and UTF-16 mode is not enabled.\n", c);
4526                fprintf(outfile, "** Truncation will probably give the wrong "
4527                  "result.\n");
4528                }
4529    
4530              *q16++ = c;
4531            }            }
         *q++ = c;  
4532          }          }
4533    #endif
4534    #ifdef SUPPORT_PCRE32
4535          if (pcre_mode == PCRE32_MODE)
4536            {
4537            *q32++ = c;
4538            }
4539    #endif
4540    
4541        }        }
4542    
4543      /* Reached end of subject string */      /* Reached end of subject string */
4544    
4545      *q = 0;  #ifdef SUPPORT_PCRE8
4546      len = (int)(q - dbuffer);      if (pcre_mode == PCRE8_MODE)
4547        {
4548          *q8 = 0;
4549          len = (int)(q8 - (pcre_uint8 *)dbuffer);
4550        }
4551    #endif
4552    #ifdef SUPPORT_PCRE16
4553        if (pcre_mode == PCRE16_MODE)
4554        {
4555          *q16 = 0;
4556          len = (int)(q16 - (pcre_uint16 *)dbuffer);
4557        }
4558    #endif
4559    #ifdef SUPPORT_PCRE32
4560        if (pcre_mode == PCRE32_MODE)
4561        {
4562          *q32 = 0;
4563          len = (int)(q32 - (pcre_uint32 *)dbuffer);
4564        }
4565    #endif
4566    
4567      /* Move the data to the end of the buffer so that a read over the end of      /* Move the data to the end of the buffer so that a read over the end of
4568      the buffer will be seen by valgrind, even if it doesn't cause a crash. If      the buffer will be seen by valgrind, even if it doesn't cause a crash. If
4569      we are using the POSIX interface, we must include the terminating zero. */      we are using the POSIX interface, we must include the terminating zero. */
4570    
4571        bptr = dbuffer;
4572    
4573  #if !defined NOPOSIX  #if !defined NOPOSIX
4574      if (posix || do_posix)      if (posix || do_posix)
4575        {        {
4576        memmove(bptr + buffer_size - len - 1, bptr, len + 1);        memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
4577        bptr += buffer_size - len - 1;        bptr += dbuffer_size - len - 1;
4578        }        }
4579      else      else
4580  #endif  #endif
4581        {        {
4582        memmove(bptr + buffer_size - len, bptr, len);        bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
       bptr += buffer_size - len;  
4583        }        }
4584    
4585      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
# Line 4532  while (!done) Line 4643  while (!done)
4643    
4644      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
4645    
 #ifdef SUPPORT_PCRE16  
     if (pcre_mode == PCRE16_MODE)  
       {  
       len = to16(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF8, len);  
       switch(len)  
         {  
         case -1:  
         fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "  
           "converted to UTF-16\n");  
         goto NEXT_DATA;  
   
         case -2:  
         fprintf(outfile, "**Failed: character value greater than 0x10ffff "  
           "cannot be converted to UTF-16\n");  
         goto NEXT_DATA;  
   
         case -3:  
         fprintf(outfile, "**Failed: character value greater than 0xffff "  
           "cannot be converted to 16-bit in non-UTF mode\n");  
         goto NEXT_DATA;  
   
         default:  
         break;  
         }  
       bptr = (pcre_uint8 *)buffer16;  
       }  
 #endif  
   
 #ifdef SUPPORT_PCRE32  
     if (pcre_mode == PCRE32_MODE)  
       {  
       len = to32(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF32, len);  
       switch(len)  
         {  
         case -1:  
         fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "  
           "converted to UTF-32\n");  
         goto NEXT_DATA;  
   
         case -2:  
         fprintf(outfile, "**Failed: character value greater than 0x10ffff "  
           "cannot be converted to UTF-32\n");  
         goto NEXT_DATA;  
   
         case -3:  
         fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");  
         goto NEXT_DATA;  
   
         default:  
         break;  
         }  
       bptr = (pcre_uint8 *)buffer32;  
       }  
 #endif  
   
4646      /* Ensure that there is a JIT callback if we want to verify that JIT was      /* Ensure that there is a JIT callback if we want to verify that JIT was
4647      actually used. If jit_stack == NULL, no stack has yet been assigned. */      actually used. If jit_stack == NULL, no stack has yet been assigned. */
4648    

Legend:
Removed from v.1087  
changed lines
  Added in v.1090

  ViewVC Help
Powered by ViewVC 1.1.5