/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 841 by zherczeg, Sat Dec 31 07:04:43 2011 UTC | revision 842 by ph10, Sat Dec 31 15:19:04 2011 UTC
# Line 1126  for (j = i; j > 0; j--) Line 1126  for (j = i; j > 0; j--)
1126  *utf8bytes = utf8_table2[i] | cvalue;  *utf8bytes = utf8_table2[i] | cvalue;
1127  return i + 1;  return i + 1;
1128  }  }
1129  #endif /* NOUTF || SUPPORT_PCRE16 */  #endif
   
1130    
1131    
1132  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
# Line 1145  Note that this function does not object Line 1144  Note that this function does not object
1144  deliberate; it makes it possible to construct UTF-16 strings that are invalid,  deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1145  for the purpose of testing that they are correctly faulted.  for the purpose of testing that they are correctly faulted.
1146    
1147  Patterns to be converted are either plain ASCII or UTF-8; data lines are always  Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1148  in UTF-8 so that values greater than 255 can be handled.  in UTF-8 so that values greater than 255 can be handled.
1149    
1150  Arguments:  Arguments:
# Line 1157  Arguments: Line 1156  Arguments:
1156  Returns:     number of 16-bit data items used (excluding trailing zero)  Returns:     number of 16-bit data items used (excluding trailing zero)
1157               OR -1 if a UTF-8 string is malformed               OR -1 if a UTF-8 string is malformed
1158               OR -2 if a value > 0x10ffff is encountered               OR -2 if a value > 0x10ffff is encountered
1159               OR -3 if a value > 0xffff is encountered when not in UTF mode               OR -3 if a value > 0xffff is encountered when not in UTF mode
1160  */  */
1161    
1162  static int  static int
# Line 2336  while (argc > 1 && argv[op][0] == '-') Line 2335  while (argc > 1 && argv[op][0] == '-')
2335          goto EXIT;          goto EXIT;
2336          }          }
2337        if (strcmp(argv[op + 1], "newline") == 0)        if (strcmp(argv[op + 1], "newline") == 0)
2338          {          {
2339          (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);          (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2340          /* Note that these values are always the ASCII values, even          /* Note that these values are always the ASCII values, even
2341          in EBCDIC environments. CR is 13 and NL is 10. */          in EBCDIC environments. CR is 13 and NL is 10. */
# Line 2345  while (argc > 1 && argv[op][0] == '-') Line 2344  while (argc > 1 && argv[op][0] == '-')
2344            (rc == -2)? "ANYCRLF" :            (rc == -2)? "ANYCRLF" :
2345            (rc == -1)? "ANY" : "???");            (rc == -1)? "ANY" : "???");
2346          goto EXIT;          goto EXIT;
2347          }          }
2348        printf("Unknown -C option: %s\n", argv[op + 1]);        printf("Unknown -C option: %s\n", argv[op + 1]);
2349        goto EXIT;        goto EXIT;
2350        }        }
# Line 2869  while (!done) Line 2868  while (!done)
2868          fprintf(outfile, "**Failed: character value greater than 0x10ffff "          fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2869            "cannot be converted to UTF-16\n");            "cannot be converted to UTF-16\n");
2870          goto SKIP_DATA;          goto SKIP_DATA;
2871    
2872          case -3: /* "Impossible error" when to16 is called arg1 FALSE */          case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2873          fprintf(outfile, "**Failed: character value greater than 0xffff "          fprintf(outfile, "**Failed: character value greater than 0xffff "
2874            "cannot be converted to 16-bit in non-UTF mode\n");            "cannot be converted to 16-bit in non-UTF mode\n");
2875          goto SKIP_DATA;          goto SKIP_DATA;
2876    
2877          default:          default:
2878          break;          break;
# Line 3386  while (!done) Line 3385  while (!done)
3385        {        {
3386        int i = 0;        int i = 0;
3387        int n = 0;        int n = 0;
3388    
3389        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3390        In non-UTF mode, allow the value of the byte to fall through to later,        In non-UTF mode, allow the value of the byte to fall through to later,
3391        where values greater than 127 are turned into UTF-8 when running in        where values greater than 127 are turned into UTF-8 when running in
3392        16-bit mode. */        16-bit mode. */
3393    
3394        if (c != '\\')        if (c != '\\')
3395          {          {
3396          if (use_utf)          if (use_utf)
3397            {            {
3398            *q++ = c;            *q++ = c;
3399            continue;            continue;
3400            }            }
3401          }          }
3402    
3403        /* Handle backslash escapes */        /* Handle backslash escapes */
3404    
3405        else switch ((c = *p++))        else switch ((c = *p++))
3406          {          {
3407          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 3442  while (!done) Line 3441  while (!done)
3441            /* Not correct form for \x{...}; fall through */            /* Not correct form for \x{...}; fall through */
3442            }            }
3443    
3444          /* \x without {} always defines just one byte in 8-bit mode. This          /* \x without {} always defines just one byte in 8-bit mode. This
3445          allows UTF-8 characters to be constructed byte by byte, and also allows          allows UTF-8 characters to be constructed byte by byte, and also allows
3446          invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.          invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3447          Otherwise, pass it down to later code so that it can be turned into          Otherwise, pass it down to later code so that it can be turned into
3448          UTF-8 when running in 16-bit mode. */          UTF-8 when running in 16-bit mode. */
3449    
3450          c = 0;          c = 0;
# Line 3455  while (!done) Line 3454  while (!done)
3454            p++;            p++;
3455            }            }
3456          if (use_utf)          if (use_utf)
3457            {            {
3458            *q++ = c;            *q++ = c;
3459            continue;            continue;
3460            }            }
3461          break;          break;
3462    
3463          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 3663  while (!done) Line 3662  while (!done)
3662          continue;          continue;
3663          }          }
3664    
3665        /* We now have a character value in c that may be greater than 255. In        /* We now have a character value in c that may be greater than 255. In
3666        16-bit mode, we always convert characters to UTF-8 so that values greater        16-bit mode, we always convert characters to UTF-8 so that values greater
3667        than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we        than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3668        convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF        convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3669        mode must have come from \x{...} or octal constructs because values from        mode must have come from \x{...} or octal constructs because values from
3670        \x.. get this far only in non-UTF mode. */        \x.. get this far only in non-UTF mode. */
3671    
3672    #if !defined NOUTF || defined SUPPORT_PCRE16
3673        if (use_pcre16 || use_utf)        if (use_pcre16 || use_utf)
3674          {          {
3675          pcre_uint8 buff8[8];          pcre_uint8 buff8[8];
# Line 3678  while (!done) Line 3678  while (!done)
3678          for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];          for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3679          }          }
3680        else        else
3681    #endif
3682          {          {
3683          if (c > 255)          if (c > 255)
3684            {            {
# Line 3689  while (!done) Line 3690  while (!done)
3690          *q++ = c;          *q++ = c;
3691          }          }
3692        }        }
3693    
3694      /* Reached end of subject string */      /* Reached end of subject string */
3695    
3696      *q = 0;      *q = 0;
3697      len = (int)(q - dbuffer);      len = (int)(q - dbuffer);
3698    
# Line 3793  while (!done) Line 3794  while (!done)
3794          case -3:          case -3:
3795          fprintf(outfile, "**Failed: character value greater than 0xffff "          fprintf(outfile, "**Failed: character value greater than 0xffff "
3796            "cannot be converted to 16-bit in non-UTF mode\n");            "cannot be converted to 16-bit in non-UTF mode\n");
3797          goto NEXT_DATA;          goto NEXT_DATA;
3798    
3799          default:          default:
3800          break;          break;

Legend:
Removed from v.841  
changed lines
  Added in v.842

  ViewVC Help
Powered by ViewVC 1.1.5