/[pcre]/code/trunk/pcre16_utf16_utils.c
ViewVC logotype

Diff of /code/trunk/pcre16_utf16_utils.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

code/branches/pcre16/pcre16_utf16_utils.c revision 764 by zherczeg, Wed Nov 23 17:23:20 2011 UTC code/trunk/pcre16_utf16_utils.c revision 860 by zherczeg, Mon Jan 9 20:12:58 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2009 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 46  strings to host byte order. */ Line 46  strings to host byte order. */
46  #include "config.h"  #include "config.h"
47  #endif  #endif
48    
49    /* Generate code with 16 bit character support. */
50    #define COMPILE_PCRE16
51    
52  #include "pcre_internal.h"  #include "pcre_internal.h"
53    
54    /*************************************************
55    *  Convert any UTF-16 string to host byte order  *
56    *************************************************/
57    
58    /* This function takes a UTF-16 string and converts
59    it to host byte order. The length can be explicitly set,
60    or automatically detected for zero terminated strings.
61    BOMs can be kept or discarded during the conversion.
62    Conversion can be done in place (output == input).
63    
64    Arguments:
65      output     the output buffer, its size must be greater than
66                 or equal to that of the input string
67      input      any UTF-16 string
68      length     the number of 16-bit units in the input string;
69                 can be less than zero for zero-terminated strings
70      host_byte_order
71                 A non-zero value means the input is in host byte
72                 order, which can be dynamically changed by BOMs later.
73                 Initially it contains the starting byte order and returns
74                 with the last byte order so it can be used for stream
75                 processing. It can be NULL, in which case the input
76                 is initially assumed to be in host byte order.
77      keep_boms  for a non-zero value, the BOM (0xfeff) characters
78                 are copied as well
79    
80    Returns:     the number of 16-bit units placed into the output buffer
81                 (discarded BOMs are not counted), including the
81                 zero-terminator when it is part of the converted input
82    */
83    
84  int  int
85  pcre16_utf16_to_host_byte_order(PCRE_SCHAR16 *output, PCRE_SPTR16 input, int length, int keep_boms)  pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *output, PCRE_SPTR16 input,
86      int length, int *host_byte_order, int keep_boms)
87  {  {
88  #ifdef SUPPORT_UTF16  #ifdef SUPPORT_UTF
89  /* This function converts any UTF-16 string to host byte order and optionally removes  /* This function converts any UTF-16 string to host byte order and optionally
90  any Byte Order Marks (BOMS). Returns with the remaining length. */  removes any Byte Order Marks (BOMS). Returns with the remaining length. */
91  BOOL same_bo = TRUE;  int host_bo = host_byte_order != NULL ? *host_byte_order : 1;
92  PCRE_SPTR16 end = input + length;  pcre_uchar *optr = (pcre_uchar *)output;
93    const pcre_uchar *iptr = (const pcre_uchar *)input;
94    const pcre_uchar *end;
95  /* The c variable must be unsigned. */  /* The c variable must be unsigned. */
96  register uschar c;  register pcre_uchar c;
97    
98    if (length < 0)
99      length = STRLEN_UC(iptr) + 1;
100    end = iptr + length;
101    
102  while (input < end)  while (iptr < end)
103    {    {
104    c = *input++;    c = *iptr++;
105    if (c == 0xfeff || c == 0xfffe)    if (c == 0xfeff || c == 0xfffe)
106      {      {
107      /* Detecting the byte order of the machine is unnecessary, it is      /* Detecting the byte order of the machine is unnecessary, it is
108      enough to know that the UTF-16 string has the same byte order or not. */      enough to know that the UTF-16 string has the same byte order or not. */
109      same_bo = c == 0xfeff;      host_bo = c == 0xfeff;
110      if (keep_boms != 0)      if (keep_boms != 0)
111        *output++ = 0xfeff;        *optr++ = 0xfeff;
112      else      else
113        length--;        length--;
114      }      }
115    else    else
116      *output++ = same_bo ? c : ((c >> 8) | (c << 8)); /* Flip bytes if needed. */      *optr++ = host_bo ? c : ((c >> 8) | (c << 8)); /* Flip bytes if needed. */
117    }    }
118    if (host_byte_order != NULL)
119      *host_byte_order = host_bo;
120    
121  #else  #else /* SUPPORT_UTF */
122  (void)(output);  /* Keep picky compilers happy */  (void)(output);  /* Keep picky compilers happy */
123  (void)(input);  (void)(input);
124  (void)(keep_boms);  (void)(keep_boms);
125  #endif  #endif /* SUPPORT_UTF */
126  return length;  return length;
127  }  }
128    

Legend:
Removed from v.764  
changed lines
  Added in v.860

  ViewVC Help
Powered by ViewVC 1.1.5