/[pcre]/code/trunk/pcre32_valid_utf32.c
ViewVC logotype

Diff of /code/trunk/pcre32_valid_utf32.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

code/trunk/pcre_refcount.c revision 359 by ph10, Wed Jul 9 16:20:19 2008 UTC code/trunk/pcre32_valid_utf32.c revision 1088 by chpe, Tue Oct 16 15:55:41 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  */  */
39    
40    
41  /* This module contains the external function pcre_refcount(), which is an  /* This module contains an internal function for validating UTF-32 character
42  auxiliary function that can be used to maintain a reference count in a compiled  strings. */
 pattern data block. This might be helpful in applications where the block is  
 shared by different users. */  
43    
44    
45  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
46  #include "config.h"  #include "config.h"
47  #endif  #endif
48    
49  #include "pcre_internal.h"  /* Generate code with 32 bit character support. */
50    #define COMPILE_PCRE32
51    
52    #include "pcre_internal.h"
53    
54  /*************************************************  /*************************************************
55  *           Maintain reference count             *  *         Validate a UTF-32 string                *
56  *************************************************/  *************************************************/
57    
58  /* The reference count is a 16-bit field, initialized to zero. It is not  /* This function is called (optionally) at the start of compile or match, to
59  possible to transfer a non-zero count from one host to a different host that  check that a supposed UTF-32 string is actually valid. The early check means
60  has a different byte order - though I can't see why anyone in their right mind  that subsequent code can assume it is dealing with a valid string. The check
61  would ever want to do that!  can be turned off for maximum performance, but the consequences of supplying an
62    invalid string are then undefined.
63    
64    More information about the details of the error is passed
65    back in the returned value:
66    
67    PCRE_UTF32_ERR0  No error
68    PCRE_UTF32_ERR1  Surrogate character
69    PCRE_UTF32_ERR2  Disallowed character 0xfffe
70    PCRE_UTF32_ERR3  Character > 0x10ffff
71    
72  Arguments:  Arguments:
73    argument_re   points to compiled code    string       points to the string
74    adjust        value to add to the count    length       length of string, or -1 if the string is zero-terminated
75      erroroffset  pointer to an error position offset variable
76    
77  Returns:        the (possibly updated) count value (a non-negative number), or  Returns:       = 0    if the string is a valid UTF-32 string
78                  a negative error number                 > 0    otherwise, setting the offset of the bad character
79  */  */
80    
81  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  int
82  pcre_refcount(pcre *argument_re, int adjust)  PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
83  {  {
84  real_pcre *re = (real_pcre *)argument_re;  #ifdef SUPPORT_UTF
85  if (re == NULL) return PCRE_ERROR_NULL;  register PCRE_PUCHAR p;
86  re->ref_count = (-adjust > re->ref_count)? 0 :  register pcre_uchar c;
87                  (adjust + re->ref_count > 65535)? 65535 :  
88                  re->ref_count + adjust;  if (length < 0)
89  return re->ref_count;    {
90      for (p = string; *p != 0; p++);
91      length = p - string;
92      }
93    
94    for (p = string; length-- > 0; p++)
95      {
96      c = *p & UTF32_MASK;
97    
98      if ((c & 0xfffff800u) != 0xd800u)
99        {
100        /* Normal UTF-32 code point. Neither high nor low surrogate. */
101    
102        /* This is probably a 16-bit BOM. Regardless, the string is rejected. */
103        if (c == 0xfffeu)
104          {
105          *erroroffset = p - string;
106          return PCRE_UTF32_ERR2;
107          }
108        else if (c > 0x10ffffu)
109          {
110          *erroroffset = p - string;
111          return PCRE_UTF32_ERR3;
112          }
113        }
114      else
115        {
116        /* A surrogate */
117        *erroroffset = p - string;
118        return PCRE_UTF32_ERR1;
119        }
120      }
121    
122    #else  /* SUPPORT_UTF */
123    (void)(string);  /* Keep picky compilers happy */
124    (void)(length);
125    #endif /* SUPPORT_UTF */
126    
127    return PCRE_UTF32_ERR0;   /* This indicates success */
128  }  }
129    
130  /* End of pcre_refcount.c */  /* End of pcre32_valid_utf32.c */

Legend:
Removed from v.359  
changed lines
  Added in v.1088

  ViewVC Help
Powered by ViewVC 1.1.5