/[pcre]/code/trunk/pcre32_valid_utf32.c
ViewVC logotype

Diff of /code/trunk/pcre32_valid_utf32.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

code/trunk/pcre_refcount.c revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC code/trunk/pcre32_valid_utf32.c revision 1055 by chpe, Tue Oct 16 15:53:30 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  */  */
39    
40    
41  /* This module contains the external function pcre_refcount(), which is an  /* This module contains an internal function for validating UTF-32 character
42  auxiliary function that can be used to maintain a reference count in a compiled  strings. */
43  pattern data block. This might be helpful in applications where the block is  
44  shared by different users. */  
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49    /* Generate code with 32 bit character support. */
50    #define COMPILE_PCRE32
51    
52  #include "pcre_internal.h"  #include "pcre_internal.h"
53    
54    #define MASK (0x1fffffu)
55    
56  /*************************************************  /*************************************************
57  *           Maintain reference count             *  *         Validate a UTF-32 string                *
58  *************************************************/  *************************************************/
59    
60  /* The reference count is a 16-bit field, initialized to zero. It is not  /* This function is called (optionally) at the start of compile or match, to
61  possible to transfer a non-zero count from one host to a different host that  check that a supposed UTF-32 string is actually valid. The early check means
62  has a different byte order - though I can't see why anyone in their right mind  that subsequent code can assume it is dealing with a valid string. The check
63  would ever want to do that!  can be turned off for maximum performance, but the consequences of supplying an
64    invalid string are then undefined.
65    
66    From release 8.21 more information about the details of the error is passed
67    back in the returned value:
68    
69    PCRE_UTF32_ERR0  No error
70    PCRE_UTF32_ERR1  Surrogate character
71    PCRE_UTF32_ERR2  Not allowed character
72    
73  Arguments:  Arguments:
74    argument_re   points to compiled code    string       points to the string
75    adjust        value to add to the count    length       length of string, or -1 if the string is zero-terminated
76      erroroffset  pointer to an error position offset variable
77    
78  Returns:        the (possibly updated) count value (a non-negative number), or  Returns:       = 0    if the string is a valid UTF-32 string
79                  a negative error number                 > 0    otherwise, setting the offset of the bad character
80  */  */
81    
82  EXPORT int  int
83  pcre_refcount(pcre *argument_re, int adjust)  PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
84  {  {
85  real_pcre *re = (real_pcre *)argument_re;  #ifdef SUPPORT_UTF
86  if (re == NULL) return PCRE_ERROR_NULL;  register PCRE_PUCHAR p;
87  re->ref_count = (-adjust > re->ref_count)? 0 :  register pcre_uchar c;
88                  (adjust + re->ref_count > 65535)? 65535 :  
89                  re->ref_count + adjust;  if (length < 0)
90  return re->ref_count;    {
91      for (p = string; *p != 0; p++);
92      length = p - string;
93      }
94    
95    for (p = string; length-- > 0; p++)
96      {
97      c = *p & MASK;
98    
99      if ((c & 0xfffff800u) != 0xd800u)
100        {
101        /* Normal UTF-32 code point. Neither high nor low surrogate. */
102    
103        /* U+FFFE is most likely a byte-swapped BOM (U+FEFF). Regardless, the string is rejected. */
104        if (c == 0xfffeu)
105          {
106          *erroroffset = p - string;
107          return PCRE_UTF32_ERR2;
108          }
109        }
110      else
111        {
112        /* A surrogate */
113        *erroroffset = p - string;
114        return PCRE_UTF32_ERR1;
115        }
116      }
117    
118    #else  /* SUPPORT_UTF */
119    (void)(string);  /* Keep picky compilers happy */
120    (void)(length);
121    #endif /* SUPPORT_UTF */
122    
123    return PCRE_UTF32_ERR0;   /* This indicates success */
124  }  }
125    
126  /* End of pcre_refcount.c */  /* End of pcre32_valid_utf32.c */

Legend:
Removed from v.77  
changed lines
  Added in v.1055

  ViewVC Help
Powered by ViewVC 1.1.5