/[pcre]/code/trunk/pcre_valid_utf8.c
ViewVC logotype

Diff of /code/trunk/pcre_valid_utf8.c

Parent Directory | Revision Log | View Patch

revision 1260 by chpe, Tue Oct 16 15:56:18 2012 UTC revision 1261 by ph10, Wed Feb 27 16:27:01 2013 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2012 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 92  PCRE_UTF8_ERR18  Overlong 5-byte sequence Line 92  PCRE_UTF8_ERR18  Overlong 5-byte sequence
92  PCRE_UTF8_ERR19  Overlong 6-byte sequence (won't ever occur)  PCRE_UTF8_ERR19  Overlong 6-byte sequence (won't ever occur)
93  PCRE_UTF8_ERR20  Isolated 0x80 byte (not within UTF-8 character)  PCRE_UTF8_ERR20  Isolated 0x80 byte (not within UTF-8 character)
94  PCRE_UTF8_ERR21  Byte with the illegal value 0xfe or 0xff  PCRE_UTF8_ERR21  Byte with the illegal value 0xfe or 0xff
95  PCRE_UTF8_ERR22  Non-character  PCRE_UTF8_ERR22  Unused (was non-character)
96    
97  Arguments:  Arguments:
98    string       points to the string    string       points to the string
# Line 118  if (length < 0) Line 118  if (length < 0)
118  for (p = string; length-- > 0; p++)  for (p = string; length-- > 0; p++)
119    {    {
120    register pcre_uchar ab, c, d;    register pcre_uchar ab, c, d;
   pcre_uint32 v = 0;  
121    
122    c = *p;    c = *p;
123    if (c < 128) continue;                /* ASCII character */    if (c < 128) continue;                /* ASCII character */
# Line 187  for (p = string; length-- > 0; p++) Line 186  for (p = string; length-- > 0; p++)
186        *erroroffset = (int)(p - string) - 2;        *erroroffset = (int)(p - string) - 2;
187        return PCRE_UTF8_ERR14;        return PCRE_UTF8_ERR14;
188        }        }
     v = ((c & 0x0f) << 12) | ((d & 0x3f) << 6) | (*p & 0x3f);  
189      break;      break;
190    
191      /* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2      /* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
# Line 215  for (p = string; length-- > 0; p++) Line 213  for (p = string; length-- > 0; p++)
213        *erroroffset = (int)(p - string) - 3;        *erroroffset = (int)(p - string) - 3;
214        return PCRE_UTF8_ERR13;        return PCRE_UTF8_ERR13;
215        }        }
     v = ((c & 0x07) << 18) | ((d & 0x3f) << 12) | ((p[-1] & 0x3f) << 6) | (*p & 0x3f);  
216      break;      break;
217    
218      /* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be      /* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
# Line 290  for (p = string; length-- > 0; p++) Line 287  for (p = string; length-- > 0; p++)
287      *erroroffset = (int)(p - string) - ab;      *erroroffset = (int)(p - string) - ab;
288      return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;      return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;
289      }      }
   
   /* Reject non-characters. The pointer p is currently at the last byte of the  
   character. */  
   if ((v & 0xfffeu) == 0xfffeu || (v >= 0xfdd0 && v <= 0xfdef))  
     {  
     *erroroffset = (int)(p - string) - ab;  
     return PCRE_UTF8_ERR22;  
     }  
290    }    }
291    
292  #else  /* Not SUPPORT_UTF */  #else  /* Not SUPPORT_UTF */

Legend:
Removed from v.1260  
changed lines
  Added in v.1261

  ViewVC Help
Powered by ViewVC 1.1.5