6 |
and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
7 |
|
|
8 |
Written by Philip Hazel |
Written by Philip Hazel |
9 |
Copyright (c) 1997-2006 University of Cambridge |
Copyright (c) 1997-2007 University of Cambridge |
10 |
|
|
11 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
12 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
44 |
clashes with the library. */ |
clashes with the library. */ |
45 |
|
|
46 |
|
|
47 |
|
#ifdef HAVE_CONFIG_H |
48 |
|
#include <config.h> |
49 |
|
#endif |
50 |
|
|
51 |
#include "pcre_internal.h" |
#include "pcre_internal.h" |
52 |
|
|
53 |
|
|
65 |
/* These are the breakpoints for different numbers of bytes in a UTF-8 |
/* These are the breakpoints for different numbers of bytes in a UTF-8 |
66 |
character. */ |
character. */ |
67 |
|
|
68 |
|
#ifdef SUPPORT_UTF8 |
69 |
|
|
70 |
const int _pcre_utf8_table1[] = |
const int _pcre_utf8_table1[] = |
71 |
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; |
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; |
72 |
|
|
78 |
const int _pcre_utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; |
const int _pcre_utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; |
79 |
const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; |
const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; |
80 |
|
|
81 |
/* Table of the number of extra characters, indexed by the first character |
/* Table of the number of extra bytes, indexed by the first byte masked with |
82 |
masked with 0x3f. The highest number for a valid UTF-8 character is in fact |
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */ |
|
0x3d. */ |
|
83 |
|
|
84 |
const uschar _pcre_utf8_table4[] = { |
const uschar _pcre_utf8_table4[] = { |
85 |
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
94 |
{ "Any", PT_ANY, 0 }, |
{ "Any", PT_ANY, 0 }, |
95 |
{ "Arabic", PT_SC, ucp_Arabic }, |
{ "Arabic", PT_SC, ucp_Arabic }, |
96 |
{ "Armenian", PT_SC, ucp_Armenian }, |
{ "Armenian", PT_SC, ucp_Armenian }, |
97 |
|
{ "Balinese", PT_SC, ucp_Balinese }, |
98 |
{ "Bengali", PT_SC, ucp_Bengali }, |
{ "Bengali", PT_SC, ucp_Bengali }, |
99 |
{ "Bopomofo", PT_SC, ucp_Bopomofo }, |
{ "Bopomofo", PT_SC, ucp_Bopomofo }, |
100 |
{ "Braille", PT_SC, ucp_Braille }, |
{ "Braille", PT_SC, ucp_Braille }, |
110 |
{ "Common", PT_SC, ucp_Common }, |
{ "Common", PT_SC, ucp_Common }, |
111 |
{ "Coptic", PT_SC, ucp_Coptic }, |
{ "Coptic", PT_SC, ucp_Coptic }, |
112 |
{ "Cs", PT_PC, ucp_Cs }, |
{ "Cs", PT_PC, ucp_Cs }, |
113 |
|
{ "Cuneiform", PT_SC, ucp_Cuneiform }, |
114 |
{ "Cypriot", PT_SC, ucp_Cypriot }, |
{ "Cypriot", PT_SC, ucp_Cypriot }, |
115 |
{ "Cyrillic", PT_SC, ucp_Cyrillic }, |
{ "Cyrillic", PT_SC, ucp_Cyrillic }, |
116 |
{ "Deseret", PT_SC, ucp_Deseret }, |
{ "Deseret", PT_SC, ucp_Deseret }, |
153 |
{ "N", PT_GC, ucp_N }, |
{ "N", PT_GC, ucp_N }, |
154 |
{ "Nd", PT_PC, ucp_Nd }, |
{ "Nd", PT_PC, ucp_Nd }, |
155 |
{ "New_Tai_Lue", PT_SC, ucp_New_Tai_Lue }, |
{ "New_Tai_Lue", PT_SC, ucp_New_Tai_Lue }, |
156 |
|
{ "Nko", PT_SC, ucp_Nko }, |
157 |
{ "Nl", PT_PC, ucp_Nl }, |
{ "Nl", PT_PC, ucp_Nl }, |
158 |
{ "No", PT_PC, ucp_No }, |
{ "No", PT_PC, ucp_No }, |
159 |
{ "Ogham", PT_SC, ucp_Ogham }, |
{ "Ogham", PT_SC, ucp_Ogham }, |
166 |
{ "Pd", PT_PC, ucp_Pd }, |
{ "Pd", PT_PC, ucp_Pd }, |
167 |
{ "Pe", PT_PC, ucp_Pe }, |
{ "Pe", PT_PC, ucp_Pe }, |
168 |
{ "Pf", PT_PC, ucp_Pf }, |
{ "Pf", PT_PC, ucp_Pf }, |
169 |
|
{ "Phags_Pa", PT_SC, ucp_Phags_Pa }, |
170 |
|
{ "Phoenician", PT_SC, ucp_Phoenician }, |
171 |
{ "Pi", PT_PC, ucp_Pi }, |
{ "Pi", PT_PC, ucp_Pi }, |
172 |
{ "Po", PT_PC, ucp_Po }, |
{ "Po", PT_PC, ucp_Po }, |
173 |
{ "Ps", PT_PC, ucp_Ps }, |
{ "Ps", PT_PC, ucp_Ps }, |
200 |
|
|
201 |
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table); |
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table); |
202 |
|
|
203 |
|
#endif /* SUPPORT_UTF8 */ |
204 |
|
|
205 |
/* End of pcre_tables.c */ |
/* End of pcre_tables.c */ |