/[pcre]/code/trunk/pcre_tables.c
ViewVC logotype

Diff of /code/trunk/pcre_tables.c

Parent Directory | Revision Log | View Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 117 by ph10, Fri Mar 9 15:59:06 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 39  POSSIBILITY OF SUCH DAMAGE. Line 39  POSSIBILITY OF SUCH DAMAGE.
39    
40    
41  /* This module contains some fixed tables that are used by more than one of the  /* This module contains some fixed tables that are used by more than one of the
42  PCRE code modules. */  PCRE code modules. The tables are also #included by the pcretest program, which
43    uses macros to change their names from _pcre_xxx to xxx, thereby avoiding name
44    clashes with the library. */
45    
46    
47  #include "pcre_internal.h"  #include "pcre_internal.h"
48    
49    
50  /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that  /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
51  the definition is next to the definition of the opcodes in internal.h. */  the definition is next to the definition of the opcodes in pcre_internal.h. */
52    
53  const uschar _pcre_OP_lengths[] = { OP_LENGTHS };  const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
54    
# Line 59  const uschar _pcre_OP_lengths[] = { OP_L Line 61  const uschar _pcre_OP_lengths[] = { OP_L
61  /* These are the breakpoints for different numbers of bytes in a UTF-8  /* These are the breakpoints for different numbers of bytes in a UTF-8
62  character. */  character. */
63    
64    #ifdef SUPPORT_UTF8
65    
66  const int _pcre_utf8_table1[] =  const int _pcre_utf8_table1[] =
67    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
68    
# Line 70  first byte of a character, indexed by th Line 74  first byte of a character, indexed by th
74  const int _pcre_utf8_table2[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  const int _pcre_utf8_table2[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
75  const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
76    
77  /* Table of the number of extra characters, indexed by the first character  /* Table of the number of extra bytes, indexed by the first byte masked with
78  masked with 0x3f. The highest number for a valid UTF-8 character is in fact  0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
 0x3d. */  
79    
80  const uschar _pcre_utf8_table4[] = {  const uschar _pcre_utf8_table4[] = {
81    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
# Line 80  const uschar _pcre_utf8_table4[] = { Line 83  const uschar _pcre_utf8_table4[] = {
83    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
84    3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };    3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
85    
86  /* This table translates Unicode property names into code values for the  /* This table translates Unicode property names into type and code values. It
87  ucp_findchar() function. It is used by pcretest as well as by the library  is searched by binary chop, so must be in collating sequence of name. */
 functions. */  
88    
89  const ucp_type_table _pcre_utt[] = {  const ucp_type_table _pcre_utt[] = {
90    { "C",  128 + ucp_C },    { "Any",                 PT_ANY,  0 },
91    { "Cc", ucp_Cc },    { "Arabic",              PT_SC,   ucp_Arabic },
92    { "Cf", ucp_Cf },    { "Armenian",            PT_SC,   ucp_Armenian },
93    { "Cn", ucp_Cn },    { "Balinese",            PT_SC,   ucp_Balinese },
94    { "Co", ucp_Co },    { "Bengali",             PT_SC,   ucp_Bengali },
95    { "Cs", ucp_Cs },    { "Bopomofo",            PT_SC,   ucp_Bopomofo },
96    { "L",  128 + ucp_L },    { "Braille",             PT_SC,   ucp_Braille },
97    { "Ll", ucp_Ll },    { "Buginese",            PT_SC,   ucp_Buginese },
98    { "Lm", ucp_Lm },    { "Buhid",               PT_SC,   ucp_Buhid },
99    { "Lo", ucp_Lo },    { "C",                   PT_GC,   ucp_C },
100    { "Lt", ucp_Lt },    { "Canadian_Aboriginal", PT_SC,   ucp_Canadian_Aboriginal },
101    { "Lu", ucp_Lu },    { "Cc",                  PT_PC,   ucp_Cc },
102    { "M",  128 + ucp_M },    { "Cf",                  PT_PC,   ucp_Cf },
103    { "Mc", ucp_Mc },    { "Cherokee",            PT_SC,   ucp_Cherokee },
104    { "Me", ucp_Me },    { "Cn",                  PT_PC,   ucp_Cn },
105    { "Mn", ucp_Mn },    { "Co",                  PT_PC,   ucp_Co },
106    { "N",  128 + ucp_N },    { "Common",              PT_SC,   ucp_Common },
107    { "Nd", ucp_Nd },    { "Coptic",              PT_SC,   ucp_Coptic },
108    { "Nl", ucp_Nl },    { "Cs",                  PT_PC,   ucp_Cs },
109    { "No", ucp_No },    { "Cuneiform",           PT_SC,   ucp_Cuneiform },
110    { "P",  128 + ucp_P },    { "Cypriot",             PT_SC,   ucp_Cypriot },
111    { "Pc", ucp_Pc },    { "Cyrillic",            PT_SC,   ucp_Cyrillic },
112    { "Pd", ucp_Pd },    { "Deseret",             PT_SC,   ucp_Deseret },
113    { "Pe", ucp_Pe },    { "Devanagari",          PT_SC,   ucp_Devanagari },
114    { "Pf", ucp_Pf },    { "Ethiopic",            PT_SC,   ucp_Ethiopic },
115    { "Pi", ucp_Pi },    { "Georgian",            PT_SC,   ucp_Georgian },
116    { "Po", ucp_Po },    { "Glagolitic",          PT_SC,   ucp_Glagolitic },
117    { "Ps", ucp_Ps },    { "Gothic",              PT_SC,   ucp_Gothic },
118    { "S",  128 + ucp_S },    { "Greek",               PT_SC,   ucp_Greek },
119    { "Sc", ucp_Sc },    { "Gujarati",            PT_SC,   ucp_Gujarati },
120    { "Sk", ucp_Sk },    { "Gurmukhi",            PT_SC,   ucp_Gurmukhi },
121    { "Sm", ucp_Sm },    { "Han",                 PT_SC,   ucp_Han },
122    { "So", ucp_So },    { "Hangul",              PT_SC,   ucp_Hangul },
123    { "Z",  128 + ucp_Z },    { "Hanunoo",             PT_SC,   ucp_Hanunoo },
124    { "Zl", ucp_Zl },    { "Hebrew",              PT_SC,   ucp_Hebrew },
125    { "Zp", ucp_Zp },    { "Hiragana",            PT_SC,   ucp_Hiragana },
126    { "Zs", ucp_Zs }    { "Inherited",           PT_SC,   ucp_Inherited },
127      { "Kannada",             PT_SC,   ucp_Kannada },
128      { "Katakana",            PT_SC,   ucp_Katakana },
129      { "Kharoshthi",          PT_SC,   ucp_Kharoshthi },
130      { "Khmer",               PT_SC,   ucp_Khmer },
131      { "L",                   PT_GC,   ucp_L },
132      { "L&",                  PT_LAMP, 0 },
133      { "Lao",                 PT_SC,   ucp_Lao },
134      { "Latin",               PT_SC,   ucp_Latin },
135      { "Limbu",               PT_SC,   ucp_Limbu },
136      { "Linear_B",            PT_SC,   ucp_Linear_B },
137      { "Ll",                  PT_PC,   ucp_Ll },
138      { "Lm",                  PT_PC,   ucp_Lm },
139      { "Lo",                  PT_PC,   ucp_Lo },
140      { "Lt",                  PT_PC,   ucp_Lt },
141      { "Lu",                  PT_PC,   ucp_Lu },
142      { "M",                   PT_GC,   ucp_M },
143      { "Malayalam",           PT_SC,   ucp_Malayalam },
144      { "Mc",                  PT_PC,   ucp_Mc },
145      { "Me",                  PT_PC,   ucp_Me },
146      { "Mn",                  PT_PC,   ucp_Mn },
147      { "Mongolian",           PT_SC,   ucp_Mongolian },
148      { "Myanmar",             PT_SC,   ucp_Myanmar },
149      { "N",                   PT_GC,   ucp_N },
150      { "Nd",                  PT_PC,   ucp_Nd },
151      { "New_Tai_Lue",         PT_SC,   ucp_New_Tai_Lue },
152      { "Nko",                 PT_SC,   ucp_Nko },
153      { "Nl",                  PT_PC,   ucp_Nl },
154      { "No",                  PT_PC,   ucp_No },
155      { "Ogham",               PT_SC,   ucp_Ogham },
156      { "Old_Italic",          PT_SC,   ucp_Old_Italic },
157      { "Old_Persian",         PT_SC,   ucp_Old_Persian },
158      { "Oriya",               PT_SC,   ucp_Oriya },
159      { "Osmanya",             PT_SC,   ucp_Osmanya },
160      { "P",                   PT_GC,   ucp_P },
161      { "Pc",                  PT_PC,   ucp_Pc },
162      { "Pd",                  PT_PC,   ucp_Pd },
163      { "Pe",                  PT_PC,   ucp_Pe },
164      { "Pf",                  PT_PC,   ucp_Pf },
165      { "Phags_Pa",            PT_SC,   ucp_Phags_Pa },
166      { "Phoenician",          PT_SC,   ucp_Phoenician },
167      { "Pi",                  PT_PC,   ucp_Pi },
168      { "Po",                  PT_PC,   ucp_Po },
169      { "Ps",                  PT_PC,   ucp_Ps },
170      { "Runic",               PT_SC,   ucp_Runic },
171      { "S",                   PT_GC,   ucp_S },
172      { "Sc",                  PT_PC,   ucp_Sc },
173      { "Shavian",             PT_SC,   ucp_Shavian },
174      { "Sinhala",             PT_SC,   ucp_Sinhala },
175      { "Sk",                  PT_PC,   ucp_Sk },
176      { "Sm",                  PT_PC,   ucp_Sm },
177      { "So",                  PT_PC,   ucp_So },
178      { "Syloti_Nagri",        PT_SC,   ucp_Syloti_Nagri },
179      { "Syriac",              PT_SC,   ucp_Syriac },
180      { "Tagalog",             PT_SC,   ucp_Tagalog },
181      { "Tagbanwa",            PT_SC,   ucp_Tagbanwa },
182      { "Tai_Le",              PT_SC,   ucp_Tai_Le },
183      { "Tamil",               PT_SC,   ucp_Tamil },
184      { "Telugu",              PT_SC,   ucp_Telugu },
185      { "Thaana",              PT_SC,   ucp_Thaana },
186      { "Thai",                PT_SC,   ucp_Thai },
187      { "Tibetan",             PT_SC,   ucp_Tibetan },
188      { "Tifinagh",            PT_SC,   ucp_Tifinagh },
189      { "Ugaritic",            PT_SC,   ucp_Ugaritic },
190      { "Yi",                  PT_SC,   ucp_Yi },
191      { "Z",                   PT_GC,   ucp_Z },
192      { "Zl",                  PT_PC,   ucp_Zl },
193      { "Zp",                  PT_PC,   ucp_Zp },
194      { "Zs",                  PT_PC,   ucp_Zs }
195  };  };
196    
197  const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);  const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
198    
199    #endif  /* SUPPORT_UTF8 */
200    
201  /* End of pcre_tables.c */  /* End of pcre_tables.c */

Legend:
Removed from v.77  
changed lines
  Added in v.117

  ViewVC Help
Powered by ViewVC 1.1.5