/[pcre]/code/trunk/pcre_ucp_findchar.c
ViewVC logotype

Contents of /code/trunk/pcre_ucp_findchar.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 85 - (show annotations)
Sat Feb 24 21:41:13 2007 UTC (12 years, 5 months ago) by nigel
File MIME type: text/plain
File size: 4598 byte(s)
Load pcre-6.4 into code/trunk.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2005 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module compiles code for supporting the use of Unicode character
42 properties. We use the (embryonic at the time of writing) UCP library, by
43 including some of its files, copies of which have been put in the PCRE
44 distribution. The actual search function is reproduced here, with its name
45 changed. */
46
47
48 #include "pcre_internal.h"
49
50 #include "ucp.h" /* Category definitions */
51 #include "ucpinternal.h" /* Internal table details */
52 #include "ucptable.c" /* The table itself */
53
54
55
56 /*************************************************
57 * Search table and return data *
58 *************************************************/
59
60 /* Two values are returned: the category is ucp_C, ucp_L, etc. The detailed
61 character type is ucp_Lu, ucp_Nd, etc.
62
63 Arguments:
64 c the character value
65 type_ptr the detailed character type is returned here
66 case_ptr for letters, the opposite case is returned here, if there
67 is one, else zero
68
69 Returns: the character type category or -1 if not found
70 */
71
72 PCRE_EXPORT int
73 _pcre_ucp_findchar(const int c, int *type_ptr, int *case_ptr)
74 {
75 cnode *node = ucp_table;
76 register int cc = c;
77 int case_offset;
78
79 for (;;)
80 {
81 register int d = node->f1 | ((node->f0 & f0_chhmask) << 16);
82 if (cc == d) break;
83 if (cc < d)
84 {
85 if ((node->f0 & f0_leftexists) == 0) return -1;
86 node ++;
87 }
88 else
89 {
90 register int roffset = (node->f2 & f2_rightmask) >> f2_rightshift;
91 if (roffset == 0) return -1;
92 node += 1 << (roffset - 1);
93 }
94 }
95
96 switch ((*type_ptr = ((node->f0 & f0_typemask) >> f0_typeshift)))
97 {
98 case ucp_Cc:
99 case ucp_Cf:
100 case ucp_Cn:
101 case ucp_Co:
102 case ucp_Cs:
103 return ucp_C;
104 break;
105
106 case ucp_Ll:
107 case ucp_Lu:
108 case_offset = node->f2 & f2_casemask;
109 if ((case_offset & 0x0100) != 0) case_offset |= 0xfffff000;
110 *case_ptr = (case_offset == 0)? 0 : cc + case_offset;
111 return ucp_L;
112
113 case ucp_Lm:
114 case ucp_Lo:
115 case ucp_Lt:
116 *case_ptr = 0;
117 return ucp_L;
118 break;
119
120 case ucp_Mc:
121 case ucp_Me:
122 case ucp_Mn:
123 return ucp_M;
124 break;
125
126 case ucp_Nd:
127 case ucp_Nl:
128 case ucp_No:
129 return ucp_N;
130 break;
131
132 case ucp_Pc:
133 case ucp_Pd:
134 case ucp_Pe:
135 case ucp_Pf:
136 case ucp_Pi:
137 case ucp_Ps:
138 case ucp_Po:
139 return ucp_P;
140 break;
141
142 case ucp_Sc:
143 case ucp_Sk:
144 case ucp_Sm:
145 case ucp_So:
146 return ucp_S;
147 break;
148
149 case ucp_Zl:
150 case ucp_Zp:
151 case ucp_Zs:
152 return ucp_Z;
153 break;
154
155 default: /* "Should never happen" */
156 return -1;
157 break;
158 }
159 }
160
161 /* End of pcre_ucp_findchar.c */

  ViewVC Help
Powered by ViewVC 1.1.5