/[pcre]/code/trunk/pcre_tables.c
ViewVC logotype

Diff of /code/trunk/pcre_tables.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 975 by ph10, Sat Jun 2 11:03:06 2012 UTC revision 1011 by ph10, Sat Aug 25 11:36:15 2012 UTC
# Line 109  const int PRIV(ucp_gentype)[] = { Line 109  const int PRIV(ucp_gentype)[] = {
109    ucp_Z, ucp_Z, ucp_Z                 /* Zl, Zp, Zs */    ucp_Z, ucp_Z, ucp_Z                 /* Zl, Zp, Zs */
110  };  };
111    
112    /* This byte table encodes the rules for finding the end of an extended
113    grapheme cluster. It could be done with bits instead of bytes, but the saving
114    in memory would be small and there would be more computation at runtime.
115    
116    Every code point has a grapheme break property which is one of the ucp_gbXX
117    values defined in ucp.h. The number of such properties is ucp_gbCount. The
118    2-dimensional table is indexed by the properties of two adjacent code points.
119    The value is non-zero if a grapheme break is NOT permitted between the relevant
120    two code points. The breaking rules are as follows:
121    
122    1. Break at the start and end of text (pretty obviously).
123    
124    2. Do not break between a CR and LF: (0,1) is set; otherwise, break before and
125       after controls: (x,0), (x,1), (x,2), (0,x), (1,x), and (2,x) are not set,
126       except for (0,1).
127    
128    3. Do not break Hangul syllable sequences: (6,6), (6,7), (6,9), (6,10),
129       (7,7), (7,8), (8,8), (9,7), (9,8), and (10,8) are set. The rules for Hangul
130       sequences are:
131    
132        L may be followed by L, V, LV or LVT
133        LV or V may be followed by V or T
134        LVT or T may be followed by T
135    
136    4. Do not break before extending characters: (x,3) is set except for (0,3),
137       (1,3), and (2,3).
138    
139    The next two rules are only for extended grapheme clusters (but that's what we
140    are implementing).
141    
142    5. Do not break before SpacingMarks: (x,5) is set except for (0,5), (1,5),
143       and (2,5).
144    
145    6. Do not break after Prepend characters: (4,x) is set except for (4,0), (4,1),
146       and (4,2).
147    
148    7. Otherwise, break everywhere.
149    */
150    
151    const pcre_uint8 PRIV(ucp_gbtable[]) = {
       /* Each row is the grapheme break property of the first code point of an
          adjacent pair (named in the trailing comment on that row); each column
          is the property of the code point that follows it. An entry of 1 means
          that no break is permitted between the two, e.g. row 0 (CR), column 1
          (LF). The initializer is a flat list of 12 rows of 12 entries;
          presumably it is indexed as (first * ucp_gbCount + second) - confirm
          against the callers. */
152    /* 0  1  2  3  4  5  6  7  8  9 10 11 */
153       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     /*  0 CR */
154       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     /*  1 LF */
155       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     /*  2 Control */
156       0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,     /*  3 Extend */
157       0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,     /*  4 Prepend */
158       0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,     /*  5 SpacingMark */
159       0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0,     /*  6 L */
160       0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0,     /*  7 V */
161       0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0,     /*  8 T */
162       0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0,     /*  9 LV */
163       0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0,     /* 10 LVT */
164       0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0      /* 11 Other */
165    };
166    
167  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
168  /* This table reverses PRIV(ucp_gentype). We can save the cost  /* This table reverses PRIV(ucp_gentype). We can save the cost
169  of a memory load. */  of a memory load. */

Legend:
Removed from v.975  
changed lines
  Added in v.1011

  ViewVC Help
Powered by ViewVC 1.1.5