/[pcre]/code/trunk/maint/GenerateUtt.py
ViewVC logotype

Diff of /code/trunk/maint/GenerateUtt.py

Parent Directory | Revision Log | View Patch

revision 592 by ph10, Sat Apr 30 17:37:37 2011 UTC revision 1260 by ph10, Wed Feb 27 15:41:22 2013 UTC
# Line 1  Line 1 
1  #! /usr/bin/python  #! /usr/bin/python
2    
3  # Generate utt tables.  # Generate utt tables. Note: this script is written in Python 2 and is
4    # incompatible with Python 3. However, the 2to3 conversion script has been
5    # successfully tested on it.
6    
7  # The source file pcre_tables.c contains (amongst other things), a table that  # The source file pcre_tables.c contains (amongst other things), a table that
8  # is indexed by script name. In order to reduce the number of relocations when  # is indexed by script name. In order to reduce the number of relocations when
# Line 16  Line 18 
18  # Modified by PH 01-March-2010 to add new scripts for Unicode 5.2.0.  # Modified by PH 01-March-2010 to add new scripts for Unicode 5.2.0.
19  # Modified by PH 04-May-2010 to add new "X.." special categories.  # Modified by PH 04-May-2010 to add new "X.." special categories.
20  # Modified by PH 30-April-2011 to add new scripts for Unicode 6.0.0  # Modified by PH 30-April-2011 to add new scripts for Unicode 6.0.0
21    # Modified by ChPe 30-September-2012 to add this note; no other changes were
22    # necessary for Unicode 6.2.0 support.
23    # Modified by PH 26-February-2013 to add the Xuc special category.
24    
25  script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \  script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \
26   'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \   'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \
# Line 34  script_names = ['Arabic', 'Armenian', 'B Line 39  script_names = ['Arabic', 'Armenian', 'B
39   'Javanese', 'Kaithi', 'Lisu', 'Meetei_Mayek', \   'Javanese', 'Kaithi', 'Lisu', 'Meetei_Mayek', \
40   'Old_South_Arabian', 'Old_Turkic', 'Samaritan', 'Tai_Tham', 'Tai_Viet', \   'Old_South_Arabian', 'Old_Turkic', 'Samaritan', 'Tai_Tham', 'Tai_Viet', \
41   # New for Unicode 6.0.0   # New for Unicode 6.0.0
42   'Batak', 'Brahmi', 'Mandaic'   'Batak', 'Brahmi', 'Mandaic', \
43    # New for Unicode 6.1.0
44     'Chakma', 'Meroitic_Cursive', 'Meroitic_Hieroglyphs', 'Miao', 'Sharada', 'Sora_Sompeng', 'Takri'
45   ]   ]
46    
47  category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',  category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
# Line 56  utt_table.append(('L&',  'PT_LAMP')) Line 63  utt_table.append(('L&',  'PT_LAMP'))
63  utt_table.append(('Xan', 'PT_ALNUM'))  utt_table.append(('Xan', 'PT_ALNUM'))
64  utt_table.append(('Xps', 'PT_PXSPACE'))  utt_table.append(('Xps', 'PT_PXSPACE'))
65  utt_table.append(('Xsp', 'PT_SPACE'))  utt_table.append(('Xsp', 'PT_SPACE'))
66    utt_table.append(('Xuc', 'PT_UCNC'))
67  utt_table.append(('Xwd', 'PT_WORD'))  utt_table.append(('Xwd', 'PT_WORD'))
68    
69  # Sort the table.  # Sort the table.
# Line 79  for utt in utt_table: Line 87  for utt in utt_table:
87  # Print the actual table, using the string names  # Print the actual table, using the string names
88    
89  print ''  print ''
90  print 'const char _pcre_utt_names[] = ';  print 'const char PRIV(utt_names)[] =';
91  last = ''  last = ''
92  for utt in utt_table:  for utt in utt_table:
93          if utt == utt_table[-1]:          if utt == utt_table[-1]:
94                  last = ';'                  last = ';'
95          print '  STRING_%s0%s' % (utt[0].replace('&', '_AMPERSAND'), last)          print '  STRING_%s0%s' % (utt[0].replace('&', '_AMPERSAND'), last)
96  # This was how it was done before the EBCDIC-compatible modification.  # This was how it was done before the EBCDIC-compatible modification.
97  #        print '  "%s\\0"%s' % (utt[0], last)  #        print '  "%s\\0"%s' % (utt[0], last)
98    
99  print '\nconst ucp_type_table _pcre_utt[] = { '  print '\nconst ucp_type_table PRIV(utt)[] = {'
100  offset = 0  offset = 0
101  last = ','  last = ','
102  for utt in utt_table:  for utt in utt_table:
103          if utt[1] in ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE',          if utt[1] in ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE',
104            'PT_SPACE', 'PT_WORD'):            'PT_SPACE', 'PT_UCNC', 'PT_WORD'):
105                  value = '0'                  value = '0'
106          else:          else:
107                  value = 'ucp_' + utt[0]                  value = 'ucp_' + utt[0]
108          if utt == utt_table[-1]:          if utt == utt_table[-1]:
109                  last = ''                  last = ''
110          print '  { %3d, %s, %s }%s ' % (offset, utt[1], value, last)          print '  { %3d, %s, %s }%s' % (offset, utt[1], value, last)
111          offset += len(utt[0]) + 1          offset += len(utt[0]) + 1
112  print '};'  print '};'

Legend:
Removed from v.592  
changed lines
  Added in v.1260

  ViewVC Help
Powered by ViewVC 1.1.5