/[pcre]/code/trunk/maint/GenerateUtt.py
ViewVC logotype

Diff of /code/trunk/maint/GenerateUtt.py

Parent Directory | Revision Log | View Patch

revision 490 by ph10, Tue Mar 17 21:16:01 2009 UTC | revision 491 by ph10, Mon Mar 1 17:45:08 2010 UTC
# Line 4  Line 4 
4    
5  # The source file pcre_tables.c contains (amongst other things), a table that  # The source file pcre_tables.c contains (amongst other things), a table that
6  # is indexed by script name. In order to reduce the number of relocations when  # is indexed by script name. In order to reduce the number of relocations when
7  # loading the library, the names are held as a single large string, with  # loading the library, the names are held as a single large string, with
8  # offsets in the table. This is tedious to maintain by hand. Therefore, this  # offsets in the table. This is tedious to maintain by hand. Therefore, this
9  # script is used to generate the table. The output is sent to stdout.  # script is used to generate the table. The output is sent to stdout.
10    
11  # Modified by PH 17-March-2009 to generate the more verbose form that works  # Modified by PH 17-March-2009 to generate the more verbose form that works
12  # for UTF-support in EBCDIC as well as ASCII environments.  # for UTF-support in EBCDIC as well as ASCII environments.
13    # Modified by PH 01-March-2010 to add new scripts from Unicode 5.2.0.
14    
15  script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \  script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \
16   'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \   'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \
# Line 21  script_names = ['Arabic', 'Armenian', 'B Line 22  script_names = ['Arabic', 'Armenian', 'B
22   # New for Unicode 5.0   # New for Unicode 5.0
23   'Balinese', 'Cuneiform', 'Nko', 'Phags_Pa', 'Phoenician', \   'Balinese', 'Cuneiform', 'Nko', 'Phags_Pa', 'Phoenician', \
24   # New for Unicode 5.1   # New for Unicode 5.1
25   'Carian', 'Cham', 'Kayah_Li', 'Lepcha', 'Lycian', 'Lydian', 'Ol_Chiki', 'Rejang', 'Saurashtra', 'Sundanese', 'Vai'   'Carian', 'Cham', 'Kayah_Li', 'Lepcha', 'Lycian', 'Lydian', 'Ol_Chiki', 'Rejang', 'Saurashtra', 'Sundanese', 'Vai', \
26     # New for Unicode 5.2
27     'Avestan', 'Bamum', 'Egyptian_Hieroglyphs', 'Imperial_Aramaic', \
28     'Inscriptional_Pahlavi', 'Inscriptional_Parthian', \
29     'Javanese', 'Kaithi', 'Lisu', 'Meetei_Mayek', \
30     'Old_South_Arabian', 'Old_Turkic', 'Samaritan', 'Tai_Tham', 'Tai_Viet'
31   ]   ]
32    
33  category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',  category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
34    'Mc', 'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps',    'Mc', 'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps',
35    'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs' ]    'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs' ]
# Line 46  for utt in utt_table: Line 52  for utt in utt_table:
52          for c in utt[0]:          for c in utt[0]:
53                  if c == '_':                  if c == '_':
54                          print 'STR_UNDERSCORE',                          print 'STR_UNDERSCORE',
55                  elif c == '&':                  elif c == '&':
56                          print 'STR_AMPERSAND',                          print 'STR_AMPERSAND',
57                  else:                  else:
58                          print 'STR_%s' % c,;                          print 'STR_%s' % c,;
59          print '"\\0"'          print '"\\0"'
60    
61  # Print the actual table, using the string names  # Print the actual table, using the string names
62    
# Line 61  for utt in utt_table: Line 67  for utt in utt_table:
67          if utt == utt_table[-1]:          if utt == utt_table[-1]:
68                  last = ';'                  last = ';'
69          print '  STRING_%s0%s' % (utt[0].replace('&', '_AMPERSAND'), last)          print '  STRING_%s0%s' % (utt[0].replace('&', '_AMPERSAND'), last)
70  # This was how it was done before the EBCDIC-compatible modification.  # This was how it was done before the EBCDIC-compatible modification.
71  #        print '  "%s\\0"%s' % (utt[0], last)  #        print '  "%s\\0"%s' % (utt[0], last)
72    
73  print '\nconst ucp_type_table _pcre_utt[] = { '  print '\nconst ucp_type_table _pcre_utt[] = { '

Legend:
Removed from v.490  
Changed lines
  Added in v.491

  ViewVC Help
Powered by ViewVC 1.1.5