/[pcre]/code/trunk/maint/MultiStage2.py
ViewVC logotype

Diff of /code/trunk/maint/MultiStage2.py

Parent Directory | Revision Log | View Patch

revision 351 by ph10, Fri Jul 4 18:27:16 2008 UTC revision 352 by ph10, Mon Jul 7 15:12:56 2008 UTC
# Line 25  Line 25 
25  #  Adjusted data file names to take from the Unicode.tables directory  #  Adjusted data file names to take from the Unicode.tables directory
26  #  Adjusted global table names by prefixing _pcre_.  #  Adjusted global table names by prefixing _pcre_.
27  #  Commented out stuff relating to the casefolding table, which isn't used.  #  Commented out stuff relating to the casefolding table, which isn't used.
28    #  Corrected size calculation
29  #  #
30  # The tables generated by this script are used by macros defined in  # The tables generated by this script are used by macros defined in
31  # pcre_internal.h. They look up Unicode character properties using short  # pcre_internal.h. They look up Unicode character properties using short
# Line 189  def combine_tables(*tables): Line 190  def combine_tables(*tables):
190                  index.append(i)                  index.append(i)
191          return index, records          return index, records
192    
def get_record_size_struct(records):
        """Work out the C size of one ucd_record and describe its layout.

        records is a non-empty sequence of equal-length tuples; position i of
        every tuple holds the value of field i.  For each field,
        get_type_size() is asked for the C type name and byte size needed to
        hold all of that field's values.

        Returns a (size, structure) pair: size is the number of bytes one
        record occupies as an element of a C array, including padding, and
        structure is a C comment containing the matching typedef, meant to be
        compared by hand against pcre_internal.h.
        """
        size = 0
        # Strictest field alignment seen so far; a struct must be padded to a
        # multiple of this so that every element of an array stays aligned.
        alignment = 1
        structure = ('/* When recompiling tables with a new Unicode version,\n'
                     'please check types in the structure definition from pcre_internal.h:\ntypedef struct {\n')
        for i in range(len(records[0])):
                record_slice = [record[i] for record in records]
                slice_type, slice_size = get_type_size(record_slice)
                # add padding: round up to the next multiple of slice_size
                # (the mask trick assumes slice_size is a power of two --
                # TODO confirm against get_type_size)
                size = (size + slice_size - 1) & -slice_size
                size += slice_size
                alignment = max(alignment, slice_size)
                structure += '%s property_%d;\n' % (slice_type, i)

        # Tail padding: round up to the widest field, not merely the first
        # one, so that e.g. (1-byte, 2-byte, 1-byte) gives 6 bytes, not 5.
        size = (size + alignment - 1) & -alignment

        structure += '} ucd_record; */\n\n'
        return size, structure
212    
def test_record_size():
        """Self-check get_record_size_struct() against hand-computed sizes.

        Raises AssertionError if any computed record size differs from the
        expected one.
        """
        # Each case pairs a list of sample records with the expected size,
        # in bytes, of one record.
        cases = [
                ([(3,), (6,), (6,), (1,)], 1),
                ([(300,), (600,), (600,), (100,)], 2),
                ([(25, 3), (6, 6), (34, 6), (68, 1)], 2),
                ([(300, 3), (6, 6), (340, 6), (690, 1)], 4),
                ([(3, 300), (6, 6), (6, 340), (1, 690)], 4),
                ([(300, 300), (6, 6), (6, 340), (1, 690)], 4),
                ([(3, 100000), (6, 6), (6, 123456), (1, 690)], 8),
                ([(100000, 300), (6, 6), (123456, 6), (1, 690)], 8),
        ]
        for sample_records, expected_size in cases:
                # Only the size is checked; the typedef text is ignored here.
                actual_size, _typedef_text = get_record_size_struct(sample_records)
                assert actual_size == expected_size
229    def print_records(records, record_size):
230            print 'const ucd_record _pcre_ucd_records[] = { ' + \
231                  '/* %d bytes, record size %d */' % (len(records) * record_size, record_size)
232          records = zip(records.keys(), records.values())          records = zip(records.keys(), records.values())
233          records.sort(None, lambda x: x[1])          records.sort(None, lambda x: x[1])
234          for i, record in enumerate(records):          for i, record in enumerate(records):
# Line 213  category_names = ['Cc', 'Cf', 'Cn', 'Co' Line 251  category_names = ['Cc', 'Cf', 'Cn', 'Co'
251    'Mc', 'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps',    'Mc', 'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps',
252    'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs' ]    'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs' ]
253    
254    test_record_size()
255    
256  script = read_table('Unicode.tables/Scripts.txt', make_get_names(script_names), script_names.index('Common'))  script = read_table('Unicode.tables/Scripts.txt', make_get_names(script_names), script_names.index('Common'))
257  category = read_table('Unicode.tables/DerivedGeneralCategory.txt', make_get_names(category_names), category_names.index('Cn'))  category = read_table('Unicode.tables/DerivedGeneralCategory.txt', make_get_names(category_names), category_names.index('Cn'))
# Line 220  other_case = read_table('Unicode.tables/ Line 259  other_case = read_table('Unicode.tables/
259  # case_fold = read_table('CaseFolding.txt', get_case_folding_value, 0)  # case_fold = read_table('CaseFolding.txt', get_case_folding_value, 0)
260    
261  table, records = combine_tables(script, category, other_case)  table, records = combine_tables(script, category, other_case)
262    record_size, record_struct = get_record_size_struct(records.keys())
263    
264  # Find the optimum block size for the two-stage table  # Find the optimum block size for the two-stage table
265  min_size = sys.maxint  min_size = sys.maxint
266  for block_size in [2 ** i for i in range(5,10)]:  for block_size in [2 ** i for i in range(5,10)]:
267          size = len(records) * 4          size = len(records) * record_size
268          stage1, stage2 = compress_table(table, block_size)          stage1, stage2 = compress_table(table, block_size)
269          size += get_tables_size(stage1, stage2)          size += get_tables_size(stage1, stage2)
270          #print "/* block size %5d  => %5d bytes */" % (block_size, size)          #print "/* block size %5d  => %5d bytes */" % (block_size, size)
# Line 241  print Line 281  print
281  print "/* Unicode character database. */"  print "/* Unicode character database. */"
282  print "/* This file was autogenerated by the MultiStage2.py script. */"  print "/* This file was autogenerated by the MultiStage2.py script. */"
283  print "/* Total size: %d bytes, block size: %d. */" % (min_size, min_block_size)  print "/* Total size: %d bytes, block size: %d. */" % (min_size, min_block_size)
284  print_records(records)  print record_struct
285    print_records(records, record_size)
286  print_table(min_stage1, '_pcre_ucd_stage1')  print_table(min_stage1, '_pcre_ucd_stage1')
287  print_table(min_stage2, '_pcre_ucd_stage2', min_block_size)  print_table(min_stage2, '_pcre_ucd_stage2', min_block_size)
288  print "#if UCD_BLOCK_SIZE != %d" % min_block_size  print "#if UCD_BLOCK_SIZE != %d" % min_block_size

Legend:
Removed from v.351  
changed lines
  Added in v.352

  ViewVC Help
Powered by ViewVC 1.1.5