1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 from translate.storage import factory
22 import os
23 import cProfile
24 import pstats
25 import random
26 import sys
27
class TranslateBenchmarker:
    """Class to aid in benchmarking Translate Toolkit stores.

    Sample files are generated below ``<test_dir>/benchmark/zxx`` with the
    given store class and can then be parsed back to measure performance.
    """

    def __init__(self, test_dir, storeclass):
        """Set up benchmarking on the test directory.

        :param test_dir: directory that will hold the benchmark files; it is
            stored as an absolute path.
        :param storeclass: store class to benchmark.  This class uses its
            ``Extensions`` sequence, calls it both without arguments and with
            an open file, and relies on ``addsourceunit()``, ``savefile()``
            and a ``units`` collection on the instances.
        """
        self.test_dir = os.path.abspath(test_dir)
        self.StoreClass = storeclass
        # Every generated file gets the first extension the store declares.
        self.extension = self.StoreClass.Extensions[0]
        self.project_dir = os.path.join(self.test_dir, "benchmark")
        self.file_dir = os.path.join(self.project_dir, "zxx")

    def clear_test_dir(self):
        """Remove the test directory and everything below it.

        Does nothing when the directory does not exist.  The final assertion
        is a sanity check that the directory is really gone.
        """
        if os.path.exists(self.test_dir):
            # Walk bottom-up so every directory is already empty by the time
            # os.rmdir() is called on it.
            for dirpath, subdirs, filenames in os.walk(self.test_dir, topdown=False):
                for name in filenames:
                    os.remove(os.path.join(dirpath, name))
                for name in subdirs:
                    os.rmdir(os.path.join(dirpath, name))
            os.rmdir(self.test_dir)
        assert not os.path.exists(self.test_dir)

    def create_sample_files(self, num_dirs, files_per_dir, strings_per_file,
                            source_words_per_string, target_words_per_string):
        """Create sample files for benchmarking.

        :param num_dirs: number of directories to fill.  With more than one,
            files go into ``sample_<n>`` subdirectories of the file
            directory; otherwise they are written to the file directory
            itself.
        :param files_per_dir: number of store files written per directory.
        :param strings_per_file: number of units added to each file; also the
            upper bound of the random numbers used to build the words.
        :param source_words_per_string: number of words in each unit source.
        :param target_words_per_string: number of words in each unit target.
        """
        # makedirs creates test_dir and project_dir on the way to file_dir.
        if not os.path.exists(self.file_dir):
            os.makedirs(self.file_dir)
        for dirnum in range(num_dirs):
            if num_dirs > 1:
                dirname = os.path.join(self.file_dir, "sample_%d" % dirnum)
                if not os.path.exists(dirname):
                    os.mkdir(dirname)
            else:
                dirname = self.file_dir
            for filenum in range(files_per_dir):
                sample_file = self.StoreClass()
                for _ in range(strings_per_file):
                    # The random number is multiplied by the word position,
                    # so the first word of every string ends in "0".
                    source_string = " ".join(
                        ["word%d" % (random.randint(0, strings_per_file) * i)
                         for i in range(source_words_per_string)])
                    sample_unit = sample_file.addsourceunit(source_string)
                    sample_unit.target = " ".join(
                        ["drow%d" % (random.randint(0, strings_per_file) * i)
                         for i in range(target_words_per_string)])
                filename = "file_%d.%s" % (filenum, self.extension)
                sample_file.savefile(os.path.join(dirname, filename))

    def parse_file(self):
        """Parse all the files in the test directory into memory.

        Prints the total number of units found and also returns it.

        :return: total number of units over all parsed files.
        """
        count = 0
        for dirpath, subdirs, filenames in os.walk(self.file_dir, topdown=False):
            for name in filenames:
                pofilename = os.path.join(dirpath, name)
                # Close each file as soon as the store has been built from it
                # instead of leaking one open handle per file.
                # NOTE(review): assumes the store reads the file inside its
                # constructor -- confirm for lazily parsing store classes.
                with open(pofilename, 'r') as inputfile:
                    parsedfile = self.StoreClass(inputfile)
                count += len(parsedfile.units)
        # Single-argument call form prints the same on Python 2 and 3.
        print("counted %d units" % count)
        return count
82
if __name__ == "__main__":
    # Store type to benchmark: first command-line argument, "po" by default.
    storetype = "po"
    if len(sys.argv) > 1:
        storetype = sys.argv[1]
    if storetype not in factory.classes:
        print("StoreClass: '%s' is not a base class that the class factory can load" % storetype)
        # Exit with a failure status so callers can detect the bad argument.
        sys.exit(1)
    storeclass = factory.classes[storetype]

    separator = "_______________________________________________________"
    # Each entry is the argument tuple for create_sample_files():
    # (num_dirs, files_per_dir, strings_per_file,
    #  source_words_per_string, target_words_per_string)
    for sample_file_sizes in [
        (1, 1, 10000, 5, 10),
    ]:
        benchmarker = TranslateBenchmarker("BenchmarkDir", storeclass)
        benchmarker.clear_test_dir()
        # Files are created once up front and again under the profiler below.
        benchmarker.create_sample_files(*sample_file_sizes)
        # (method name, positional arguments) pairs to profile, in order.
        methods = [("create_sample_files", sample_file_sizes), ("parse_file", ())]
        for methodname, methodargs in methods:
            description = "%d dirs, %d files, %d strings, %d/%d words" % sample_file_sizes
            print("%s %s" % (methodname, description))
            print(separator)
            statsfile = "%s_%s" % (methodname, storetype) + '_%d_%d_%d_%d_%d.stats' % sample_file_sizes
            # Profile the bound method through an explicit namespace instead
            # of building source text that depends on __main__ globals.
            profile_namespace = {
                "method": getattr(benchmarker, methodname),
                "args": methodargs,
            }
            cProfile.runctx("method(*args)", {}, profile_namespace, statsfile)
            stats = pstats.Stats(statsfile)
            stats.sort_stats('cumulative').print_stats(20)
            print(separator)
117
118