1
2 from documentFactory import BaseDocumentStream
3 from document import StringDocument
4
5 import os, re
6
8
13
15
16
# Body fragment of a generator (it yields at the `cache == 0` branch below)
# that builds one <svopi> StringDocument per word matched under a VP phrase.
# NOTE(review): the enclosing class/def header and all indentation are missing
# from this listing. `rec`, `cache` and `self` are bound outside the visible
# lines, and the nesting described in the comments below is inferred from the
# statement order -- confirm against the original file before editing.
#
# Select every word that sits directly under a phrase with cat="VP".
17 vs = rec.process_xpath('phrase[@cat="VP"]/word')
# Documents collected for the non-streaming branch (stored on self at the end).
18 docs = []
# Ids of verbs already appended to an earlier verb chain; such verbs are
# skipped when the outer loop reaches them.
19 processed = []
20 for v in vs:
21
# v[0] is passed to rec._convert_elem, which returns a (name, attrs) pair;
# v[1] is sliced as text.
22 (name, attrs) = rec._convert_elem(v[0])
23 prepstr = ""
24 iobjstr = ""
25 arg1 = attrs[u'arg1']
# The first two characters of the matched text are dropped -- presumably a
# fixed prefix added by process_xpath; TODO confirm.
26 vtxt = v[1][2:]
27 vid = attrs['id']
28 if vid in processed:
29 continue
# `verb` starts as a list of markup pieces and is joined into a string later.
30 verb = ['<verb>', '<w pos="%s" base="%s">%s</w>' % (attrs['pos'], attrs['base'], vtxt)]
# Subject: every word below the phrase whose id equals this verb's arg1.
31 el1 = rec.process_xpath('phrase[@id="%s"]/descendant::word' % arg1)
32 txt = ['<subject>']
33 for w in el1:
34 (name, nattrs) = rec._convert_elem(w[0])
35 txt.append('<w pos="%s">%s</w>' % (nattrs['pos'], w[1][2:]))
36 txt.append("</subject>")
37 subj = ' '.join(txt)
38
# Object: a KeyError raised while resolving it (e.g. the verb has no 'arg2'
# attribute) falls through to the empty <object/> in the handler below.
39 try:
40 arg2 = attrs[u'arg2']
41
42
43 el2 = rec.process_xpath('phrase[@id="%s"]' % arg2)
44 (name, nattrs) = rec._convert_elem(el2[0][0])
45 nid = nattrs['id']
# While the referenced phrase is itself a VP, append its first word to the
# verb chain, remember that word's id in `processed`, and follow its arg2
# (or its arg1 when arg2 equals the outer verb's arg1). The value '-1' ends
# the chain.
46 while nattrs[u'cat'] == "VP":
47 allv = rec.process_xpath('phrase[@id="%s"]/descendant::word' % nid)
48 (name, avattrs) = rec._convert_elem(allv[0][0])
49 verb.append('<w pos="%s" base="%s">%s</w>' % (avattrs['pos'], avattrs['base'], allv[0][1][2:]))
50 processed.append(avattrs['id'])
51 avarg2 = avattrs['arg2']
52 if avarg2 == arg1:
53 avarg2 = avattrs['arg1']
54 if avarg2 == '-1':
55
56 break
57 el2 = rec.process_xpath('phrase[@id="%s"]' % avarg2 )
58 (name, nattrs) = rec._convert_elem(el2[0][0])
59 nid = nattrs['id']
60
# Emit every word below the last phrase id reached above as the object.
61 el2 = rec.process_xpath('phrase[@id="%s"]/descendant::word' % nid)
62 txt = ['<object>']
63 for w in el2:
64 (name, nattrs) = rec._convert_elem(w[0])
65 txt.append('<w pos="%s">%s</w>' % (nattrs['pos'], w[1][2:]))
66 txt.append("</object>")
67 obj = ' '.join(txt)
68 except KeyError:
69 obj = "<object/>"
70
# Look for a word whose arg1 equals int(vid) - 1 -- presumably a preposition
# attached to the phrase just before this verb's id; TODO confirm the id
# arithmetic against the data format.
71 ppxp = rec.process_xpath("word[@arg1='%s']" % (int(vid) -1))
72 if ppxp:
# `attrs` is rebound here to the matched word's attributes; the verb's own
# attributes are not read again after this point.
73 (name, attrs) = rec._convert_elem(ppxp[0][0])
74 ptag = '<w pos="%s">%s</w>' % (attrs['pos'], ppxp[0][1][2:])
75 prepstr = "<prep>%s</prep>\n" % ptag
76 try:
77 iobjxp = rec.process_xpath("phrase[@id='%s']/descendant::word" % attrs['arg2'])
78 iobjlist = ['<iobject>']
79 for w in iobjxp:
80 (name, nattrs) = rec._convert_elem(w[0])
81 iobjlist.append('<w pos="%s">%s</w>' % (nattrs['pos'], w[1][2:]))
82 iobjlist.append('</iobject>')
83 iobjstr = ' '.join(iobjlist) + "\n"
# NOTE(review): this bare except swallows every exception (including
# KeyboardInterrupt/SystemExit) and then discards the <prep> text as well as
# the indirect object; consider narrowing it to the errors actually expected.
84 except:
85 prepstr = ""
86 iobjstr = ""
87
# Close the verb chain and assemble the final document markup.
88 verb.append('</verb>')
89 verb = ' '.join(verb)
90 docstr = "<svopi>\n %s\n %s\n %s\n%s%s</svopi>" % (subj, verb, obj, prepstr, iobjstr)
91 doc = StringDocument(docstr)
# cache == 0 yields each document as it is built; cache == 1 is not
# implemented; any other value collects the documents in `docs`.
92 if cache == 0:
93 yield doc
94 elif cache == 1:
95 raise NotImplementedError
96 else:
97 docs.append(doc)
# Store the collected documents on the instance (the list is empty when
# every document was yielded instead).
98 self.documents = docs
# NOTE(review): under PEP 479 (default from Python 3.7) a StopIteration raised
# inside a generator is converted to RuntimeError; a plain `return` ends the
# generator on both Python 2 and 3 -- confirm the target Python version.
99 raise StopIteration
100