1
2
3
4 """
5
6 Library for parsing, displaying, querying and serializing DOAP
7
8 """
9
10 import sys
11 import logging
12 import xmlrpclib
13 from cStringIO import StringIO
14 from xml.sax._exceptions import SAXParseException
15
16 from rdfalchemy import rdfSubject
17 from rdflib import ConjunctiveGraph, Namespace
18
19 from doapfiend.utils import fetch_file
20 from doapfiend.model import Project
21 from doapfiend.plugins import load_plugins
22
# Module-wide logger; every debug message in this library goes through it.
LOG = logging.getLogger('doapfiend')

# XML-RPC proxy for the doapspace.org service.
XMLRPC_SERVER = xmlrpclib.ServerProxy('http://doapspace.org/xmlrpc/')

# DOAP vocabulary namespace, used to build predicate URIs such as
# DOAP_NS["homepage"].
DOAP_NS = Namespace('http://usefulinc.com/ns/doap#')
26
27
def follow_homepages(rdf_xml):
    '''
    Print any DOAP that can be found for the homepages named in some RDF

    Every doap:homepage returned by get_homepages() is handed to
    print_doap_by_homepages(). This is useful if we're looking at FOAF
    and a project is mentioned by homepage. It can also be used on DOAP
    files to search for additional DOAP files about the same project.

    @param rdf_xml: RDF serialized as XML
    @type rdf_xml: string

    @rtype: int
    @returns: 0 on success or 1 if there was no DOAP in the RDF
    '''
    homepages = list(get_homepages(rdf_xml))
    if not homepages:
        print('No DOAP found in that RDF.')
        return 1

    print_doap_by_homepages(homepages)
    # Return the documented success status explicitly instead of falling
    # off the end of the function with None.
    return 0
49
50
52 '''
53 If there is a 'doap:Project homepage' it will be looked up
54 on doapspace.org using get_by_homepage to find any other
55 DOAP. This is useful if we're looking at FOAF and a project
56 is mentioned by homepage. It can also be used on DOAP files
57 to search for additional DOAP files about the same project.
58
59 @param rdf: RDF serialized as XML
60 @type : string
61
62 @rtype: int
63 @returns: 0 on success or 1 if there was no DOAP in the RDF
64 '''
65 homepages = list(get_homepages(rdf))
66 nbr_homepage_urls = len(homepages)
67 if nbr_homepage_urls >= 1:
68 for hpage_url in homepages:
69 print "Found project homepage:", hpage_url
70
71 hpages = query_by_homepage(hpage_url)
72 for _src, hpage_url in hpages:
73 print ' Found DOAP: ', hpage_url
74 else:
75 print 'No DOAP found in that RDF.'
76 return 1
77
78
def print_doap_by_homepages(homepages):
    '''
    Given a list of homepage URLs, search for DOAP for each and print it

    @param homepages: Project homepage URLs
    @type homepages: list

    @rtype: None
    @returns: None
    '''
    for hpage_url in homepages:
        print("Found project homepage %s" % (hpage_url,))

        # Each hit is unpacked as a (source, URL) pair; only the URL is
        # used. A separate name keeps the outer loop variable intact.
        for _src, doap_url in query_by_homepage(hpage_url):
            # Two spaces after "at" reproduce the original output.
            print('Found DOAP at  %s' % (doap_url,))
            print_doap(fetch_doap(doap_url))
97
def get_homepages(rdf, format='xml'):
    '''
    Yield each doap:homepage value found in an RDF document

    Nothing is yielded when rdf_has_doap() reports that the parsed
    graph does not declare the DOAP namespace.

    @param rdf: RDF
    @type rdf: string

    @param format: Serialization format
    @type format: string

    @rtype: generator
    @returns: homepages, each converted with str()
    '''
    graph = ConjunctiveGraph()
    graph.parse(StringIO(rdf), publicID=None, format=format)
    if not rdf_has_doap(graph):
        return

    homepage_predicate = DOAP_NS["homepage"]
    for _subject, homepage in graph.subject_objects(homepage_predicate):
        yield str(homepage)
116
def rdf_has_doap(store):
    '''
    Returns True if triplestore has the DOAP namespace defined

    @param store: triplestore
    @type store: rdflib ConjunctiveGraph

    @rtype: boolean
    @returns: True if triplestore contains DOAP namespace, else False

    '''
    # Each entry from store.namespaces() is indexed at [1] for the
    # namespace URI. any() gives a real False (not None) when the DOAP
    # namespace is absent.
    return any(namespace[1] == DOAP_NS for namespace in store.namespaces())
131
def load_graph(doap, format="xml", get_list=False):
    '''
    Load a DOAP profile into a RDFAlchemy/rdflib graph

    Supports any serialization format rdflib can parse (xml, n3, etc.)

    The parsed graph replaces rdfSubject.db, so it becomes the store
    behind every rdfSubject-based class, including Project.

    @param doap: DOAP
    @type doap: string

    @param format: Serialization format we're parsing
    @type format: string

    @param get_list: Return list of Projects if True
    @type get_list: boolean

    @rtype: Project, or list of Project when get_list is True
    @returns: the first Project{rdfSubject} found, or every Project
              found (possibly an empty list) when get_list is True

    Exits the process with status 2 (SystemExit) if the RDF/XML cannot
    be parsed, or if a single Project was requested and none was found.

    '''
    rdfSubject.db = ConjunctiveGraph()
    try:
        # Pass the format by keyword, as get_homepages() does: the second
        # positional parameter of parse() is publicID, not format.
        rdfSubject.db.parse(StringIO(doap), format=format)
    except SAXParseException:
        sys.stderr.write("Error: Can't parse RDF/XML.\n")
        sys.exit(2)

    if get_list:
        LOG.debug("doaplib: list of Projects")
        projs = list(Project.ClassInstances())
        LOG.debug("Found %s Projects.", len(projs))
        if not projs:
            # An empty result is reported but still returned to the caller.
            sys.stderr.write('No DOAP found in that RDF.\n')
        return projs

    LOG.debug("doaplib: single Project")
    try:
        return next(Project.ClassInstances())
    except StopIteration:
        sys.stderr.write('No DOAP found in that RDF.\n')
        sys.exit(2)
186
188 '''
189 Get DOAP for a package index project name
190
191 Builtin indexes:
192
193 - 'sf' SourceForge
194 - 'fm' Freshmeat
195 - 'py' Python Package Index
196
197 Note there can be other package indexes available by
198 third party plugins.
199
200 @param index: Package index two letter abbreviation
201 @type index: string
202
203 @param project_name: project name
204 @type project_name: string
205
206 @param proxy: Optional HTTP proxy URL
207 @type proxy: string
208
209 @rtype: string
210 @return: text of file retrieved
211
212 '''
213 for plugin_obj in list(load_plugins()):
214 plugin = plugin_obj()
215 if hasattr(plugin, 'prefix'):
216 if plugin.prefix == index:
217 plugin.query = project_name
218 return plugin.search(proxy)
219
220
def query_by_homepage(url):
    '''
    Get list of URL's for DOAP given a project's homepage.
    The list can contain zero or multiple URLs.

    The return format is:
    [(source, URL), (source, URL)...]

    'source' is the two letter package index abbreviation or 'ex' for external.
    'external' meaning the DOAP was spidered on the web.

    Current package indexes:

    - 'sf' SourceForge
    - 'fm' Freshmeat
    - 'py' Python Package Index
    - 'oh' Packages listed on Ohloh

    @param url: URL of homepage of a project
    @type url: string

    @rtype: list
    @return: A list of tuples containing URLs for DOAP found by homepage

    '''
    # Remote call through the module-level XML-RPC proxy.
    return XMLRPC_SERVER.query_by_homepage(url)
249
250
def print_doap(doap_xml, color=None, format='text', serializer=None,
               filename=None):
    '''
    Print DOAP as text, xml, or n3 etc. to stdout or a file
    A callable serializer object may be passed or a name of a serializer
    plugin.

    @param doap_xml: DOAP profile in RDF/XML
    @type doap_xml: string

    @param color: Passed unchanged as the second argument to the serializer
    @type color: any

    @param format: Serialization syntax formatter name; only used to look
                   up a serializer when `serializer` is not given
    @type format: string

    @param serializer: Called as serializer(doap_xml, color); must return
                       the text to output
    @type serializer: callable

    @param filename: Optional filename to write to instead of stdout
    @type filename: string

    @return: None on success or 1 if invalid serialization request

    '''
    if not serializer:
        serializer = get_serializer(format)
        if not serializer:
            sys.stderr.write('Unknown serialization requested: %s\n' % format)
            return 1

    doap = serializer(doap_xml, color)
    if not filename:
        print(doap)
        return None

    # Encode before opening so the file is opened exactly once. If the
    # encode raises UnicodeDecodeError, the text is written unmodified.
    try:
        data = doap.encode('utf-8')
    except UnicodeDecodeError:
        data = doap
    # The context manager closes the file handle even if the write fails.
    with open(filename, 'w') as out_file:
        out_file.write(data)
    return None
289
290
def get_serializer(format):
    '''
    Return a serializer instance given its name

    @param format: Name of serializer
    @type format: string

    @rtype: function
    @returns: The bound `serialize` method of the first plugin whose
              `name` equals `format`, or None if no plugin matches
    '''
    for plugin_obj in get_plugin('serialize'):
        # get_plugin() hands back plugin objects that are called here to
        # obtain the instance that is inspected.
        plugin = plugin_obj()
        if plugin.name == format:
            return plugin.serialize
    # print_doap() tests the result for truthiness, so state the
    # not-found value explicitly.
    return None
306
307
def get_plugin(method):
    """
    Return the plugin objects that have `method`

    @param method: name of plugin's method we're calling
    @type method: string

    @rtype: list
    @returns: list of plugins with `method`, in load_plugins() order

    """
    return [plugin for plugin in load_plugins() if hasattr(plugin, method)]
326
327
def fetch_doap(url, proxy=None):
    '''
    Fetch DOAP by its URL or filename

    @param url: URL of DOAP profile in RDF/XML serialization
    @type url: string

    @param proxy: Passed unchanged to fetch_file as its second argument
    @type proxy: presumably an HTTP proxy URL string - confirm

    @rtype: text
    @return: DOAP
    '''
    # NOTE(review): the def line was missing from this listing. The name
    # and one-argument call style come from print_doap_by_homepages();
    # the proxy=None default is assumed - confirm against the original.
    return fetch_file(url, proxy)
339