Package doapfiend :: Module doaplib
[hide private]
[frames | no frames]

Source Code for Module doapfiend.doaplib

  1  #!/usr/bin/env python 
  2  #pylint: disable-msg=C0103 
  3   
  4  """ 
  5   
  6  Library for parsing, displaying, querying and serializing DOAP 
  7   
  8  """ 
  9   
 10  import sys 
 11  import logging 
 12  import xmlrpclib 
 13  from cStringIO import StringIO 
 14  from xml.sax._exceptions import SAXParseException 
 15   
 16  from rdfalchemy import rdfSubject 
 17  from rdflib import ConjunctiveGraph, Namespace 
 18   
 19  from doapfiend.utils import fetch_file 
 20  from doapfiend.model import Project 
 21  from doapfiend.plugins import load_plugins 
 22   
 23  LOG = logging.getLogger('doapfiend')  # logger named 'doapfiend', used for debug output in this module
 24  XMLRPC_SERVER = xmlrpclib.ServerProxy('http://doapspace.org/xmlrpc/')  # XML-RPC proxy that query_by_homepage() calls
 25  DOAP_NS = Namespace('http://usefulinc.com/ns/doap#')  # DOAP namespace; indexed as DOAP_NS["homepage"] to build predicates
 26   
 27   
def follow_homepages(rdf_xml):
    '''
    If there is a 'doap:Project homepage' it will be looked up
    on doapspace.org using get_by_homepage to find any other
    DOAP. This is useful if we're looking at FOAF and a project
    is mentioned by homepage. It can also be used on DOAP files
    to search for additional DOAP files about the same project.

    @param rdf_xml: RDF serialized as XML
    @type rdf_xml: string

    @rtype: int
    @returns: 0 on success or 1 if there was no DOAP in the RDF
    '''
    homepages = list(get_homepages(rdf_xml))
    if not homepages:
        # Parenthesised single argument: prints the same text whether
        # print is a statement or a function.
        print('No DOAP found in that RDF.')
        return 1
    print_doap_by_homepages(homepages)
    # Return the documented success code explicitly instead of falling
    # off the end of the function (which yields None).
    return 0
49 50 77 78 97
def get_homepages(rdf, format='xml'):
    '''
    Yield every doap:homepage object found in some RDF

    Nothing is yielded when rdf_has_doap() reports that the parsed
    graph does not have the DOAP namespace defined.

    @param rdf: RDF
    @type rdf: string

    @param format: Serialization format
    @type format: string

    @rtype: generator
    @returns: homepages, each converted with str()
    '''
    graph = ConjunctiveGraph()
    graph.parse(StringIO(rdf), publicID=None, format=format)
    if not rdf_has_doap(graph):
        return
    homepage_predicate = DOAP_NS["homepage"]
    for _subject, homepage in graph.subject_objects(homepage_predicate):
        yield str(homepage)
116
def rdf_has_doap(store):
    '''
    Returns True if triplestore has the DOAP namespace defined

    @param store: triplestore
    @type store: rdflib ConjunctiveGraph

    @rtype: boolean
    @returns: True if triplestore contains DOAP namespace, False otherwise

    '''
    for namespace in store.namespaces():
        # Element 1 of each binding is compared against the DOAP
        # namespace (presumably bindings are (prefix, URI) pairs --
        # see rdflib's namespaces() documentation).
        if namespace[1] == DOAP_NS:
            return True
    # Explicit False so callers get the documented boolean rather than
    # an implicit None.
    return False
131
def load_graph(doap, format="xml", get_list=False):
    '''
    Load a DOAP profile into a RDFAlchemy/rdflib graph

    Supports any serialization format rdflib can parse (xml, n3, etc.)

    The parsed graph replaces rdfSubject.db. The process exits with
    status 2 (via sys.exit) when the RDF/XML cannot be parsed, or when
    a single Project is requested and none is found.

    @param doap: DOAP
    @type doap: string

    @param format: Serialization format we're parsing
    @type format: string

    @param get_list: Return list of Projects if True
    @type get_list: boolean

    @rtype: Project, or list of Project when get_list is True
    @returns: a Project{rdfSubject}, or a (possibly empty) list of them

    '''
    rdfSubject.db = ConjunctiveGraph()
    try:
        # Pass the format by keyword, as get_homepages() does: given
        # positionally it would land in parse()'s publicID parameter and
        # the requested serialization format would be ignored.
        rdfSubject.db.parse(StringIO(doap), format=format)
    except SAXParseException:
        sys.stderr.write("Error: Can't parse RDF/XML.\n")
        sys.exit(2)

    #If a serializer works on an entire graph, it doesn't matter which
    #Project instance we give it. This is true for N3, XML/RDF etc.
    #The 'text' serializer, on the other hand, prints out a separate
    #description for each Project found in a graph. This is useful for
    #'arbitrary' RDF, or FOAF where there may be several Projects listed.
    #Ideally exactly one Project should be specified in an .rdf file.
    #In the future load_graph will probably always return a list and let the
    #plugins determine what to do when there are more than one Project
    #found.
    if get_list:
        LOG.debug("doaplib: list of Projects")
        try:
            projs = list(Project.ClassInstances())
        except StopIteration:
            # Kept from the original as a defensive guard; an exhausted
            # iterator normally just produces an empty list here.
            sys.stderr.write('No DOAP found in that RDF.\n')
            sys.exit(2)
        LOG.debug("Found %s Projects.", len(projs))
        if not projs:
            # An empty result is reported but still returned to the caller.
            sys.stderr.write('No DOAP found in that RDF.\n')
        return projs

    LOG.debug("doaplib: single Project")
    try:
        return Project.ClassInstances().next()
    except StopIteration:
        sys.stderr.write('No DOAP found in that RDF.\n')
        sys.exit(2)
186
def get_by_pkg_index(index, project_name, proxy=None):
    '''
    Get DOAP for a package index project name

    Builtin indexes:

       - 'sf' SourceForge
       - 'fm' Freshmeat
       - 'py' Python Package Index

    Note there can be other package indexes available by
    third party plugins.

    Every loaded plugin is instantiated in turn; the first instance
    whose `prefix` attribute equals `index` performs the search.

    @param index: Package index two letter abbreviation
    @type index: string

    @param project_name: project name
    @type project_name: string

    @param proxy: Optional HTTP proxy URL
    @type proxy: string

    @rtype: string
    @return: text of file retrieved (None when no plugin matches `index`)

    '''
    plugin_classes = list(load_plugins())
    for plugin_class in plugin_classes:
        candidate = plugin_class()
        # Skip plugins that are not package-index plugins for `index`.
        if not (hasattr(candidate, 'prefix') and candidate.prefix == index):
            continue
        candidate.query = project_name
        return candidate.search(proxy)
219 220
def query_by_homepage(url):
    '''
    Get list of URLs for DOAP given a project's homepage.
    The list can contain zero or multiple URLs.

    The return format is:
      [(source, URL), (source, URL)...]

    'source' is the two letter package index abbreviation or 'ex' for
    external, 'external' meaning the DOAP was spidered on the web.

    Current indexes:

       - 'sf' SourceForge
       - 'fm' Freshmeat
       - 'py' Python Package Index
       - 'oh' Packages listed on Ohloh

    @param url: URL of homepage of a project
    @type url: string

    @rtype: list
    @return: A list of tuples containing URLs for DOAP found by homepage

    '''
    #Should check for env variable for alternate xmlrpc server for testing?
    doap_urls = XMLRPC_SERVER.query_by_homepage(url)
    return doap_urls
249 250 289 290
def get_serializer(format):
    '''
    Look up a serializer plugin by name and return its serialize method

    @param format: Name of serializer
    @type format: string

    @rtype: function
    @returns: The bound `serialize` method of the first matching plugin
              instance (None when no plugin has that name)
    '''
    #Only plugins exposing a `serialize` method are considered
    serializer_classes = get_plugin('serialize')
    for serializer_class in serializer_classes:
        instance = serializer_class()
        if not (instance.name == format):
            continue
        return instance.serialize
306 307
def get_plugin(method):
    """
    Return the loaded plugins that have an attribute named `method`

    @param method: name of plugin's method we're calling
    @type method: string

    @rtype: list
    @returns: list of plugins with `method`, as yielded by load_plugins()

    """
    return [plugin for plugin in load_plugins() if hasattr(plugin, method)]
326 327
def fetch_doap(url, proxy=None):
    '''
    Fetch DOAP by its URL or filename

    @param url: URL of DOAP profile in RDF/XML serialization
    @type url: string

    @param proxy: Optional proxy, passed unchanged to fetch_file
    @type proxy: string

    @rtype: text
    @return: DOAP
    '''
    doap_text = fetch_file(url, proxy)
    return doap_text
339