Source code for pytomo.lib_dns
#!/usr/bin/env python
"""Module to retrieve the IP address of a URL out of a set of nameservers
Usage: To use the functions provided in this module independently,
first place yourself just above the pytomo folder. Then:
import pytomo.start_pytomo as start_pytomo
TIMESTAMP = 'test_timestamp'
start_pytomo.configure_log_file(TIMESTAMP)
import pytomo.lib_dns as lib_dns
url = 'www.example.com'
lib_dns.get_ip_addresses(url)
lib_dns.get_default_name_servers()
"""
from __future__ import with_statement, absolute_import
from urlparse import urlsplit
import sys
import time
from .dns import resolver as dns_resolver
from .dns import exception as dns_exception
from . import config_pytomo
def get_default_name_servers():
    """Return the address of the first default name server.

    Despite the plural name, only the first entry of the default
    resolver's ``nameservers`` list is returned, as a single value.

    >>> get_default_name_servers()
    ... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    '...............'
    >>> # Check for string of the format 'x.x.x.x'
    """
    # TODO: an empty nameserver list raises IndexError here; the exception
    # to catch in case of error still has to be determined.
    name_servers = dns_resolver.get_default_resolver().nameservers
    return name_servers[0]
def reduce_addresses(data):
    """Merge resolution results that share the same IP address.

    *data* is an iterable of ``(ip_address, resolver, req_duration)``
    tuples.  For each distinct IP address the resolver names are joined
    with ``'_'`` (in the order they were encountered) and the smallest
    request duration is kept.

    Return a list of ``(ip_address, resolvers, min_duration)`` tuples,
    ordered by first appearance of each IP address in *data*.

    >>> reduce_addresses([('1.1.1.1', 'default', 0.5),
    ...                   ('2.2.2.2', 'open', 0.25),
    ...                   ('1.1.1.1', 'goo', 0.125)])
    [('1.1.1.1', 'default_goo', 0.125), ('2.2.2.2', 'open', 0.25)]
    >>> reduce_addresses([])
    []
    """
    resolvers = {}
    req_times = {}
    # Plain dicts do not guarantee iteration order on every interpreter this
    # module targets, so remember the first-seen order explicitly.
    first_seen = []
    for ip_address, resolver, req_duration in data:
        if ip_address in resolvers:
            resolvers[ip_address] = '_'.join((resolvers[ip_address],
                                              resolver))
            req_times[ip_address] = min(req_times[ip_address], req_duration)
        else:
            first_seen.append(ip_address)
            resolvers[ip_address] = resolver
            req_times[ip_address] = req_duration
    return [(ip, resolvers[ip], req_times[ip]) for ip in first_seen]
def get_ip_addresses(url):
    """Resolve the host of *url* with every configured name server.

    The default name server and each ``(name, server)`` entry of
    ``config_pytomo.EXTRA_NAME_SERVERS_CC`` are queried in turn.  A name
    server that times out is removed from
    ``config_pytomo.EXTRA_NAME_SERVERS_CC`` so that it is ignored for the
    rest of the current round; any other failure is logged and the server
    is skipped for this call only.

    *url* may be a bare host name or a full URL with any scheme.

    Return the output of ``reduce_addresses`` applied to the collected
    ``(ip_address, '<name>_<server>', query_duration)`` tuples.
    """
    parts = urlsplit(url)
    if not parts.netloc:
        # urlsplit only recognises the host after "scheme://"; bare host
        # names (possibly followed by a path) need a scheme to be parsed.
        parts = urlsplit('http://' + url)
    # ``hostname`` strips credentials and port, which must not be part of
    # the DNS query; fall back to the raw netloc if it cannot be extracted.
    hostname = parts.hostname or parts.netloc
    results = []
    resolver = dns_resolver.Resolver()
    # Override the lifetime of the DNS query only when a timeout is set.
    if config_pytomo.DNS_TIMEOUT:
        resolver.lifetime = config_pytomo.DNS_TIMEOUT
    default_resolver = ('%s_default' % config_pytomo.PROVIDER,
                        get_default_name_servers())
    # The concatenation builds a new list, so shrinking
    # EXTRA_NAME_SERVERS_CC below does not disturb this loop.
    dns_servers = [default_resolver] + config_pytomo.EXTRA_NAME_SERVERS_CC
    for name, server in dns_servers:
        config_pytomo.LOG.debug("DNS resolution using %s on this address %s",
                                name, server)
        start_resol_time = time.time()
        resolver.nameservers = [server]
        try:
            rdatas = resolver.query(hostname)
            resol_duration = time.time() - start_resol_time
        except dns_resolver.Timeout:
            config_pytomo.LOG.info("DNS timeout for %s", name)
            # If we get a timeout then we ignore the DNS server for the rest
            # of the current round.  Rebuild the list instead of deleting
            # while iterating, and assign through a slice so that the same
            # list object is updated in place.
            extra_servers = config_pytomo.EXTRA_NAME_SERVERS_CC
            kept = [entry for entry in extra_servers if entry[0] != name]
            if len(kept) != len(extra_servers):
                extra_servers[:] = kept
                config_pytomo.LOG.info("Ignoring %s for current round of "
                                       "crawl", name)
            continue
        except dns_exception.DNSException as mes:
            config_pytomo.LOG.exception('Uncaught DNS Exception: %s', mes)
            continue
        except Exception as mes:
            # Deliberate best effort: one failing server must not abort the
            # resolution through the remaining ones.
            config_pytomo.LOG.exception('Uncaught non-DNS Exception: %s', mes)
            continue
        if not rdatas:
            continue
        try:
            address = rdatas[0].address
        except AttributeError as mes:
            config_pytomo.LOG.error('DNS failed: %s', mes)
            continue
        config_pytomo.LOG.debug("URL %s resolved as: %s", hostname, address)
        results.append((address, '_'.join((name, server)), resol_duration))
    return reduce_addresses(results)
if __name__ == '__main__':
    # Manual test entry point: run the doctests, then resolve the URL given
    # as first command-line argument, logging everything to stdout.
    import doctest
    import logging
    config_pytomo.LOG = logging.getLogger('dns_test')
    config_pytomo.LOG.setLevel(config_pytomo.LOG_LEVEL)
    HANDLER = logging.StreamHandler(sys.stdout)
    LOG_FORMATTER = logging.Formatter("%(asctime)s - %(filename)s:%(lineno)d - "
                                      "%(levelname)s - %(message)s")
    HANDLER.setFormatter(LOG_FORMATTER)
    config_pytomo.LOG.addHandler(HANDLER)
    doctest.testmod()
    if len(sys.argv) < 2:
        # Fail with an explicit message (and a non-zero exit status) rather
        # than an IndexError traceback when no URL is supplied.
        sys.exit('Usage: %s URL' % sys.argv[0])
    get_ip_addresses(sys.argv[1])