# Copyright (c) 2014, Vienna University of Technology (TU Wien), Department
# of Geodesy and Geoinformation (GEO).
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the Vienna University of Technology - Department of
# Geodesy and Geoinformation nor the names of its contributors may be used to
# endorse or promote products derived from this software without specific
# prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY,
# DEPARTMENT OF GEODESY AND GEOINFORMATION BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Author: Thomas Mistelbauer Thomas.Mistelbauer@geo.tuwien.ac.at
# Creation date: 2014-06-30
import datetime
import os
import sys

from netCDF4 import Dataset, num2date, date2num
import numpy as np
import pandas as pd

import poets.image.netcdf as nt
from poets.image.resampling import resample_to_shape, average_layers
from poets.io.download import download_http, download_ftp, download_sftp, \
    get_file_date
import poets.timedate.dateindex as dt
from poets.timedate.dekad import check_dekad
class BasicSource(object):
    """Base Class for data sources.

    Parameters
    ----------
    name : str
        Name of the data source.
    filename : str
        Structure/convention of the file name.
    filedate : dict
        Position of date fields in filename, given as tuple.
    temp_res : str
        Temporal resolution of the source.
    rootpath : str
        Root path where all data will be stored.
    host : str
        Link to data host.
    protocol : str
        Protocol for data transfer ('http', 'ftp' or 'sftp', any case).
    username : str, optional
        Username for data access.
    password : str, optional
        Password for data access.
    port : int, optional
        Port to data host, defaults to 22.
    directory : str, optional
        Path to data on host.
    dirstruct : list of strings
        Structure of source directory, each list item represents a
        subdirectory.
    begin_date : datetime.datetime, optional
        Date from which on data is available, defaults to 2000-01-01.
    variables : list of strings, optional
        Variables used from data source, defaults to ['dataset'].
    nan_value : int, float, optional
        NaN value of the original data as given by the data provider.
    dest_nan_value : int, float, optional
        NaN value in the final NetCDF file.
    dest_regions : list of str, optional
        Regions of interest where data should be resampled to.
    dest_sp_res : int, float, optional
        Spatial resolution of the destination NetCDF file, defaults to 0.25
        degree.
    dest_temp_res : string, optional
        Temporal resolution of the destination NetCDF file, possible values:
        ('month', 'dekad'), defaults to dekad.
    dest_start_date : datetime.datetime, optional
        Start date of the destination NetCDF file, defaults to 2000-01-01.

    Attributes
    ----------
    name : str
        Name of the data source.
    filename : str
        Structure/convention of the file name.
    filedate : dict
        Position of date fields in filename, given as tuple.
    temp_res : str
        Temporal resolution of the source.
    host : str
        Link to data host, always ends with '/'.
    protocol : str
        Protocol for data transfer.
    username : str
        Username for data access.
    password : str
        Password for data access.
    port : int
        Port to data host.
    directory : str
        Path to data on host, ends with '/' when set.
    dirstruct : list of strings
        Structure of source directory, each list item represents a
        subdirectory.
    begin_date : datetime.datetime
        Date from which on data is available.
    variables : list of strings
        Variables used from data source.
    nan_value : int, float
        NaN value of the original data as given by the data provider.
    dest_nan_value : int, float, optional
        NaN value in the final NetCDF file.
    tmp_path : str
        Path where temporary files and original files are stored and
        downloaded (``<rootpath>/TMP/<name>``).
    data_path : str
        Path where resampled NetCDF file is stored (``<rootpath>/DATA``).
    dest_regions : list of str
        Regions of interest where data is resampled to.
    dest_sp_res : int, float
        Spatial resolution of the destination NetCDF file.
    dest_temp_res : string
        Temporal resolution of the destination NetCDF file.
    dest_start_date : datetime.datetime
        Start date of the destination NetCDF file.
    """

    def __init__(self, name, filename, filedate, temp_res, rootpath,
                 host, protocol, username=None, password=None, port=22,
                 directory=None, dirstruct=None,
                 begin_date=datetime.datetime(2000, 1, 1),
                 variables=None,
                 nan_value=None, dest_nan_value=-99, dest_regions=None,
                 dest_sp_res=0.25, dest_temp_res='dekad',
                 dest_start_date=datetime.datetime(2000, 1, 1)):

        self.name = name
        self.filename = filename
        self.filedate = filedate
        self.temp_res = temp_res
        self.host = host
        self.protocol = protocol
        self.username = username
        self.password = password
        self.port = port
        self.directory = directory
        self.dirstruct = dirstruct
        self.begin_date = begin_date
        # A fresh list per instance; a list literal as default argument
        # would be shared (and mutated) across all instances.
        self.variables = ['dataset'] if variables is None else variables
        self.nan_value = nan_value
        self.dest_nan_value = dest_nan_value
        self.dest_regions = dest_regions
        self.dest_sp_res = dest_sp_res
        self.dest_temp_res = dest_temp_res
        self.dest_start_date = dest_start_date
        self.tmp_path = os.path.join(rootpath, 'TMP', name)
        self.data_path = os.path.join(rootpath, 'DATA')

        # Normalise host/directory so they can be concatenated directly.
        # Truthiness guards avoid an IndexError on empty strings.
        if self.host and not self.host.endswith('/'):
            self.host += '/'

        if self.directory and not self.directory.endswith('/'):
            self.directory += '/'

    @staticmethod
    def _progress(text):
        """Writes `text` to stdout without appending a newline."""
        sys.stdout.write(text)
        sys.stdout.flush()

    def _default_begin(self):
        """Returns the later one of `dest_start_date` and `begin_date`."""
        if self.dest_start_date < self.begin_date:
            return self.begin_date
        return self.dest_start_date

    def _get_dest_filepath(self, region):
        """Returns the path of the resampled NetCDF file of a region."""
        filename = (region + '_' + str(self.dest_sp_res) + '_'
                    + str(self.dest_temp_res) + '.nc')
        return os.path.join(self.data_path, filename)

    @staticmethod
    def _find_valid_date(nc, ncvar, indices):
        """Finds the first time step in `indices` that holds data.

        Parameters
        ----------
        nc : netCDF4.Dataset
            Opened NetCDF file.
        ncvar : str
            Name of the variable inside the NetCDF file.
        indices : iterable of int
            Time indices in the order they should be checked.

        Returns
        -------
        date : datetime or None
            Date of the first checked layer that is not fully masked, None
            if the variable does not exist or has no such layer.
        """
        if ncvar not in nc.variables:
            return None

        times = nc.variables['time']
        for i in indices:
            # A layer is considered empty if every cell is masked.
            # getmaskarray also copes with plain (unmasked) arrays.
            if np.ma.getmaskarray(nc.variables[ncvar][i]).all():
                continue
            return num2date(times[i], units=times.units,
                            calendar=times.calendar)
        return None

    def _check_current_date(self, begin=True, end=True):
        """Helper method that checks the current date of individual variables
        in the netCDF data file.

        Parameters
        ----------
        begin : bool, optional
            If set False, the first date is not looked up and returned as
            None.
        end : bool, optional
            If set False, the last date is not looked up and returned as
            None.

        Returns
        -------
        dates : dict of dicts
            ``dates[region][variable]`` is a two element list
            ``[first_date, last_date]``; an entry is None if no date is
            available. None is returned instead of the dict if the NetCDF
            file of any region does not exist.
        """
        dates = {}

        for region in self.dest_regions:
            nc_name = self._get_dest_filepath(region)
            if not os.path.exists(nc_name):
                return None

            dates[region] = {}
            with Dataset(nc_name, 'r', format='NETCDF4') as nc:
                ntime = nc.variables['time'].size
                for var in self.variables:
                    ncvar = self.name + '_' + var
                    first = None
                    last = None
                    if begin:
                        # scan forward over ALL time steps
                        first = self._find_valid_date(nc, ncvar,
                                                      range(ntime))
                    if end:
                        # scan backward from the last time step
                        last = self._find_valid_date(
                            nc, ncvar, range(ntime - 1, -1, -1))
                    # Always two entries so callers can index [0] and [1].
                    dates[region][var] = [first, last]

        return dates

    def _get_download_date(self):
        """Gets the date from which to start the data download.

        Returns
        -------
        begin : datetime.datetime
            date from which to start the data download.
        """
        dates = self._check_current_date(begin=False)
        if dates is None:
            # no NetCDF file yet
            return self.begin_date

        begin = datetime.datetime.now()
        for region in self.dest_regions:
            for var in self.variables:
                last = dates[region][var][1]
                if last is None:
                    # variable has no data yet, start from the beginning
                    begin = self._default_begin()
                elif last < begin:
                    # continue on the day after the last stored image
                    begin = last + datetime.timedelta(days=1)
        return begin

    def _get_tmp_filepath(self, prefix, region):
        """Creates path to a temporary file.

        Parameters
        ----------
        prefix : str
            Prefix of the file name, e.g. 'spatial'.
        region : str
            Identifier of the region.

        Returns
        -------
        str
            Path to the temporary NetCDF file inside `tmp_path`.
        """
        filename = ('_' + prefix + '_' + region + '_' + str(self.dest_sp_res)
                    + '_' + str(self.dest_temp_res) + '.nc')
        return os.path.join(self.tmp_path, filename)

    def _resample_spatial(self, region, begin, end, delete_rawdata,
                          shapefile=None):
        """Helper method that calls spatial resampling routines.

        Parameters
        ----------
        region : str
            FIPS country code
            (https://en.wikipedia.org/wiki/FIPS_country_code)
        begin : datetime.datetime
            Start date of resampling, files dated earlier are skipped.
        end : datetime.datetime
            End date of resampling, files dated later are skipped.
        delete_rawdata : bool
            True if original downloaded files should be deleted after
            resampling
        shapefile : str, optional
            Path to shape file, passed on to the resampling routine.
        """
        # filename if tmp file is used
        dest_file = self._get_tmp_filepath('spatial', region)
        same_resolution = self.temp_res == self.dest_temp_res

        for item in sorted(os.listdir(self.tmp_path)):
            src_file = os.path.join(self.tmp_path, item)
            fdate = get_file_date(item, self.filedate)

            if begin is not None and fdate < begin:
                continue

            if end is not None:
                if fdate > end:
                    continue
                # progress marker, one dot per processed file
                self._progress('. ')

            image, _, _, _, timestamp, metadata = \
                resample_to_shape(src_file, region, self.dest_sp_res,
                                  self.name, self.nan_value,
                                  self.dest_nan_value, shapefile)

            if timestamp is None:
                # fall back to the date encoded in the file name
                timestamp = fdate

            if same_resolution:
                # no temporal resampling needed, write the final file
                nt.save_image(image, timestamp, region, metadata,
                              self._get_dest_filepath(region),
                              self.dest_start_date, self.dest_sp_res,
                              self.dest_nan_value, shapefile)
            else:
                # store for the subsequent temporal resampling step
                nt.write_tmp_file(image, timestamp, region, metadata,
                                  dest_file, self.dest_start_date,
                                  self.dest_sp_res, self.dest_nan_value,
                                  shapefile)

            if delete_rawdata:
                os.unlink(src_file)

        print('')

    def _resample_temporal(self, region, shapefile=None):
        """Helper method that calls temporal resampling routines.

        Parameters
        ----------
        region : str
            Identifier of the region in the shapefile. If the default
            shapefile is used, this would be the FIPS country code.
        shapefile : str, optional
            Path to shape file, uses "world country admin boundary shapefile"
            by default.

        Returns
        -------
        bool or None
            False if there is no spatially resampled file to work on,
            None otherwise.
        """
        src_file = self._get_tmp_filepath('spatial', region)

        if not os.path.exists(src_file):
            print('[Info] No data available for this period')
            return False

        dest_file = self._get_dest_filepath(region)
        variables, _, period = nt.get_properties(src_file)
        dtindex = dt.get_dtindex(self.dest_temp_res, period[0], period[1])

        for date in dtindex:
            if date > period[1]:
                continue
            print(date)

            if self.dest_temp_res == 'dekad':
                # dekads start on day 1, 11 and 21 of a month
                if date.day < 21:
                    begin = datetime.datetime(date.year, date.month,
                                              date.day - 10 + 1)
                else:
                    begin = datetime.datetime(date.year, date.month, 21)
            else:
                # NOTE(review): averages from the start of the tmp file up
                # to `date`; confirm this is intended when the file spans
                # more than one target period.
                begin = period[0]
            end = date

            data = {}
            metadata = {}
            for var in variables:
                img, _, _, meta = nt.read_image(src_file, var, begin, end)
                metadata[var] = meta
                data[var] = average_layers(img, self.dest_nan_value)

            nt.save_image(data, date, region, metadata, dest_file,
                          self.dest_start_date, self.dest_sp_res,
                          self.dest_nan_value, shapefile, self.dest_temp_res)

        # delete intermediate netCDF file
        print('')
        os.unlink(src_file)

    def download(self, download_path=None, begin=None, end=None):
        """Download data

        Parameters
        ----------
        download_path : str, optional
            Currently unused, files are always downloaded to `tmp_path`.
        begin : datetime.datetime, optional
            start date of download, defaults to the later one of
            `dest_start_date` and `begin_date`.
        end : datetime.datetime, optional
            end date of download, default to None

        Returns
        -------
        check
            Return value of the protocol specific download function.

        Raises
        ------
        ValueError
            If `protocol` is not one of HTTP, FTP or SFTP.
        """
        if begin is None:
            begin = self._default_begin()

        protocol = str(self.protocol).lower()

        if protocol == 'http':
            check = download_http(self.tmp_path, self.host,
                                  self.directory, self.filename,
                                  self.filedate, self.dirstruct, begin,
                                  end=end)
        elif protocol == 'ftp':
            check = download_ftp(self.tmp_path, self.host, self.directory,
                                 self.port, self.username, self.password,
                                 self.filedate, self.dirstruct, begin,
                                 end=end)
        elif protocol == 'sftp':
            check = download_sftp(self.tmp_path, self.host,
                                  self.directory, self.port, self.username,
                                  self.password, self.filedate,
                                  self.dirstruct, begin, end=end)
        else:
            # previously fell through to an UnboundLocalError on `check`
            raise ValueError('Unsupported protocol: %r' % (self.protocol,))

        return check

    def resample(self, begin=None, end=None, delete_rawdata=False,
                 shapefile=None):
        """Resamples source data to given spatial and temporal resolution.

        Writes resampled images into a netCDF data file. Deletes original
        files if flag delete_rawdata is set True.

        Parameters
        ----------
        begin : datetime.datetime
            Start date of resampling.
        end : datetime.datetime
            End date of resampling.
        delete_rawdata : bool
            Original files will be deleted from tmp_path if set 'True'.
        shapefile : str, optional
            Path to shape file, uses "world country admin boundary shapefile"
            by default.
        """
        for region in self.dest_regions:

            print('[INFO] resampling to region ' + region)
            self._progress('[INFO] performing spatial resampling ')

            self._resample_spatial(region, begin, end, delete_rawdata,
                                   shapefile)

            if self.temp_res == self.dest_temp_res:
                print('[INFO] skipping temporal resampling')
            else:
                self._progress('[INFO] performing temporal resampling ')
                self._resample_temporal(region, shapefile)

    def download_and_resample(self, download_path=None, begin=None, end=None,
                              delete_rawdata=False, shapefile=None):
        """Downloads and resamples data.

        Parameters
        ----------
        download_path : str
            Path where to save the downloaded files.
        begin : datetime.datetime, optional
            set either to first date of remote repository or date of
            last file in local repository
        end : datetime.datetime, optional
            set to today if none given
        delete_rawdata : bool, optional
            Original files will be deleted from tmp_path if set True
        shapefile : str, optional
            Path to shape file, uses "world country admin boundary shapefile"
            by default.
        """
        if begin is None:
            begin = self._default_begin()

        # Never start before the data that is already stored locally.
        # Looked up once, as every call opens the NetCDF files.
        available = self._get_download_date()
        if begin < available:
            begin = available

        if end is None:
            end = datetime.datetime.now()

        drange = dt.get_dtindex(self.dest_temp_res, begin, end)

        for i, date in enumerate(drange):
            if date > end:
                continue

            if i == 0:
                start = begin
            elif self.dest_temp_res == 'dekad':
                # period starts the day after the previous dekad ended
                start = drange[i - 1] + datetime.timedelta(days=1)
            else:
                start = date
            stop = date

            filecheck = self.download(download_path, start, stop)
            if filecheck is True:
                self.resample(start, stop, delete_rawdata, shapefile)
            else:
                print('[WARNING] no data available for this date')

    def read_ts(self, gp, region=None, variable=None):
        """Gets timeseries from netCDF file for a gridpoint.

        Parameters
        ----------
        gp : int
            Grid point index.
        region : str, optional
            Region of interest, set to first defined region if not set.
        variable : str, optional
            Variable to display, selects all available variables if None.

        Returns
        -------
        df : pd.DataFrame
            Timeseries for selected variables, one column per variable
            named ``<source name>_<variable>``. Time steps without data
            are NaN.

        Raises
        ------
        IndexError
            If the grid point is not part of the region.
        """
        if region is None:
            region = self.dest_regions[0]

        if variable is None:
            variable = self.variables
        else:
            variable = [variable]

        source_file = self._get_dest_filepath(region)

        with Dataset(source_file, 'r', format='NETCDF4') as nc:

            time = nc.variables['time']
            dates = num2date(time[:], units=time.units,
                             calendar=time.calendar)
            position = np.where(nc.variables['gpi'][:] == gp)
            if position[0].size == 0:
                raise IndexError('Grid point %r not found in region %r'
                                 % (gp, region))
            lat_pos = position[0][0]
            lon_pos = position[1][0]
            ntime = len(dates)
            df = pd.DataFrame(index=pd.DatetimeIndex(dates))

            for var in variable:
                # Renames variable name to SOURCE_variable
                ncvar = self.name + '_' + var
                series = np.full(ntime, np.nan)

                first = self._find_valid_date(nc, ncvar, range(ntime))
                last = self._find_valid_date(nc, ncvar,
                                             range(ntime - 1, -1, -1))

                if first is not None and last is not None:
                    begin = np.where(dates == first)[0][0]
                    end = np.where(dates == last)[0][0]
                    # read the whole range at once, masked cells become NaN
                    values = nc.variables[ncvar][begin:end + 1,
                                                 lat_pos, lon_pos]
                    values = np.ma.asarray(values).astype(float)
                    series[begin:end + 1] = np.ma.filled(values, np.nan)

                # assign the complete column; chained ``df[col][i] = x``
                # assignment is not guaranteed to write into `df`
                df[ncvar] = series

        return df

    def read_img(self, date, region=None, variable=None):
        """Gets images from netCDF file for certain date

        Parameters
        ----------
        date : datetime.datetime
            Date of the image.
        region : str, optional
            Region of interest, set to first defined region if not set.
        variable : str, optional
            Variable to display, either the name used in the NetCDF file
            or the plain variable name as listed in `variables`; selects
            the first available variable if None.

        Returns
        -------
        img : numpy.ndarray
            Image of selected date.
        lon : numpy.array
            Array with longitudes.
        lat : numpy.array
            Array with latitudes.

        Raises
        ------
        IndexError
            If the file holds no time step for the requested date.
        """
        if region is None:
            region = self.dest_regions[0]

        source_file = self._get_dest_filepath(region)

        # get dekad of date:
        date = check_dekad(date)

        with Dataset(source_file, 'r', format='NETCDF4') as nc:

            if variable is None:
                variable = self.name + '_' + self.variables[0]
            elif variable not in nc.variables:
                # accept the plain variable name like read_ts does
                prefixed = self.name + '_' + variable
                if prefixed in nc.variables:
                    variable = prefixed

            time = nc.variables['time']
            datenum = date2num(date, units=time.units,
                               calendar=time.calendar)

            matches = np.where(time[:] == datenum)[0]
            if matches.size == 0:
                raise IndexError('No image available for date %s' % (date,))
            position = matches[0]

            img = nc.variables[variable][position]
            lon = nc.variables['lon'][:]
            lat = nc.variables['lat'][:]

        return img, lon, lat
# Script entry point: this module only provides definitions, so running it
# directly does nothing.
if __name__ == "__main__":
    pass