# Copyright (c) 2014, Vienna University of Technology (TU Wien), Department
# of Geodesy and Geoinformation (GEO).
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the Vienna University of Technology - Department of
# Geodesy and Geoinformation nor the names of its contributors may be used to
# endorse or promote products derived from this software without specific
# prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY,
# DEPARTMENT OF GEODESY AND GEOINFORMATION BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Author: Thomas Mistelbauer Thomas.Mistelbauer@geo.tuwien.ac.at
# Creation date: 2014-06-30
from datetime import datetime, timedelta
from netCDF4 import Dataset, num2date, date2num
from poets.grid.grids import ShapeGrid, RegularGrid
from poets.image.resampling import resample_to_shape, average_layers
from poets.io.download import download_http, download_ftp, download_sftp, \
get_file_date, download_local
from poets.io.fileformats import select_file
from poets.io.unpack import unpack, check_compressed
import math as ma
import numpy as np
import os
import pandas as pd
import poets.grid.grids as gr
import poets.image.netcdf as nc
import poets.timedate.dateindex as dt
import shutil
class BasicSource(object):
"""Base Class for data sources.
Parameters
----------
name : str
Name of the data source.
filename : str
Structure/convention of the file name.
filedate : dict
Position of the date fields in the filename, given as tuples.
temp_res : str
Temporal resolution of the source.
rootpath : str
Root path where all data will be stored.
host : str
Link to data host.
protocol : str
Protocol for data transfer.
username : str, optional
Username for data access.
password : str, optional
Password for data access.
port : int, optional
Port to data host, defaults to 22.
directory : str, optional
Path to data on host.
dirstruct : list of strings, optional
Structure of source directory, each list item represents a
subdirectory.
regions : list of str, optional
List of regions where data from source is available. Uses all regions
specified in dest_regions if not set.
begin_date : datetime, optional
Date from which on data is available.
variables : string or list of strings, optional
Variables used from data source, defaults to ['dataset'].
nan_value : int, float, optional
NaN value of the original data as given by the data provider.
valid_range : tuple of int or float, optional
Valid range of data, given as (minimum, maximum).
data_range : tuple of int or float, optional
Range of the values as given in the raw data (minimum, maximum).
Will be scaled to valid_range.
ffilter : str, optional
Pattern that appears in the filename. Can be used to filter out
unneeded files if multiple files per date are provided.
colorbar : str, optional
Colorbar to use, use one from
http://matplotlib.org/examples/color/colormaps_reference.html,
defaults to jet.
unit : str, optional
Unit of dataset for displaying in legend. Does not have to be set
if unit is specified in input file metadata. Defaults to None.
dest_nan_value : int, float, optional
NaN value in the final NetCDF file.
dest_regions : list of str, optional
Regions of interest where data should be resampled to.
dest_sp_res : int, float, optional
Spatial resolution of the destination NetCDF file, defaults to 0.25
degree.
dest_temp_res : string, optional
Temporal resolution of the destination NetCDF file, possible values:
('day', 'week', 'dekad', 'month'), defaults to dekad.
dest_start_date : datetime, optional
Start date of the destination NetCDF file, defaults to 2000-01-01.
src_file : dict of str, optional
Path to file that contains source. Uses default NetCDF file if None.
Key of dict must be regions as set in regions attribute.
Attributes
----------
name : str
Name of the data source.
filename : str
Structure/convention of the file name.
filedate : dict
Position of the date fields in the filename, given as tuples.
temp_res : str
Temporal resolution of the source.
host : str
Link to data host.
protocol : str
Protocol for data transfer.
username : str
Username for data access.
password : str
Password for data access.
port : int
Port to data host.
directory : str
Path to data on host.
dirstruct : list of strings
Structure of source directory, each list item represents a
subdirectory.
regions : list of str
List of regions where data from source is available.
begin_date : datetime
Date from which on data is available.
ffilter : str
Pattern that appears in the filename.
colorbar : str
Colorbar to use.
unit : str
Unit of dataset for displaying in legend.
variables : list of strings
Variables used from data source.
nan_value : int, float
Not a number value of the original data as given by the data provider.
valid_range : tuple of int or float
Valid range of data, given as (minimum, maximum).
data_range : tuple of int or float
Range of the values as given in the raw data (minimum, maximum).
dest_nan_value : int, float
NaN value in the final NetCDF file.
tmp_path : str
Path where temporary files are stored.
rawdata_path : str
Path where original files are stored.
data_path : str
Path where resampled NetCDF file is stored.
dest_regions : list of str
Regions of interest where data is resampled to.
dest_sp_res : int, float
Spatial resolution of the destination NetCDF file.
dest_temp_res : string
Temporal resolution of the destination NetCDF file.
dest_start_date : datetime.datetime
First date of the dataset in the destination NetCDF file.
src_file : dict of str
Path to the files that contain the source data, one per region.
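Examples
--------
A minimal construction sketch; the host, filename convention and
root path below are illustrative placeholders, not values shipped
with poets:
>>> from datetime import datetime
>>> source = BasicSource(name='TEST',
...                      filename='data_{YYYY}{MM}{DD}.nc',
...                      filedate={'YYYY': (5, 9), 'MM': (9, 11),
...                                'DD': (11, 13)},
...                      temp_res='daily',
...                      rootpath='/tmp/poets',
...                      host='http://example.com/data/',
...                      protocol='http',
...                      dest_regions=['AU'])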
"""
def __init__(self, name, filename, filedate, temp_res, rootpath,
host, protocol, username=None, password=None, port=22,
directory=None, dirstruct=None, regions=None,
begin_date=None, ffilter=None, colorbar='jet',
variables=None, nan_value=None, valid_range=None, unit=None,
dest_nan_value=-99, dest_regions=None, dest_sp_res=0.25,
dest_temp_res='dekad', dest_start_date=datetime(2000, 1, 1),
data_range=None, src_file=None):
self.name = name
self.filename = filename
self.filedate = filedate
self.temp_res = temp_res
self.host = host
self.protocol = protocol
self.username = username
self.password = password
self.port = port
self.directory = directory
self.dirstruct = dirstruct
if begin_date is None:
self.begin_date = dest_start_date
else:
self.begin_date = begin_date
if isinstance(variables, str):
self.variables = [variables]
else:
self.variables = variables
self.ffilter = ffilter
self.unit = unit
self.nan_value = nan_value
self.valid_range = valid_range
self.data_range = data_range
self.colorbar = colorbar
if isinstance(regions, str):
self.regions = [regions]
else:
self.regions = regions
self.dest_nan_value = dest_nan_value
if isinstance(dest_regions, str):
self.dest_regions = [dest_regions]
else:
self.dest_regions = dest_regions
self.dest_sp_res = dest_sp_res
self.dest_temp_res = dest_temp_res
self.dest_start_date = dest_start_date
self.rawdata_path = os.path.join(rootpath, 'RAWDATA', name)
self.tmp_path = os.path.join(rootpath, 'TMP')
if not os.path.exists(self.tmp_path):
os.mkdir(self.tmp_path)
self.data_path = os.path.join(rootpath, 'DATA')
if not os.path.exists(self.data_path):
os.mkdir(self.data_path)
if self.host[-1] != '/':
self.host += '/'
if self.directory is not None and self.directory[-1] != '/':
self.directory += '/'
if src_file is None:
self.src_file = {}
for reg in self.dest_regions:
self.src_file[reg] = os.path.join(self.data_path, reg + '_' +
str(self.dest_sp_res) + '_'
+ str(self.dest_temp_res)
+ '.nc')
else:
self.src_file = src_file
def _check_current_date(self, begin=True, end=True):
"""Helper method that checks the current date of individual variables
in the netCDF data file.
Parameters
----------
begin : bool, optional
If set True, begin will be returned as None.
end : bool, optional
If set True, end will be returned as None.
Returns
-------
dates : dict of dicts
Dictionary with dates of each parameter. None if no date available.
"""
dates = {}
for region in self.dest_regions:
if self.regions is not None:
if region not in self.regions:
continue
nc_name = self.src_file[region]
if os.path.exists(nc_name):
dates[region] = {}
variables = self.get_variables()
with Dataset(nc_name, 'r', format='NETCDF4') as nc:
for var in variables:
dates[region][var] = []
if begin:
for i in range(0, nc.variables['time'].size):
if (nc.variables[var][i].mask.min() or
ma.isnan(np.nanmax(nc.variables[var][i]))):
continue
else:
times = nc.variables['time']
dat = num2date(times[i], units=times.units,
calendar=times.calendar)
dates[region][var].append(dat)
break
else:  # loop found no valid layer
dates[region][var].append(None)
else:  # begin date not requested
dates[region][var].append(None)
if end:
for i in range(nc.variables['time'].size - 1, -1, -1):
if (nc.variables[var][i].mask.min() or
ma.isnan(np.nanmax(nc.variables[var][i]))):
continue
else:
times = nc.variables['time']
dat = num2date(times[i], units=times.units,
calendar=times.calendar)
dates[region][var].append(dat)
break
else:  # loop found no valid layer
dates[region][var].append(None)
else:  # end date not requested
dates[region][var].append(None)
if dates[region][var] in [[None], []]:
dates[region][var] = [None, None]
else:
dates = None
break
return dates
def _get_download_date(self):
"""Gets the date from which to start the data download.
Returns
-------
begin : datetime
Date from which to start the data download.
"""
dates = self._check_current_date(begin=False)
if dates is not None:
begin = datetime.now()
for region in self.dest_regions:
if self.regions is not None:
if region not in self.regions:
continue
variables = self.get_variables()
if variables == []:
begin = self.dest_start_date
else:
for var in variables:
if dates[region][var][1] is not None:
if dates[region][var][1] < begin:
begin = dates[region][var][1]
begin += timedelta(days=1)
else:
if self.dest_start_date < self.begin_date:
begin = self.begin_date
else:
begin = self.dest_start_date
else:
begin = self.begin_date
return begin
def _get_tmp_filepath(self, prefix, region):
"""Creates path to a temporary directory.
Returns
-------
str
Path to the temporary direcotry
"""
filename = ('_' + prefix + '_' + region + '_' + str(self.dest_sp_res)
+ '_' + str(self.dest_temp_res) + '.nc')
return os.path.join(self.tmp_path, filename)
def _resample_spatial(self, region, begin, end, delete_rawdata,
shapefile=None):
"""Helper method that calls spatial resampling routines.
Parameters
----------
region : str
FIPS country code (https://en.wikipedia.org/wiki/FIPS_country_code)
begin : datetime
Start date of resampling.
end : datetime
End date of resampling.
delete_rawdata : bool
True if original downloaded files should be deleted after
resampling.
shapefile : str, optional
Path to shape file, uses "world country admin boundary shapefile"
by default.
"""
dest_file = self._get_tmp_filepath('spatial', region)
dirList = os.listdir(self.rawdata_path)
dirList.sort()
if region == 'global':
grid = gr.RegularGrid(sp_res=self.dest_sp_res)
else:
grid = gr.ShapeGrid(region, self.dest_sp_res, shapefile)
for item in dirList:
src_file = os.path.join(self.rawdata_path, item)
fdate = get_file_date(item, self.filedate)
if begin is not None:
if fdate < begin:
continue
if end is not None:
if fdate > end:
continue
if check_compressed(src_file):
dirname = os.path.splitext(item)[0]
dirpath = os.path.join(self.rawdata_path, dirname)
unpack(src_file)
src_file = select_file(os.listdir(dirpath))
src_file = os.path.join(dirpath, src_file)
if begin is not None:
if fdate < begin:
if check_compressed(item):
shutil.rmtree(os.path.join(self.rawdata_path,
os.path.splitext(item)[0]))
continue
if end is not None:
if fdate > end:
if check_compressed(item):
shutil.rmtree(os.path.join(self.rawdata_path,
os.path.splitext(item)[0]))
continue
print('.', end=' ')
image, _, _, _, timestamp, metadata = \
resample_to_shape(src_file, region, self.dest_sp_res, grid,
self.name, self.nan_value,
self.dest_nan_value, self.variables,
shapefile)
if timestamp is None:
timestamp = get_file_date(item, self.filedate)
if self.temp_res == self.dest_temp_res:
filename = (region + '_' + str(self.dest_sp_res) + '_'
+ str(self.dest_temp_res) + '.nc')
dfile = os.path.join(self.data_path, filename)
nc.save_image(image, timestamp, region, metadata, dfile,
self.dest_start_date, self.dest_sp_res,
self.dest_nan_value, shapefile,
self.dest_temp_res)
else:
nc.write_tmp_file(image, timestamp, region, metadata,
dest_file, self.dest_start_date,
self.dest_sp_res, self.dest_nan_value,
shapefile)
# deletes unpacked files if existing
if check_compressed(item):
shutil.rmtree(os.path.join(self.rawdata_path,
os.path.splitext(item)[0]))
print('')
def _resample_temporal(self, region, shapefile=None):
"""Helper method that calls temporal resampling routines.
Parameters
----------
region : str
Identifier of the region in the shapefile. If the default shapefile
is used, this would be the FIPS country code.
shapefile : str, optional
Path to shape file, uses "world country admin boundary shapefile"
by default.
"""
src_file = self._get_tmp_filepath('spatial', region)
if not os.path.exists(src_file):
print('[INFO] No data available for this period')
return False
data = {}
variables, _, period = nc.get_properties(src_file)
dtindex = dt.get_dtindex(self.dest_temp_res, period[0], period[1])
for date in dtindex:
# skip if data for period is not complete
# if date > period[1]:
# continue
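# dekad index dates are the 10th, 20th and last day of each month;
# the averaging window therefore starts nine days earlier for the
# first two dekads and on the 21st for the third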
if self.dest_temp_res == 'dekad':
if date.day < 21:
begin = datetime(date.year, date.month, date.day - 10 + 1)
else:
begin = datetime(date.year, date.month, 21)
end = date
else:
begin = period[0]
end = date
data = {}
metadata = {}
for var in variables:
img, _, _, meta = \
nc.read_variable(src_file, var, begin, end)
metadata[var] = meta
data[var] = average_layers(img, self.dest_nan_value)
dest_file = self.src_file[region]
nc.save_image(data, date, region, metadata, dest_file,
self.dest_start_date, self.dest_sp_res,
self.dest_nan_value, shapefile, self.dest_temp_res)
# delete intermediate netCDF file
print('')
os.unlink(src_file)
def _scale_values(self, data):
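"""Linearly rescales data from data_range to valid_range.
Implements v' = (v - d0) / (d1 - d0) * (v1 - v0) + v0 with
(d0, d1) = data_range and (v0, v1) = valid_range; e.g. a raw value
of 50 with data_range (0, 200) and valid_range (0, 100) maps to 25.
Data is returned unchanged if either range is not set.
"""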
if self.valid_range is not None:
if self.data_range is not None:
data = ((data - self.data_range[0]) /
(self.data_range[1] - self.data_range[0]) *
(self.valid_range[1] - self.valid_range[0]) +
self.valid_range[0])
return data
def download(self, download_path=None, begin=None, end=None):
"""Downloads data from the source host.
Parameters
----------
download_path : str, optional
Currently ignored; files are always saved to the rawdata path of
the source.
begin : datetime, optional
Start date of the download, defaults to None.
end : datetime, optional
End date of the download, defaults to None.
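Examples
--------
A usage sketch, assuming source is an initialized BasicSource
instance; the date range is illustrative:
>>> from datetime import datetime
>>> source.download(begin=datetime(2014, 1, 1),
...                 end=datetime(2014, 1, 31))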
"""
if begin is None:
if self.dest_start_date < self.begin_date:
begin = self.begin_date
else:
begin = self.dest_start_date
if self.protocol in ['HTTP', 'http']:
check = download_http(self.rawdata_path, self.host,
self.directory, self.filename, self.filedate,
self.dirstruct, begin=begin, end=end,
ffilter=self.ffilter)
elif self.protocol in ['FTP', 'ftp']:
check = download_ftp(self.rawdata_path, self.host, self.directory,
self.filedate, self.port, self.username,
self.password, self.dirstruct, begin=begin,
end=end, ffilter=self.ffilter)
elif self.protocol in ['SFTP', 'sftp']:
check = download_sftp(self.rawdata_path, self.host,
self.directory, self.port, self.username,
self.password, self.filedate, self.dirstruct,
begin=begin, end=end, ffilter=self.ffilter)
elif self.protocol in ['local', 'LOCAL']:
check = download_local(self.rawdata_path, directory=self.host,
filedate=self.filedate,
dirstruct=self.dirstruct, begin=begin,
end=end, ffilter=self.ffilter)
else:
raise ValueError('Unknown protocol: ' + str(self.protocol))
return check
def resample(self, begin=None, end=None, delete_rawdata=False,
shapefile=None, stepwise=True):
"""Resamples source data to given spatial and temporal resolution.
Writes resampled images into a netCDF data file. Deletes original
files if flag delete_rawdata is set True.
Parameters
----------
begin : datetime, optional
Start date of resampling.
end : datetime, optional
End date of resampling.
delete_rawdata : bool, optional
Original files will be deleted from rawdata_path if set to True.
shapefile : str, optional
Path to shape file, uses "world country admin boundary shapefile"
by default.
stepwise : bool, optional
If True, resamples period by period along the destination temporal
resolution; if False, resamples the whole interval at once.
Defaults to True.
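Examples
--------
A usage sketch, assuming source is an initialized BasicSource
instance with downloaded raw data; dates are illustrative:
>>> source.resample(begin=datetime(2014, 1, 1),
...                 end=datetime(2014, 1, 31))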
"""
if len(os.listdir(self.tmp_path)) != 0:
for fname in os.listdir(self.tmp_path):
if '.nc' in fname:
os.remove(os.path.join(self.tmp_path, fname))
# clean rawdata folder from subdirectories
for item in os.listdir(self.rawdata_path):
if os.path.isdir(os.path.join(self.rawdata_path, item)):
shutil.rmtree(os.path.join(self.rawdata_path, item))
begin, end = self._check_begin_end(begin, end)
if begin > end:
print('[INFO] everything up to date')
return '[INFO] everything up to date'
if stepwise:
drange = dt.get_dtindex(self.dest_temp_res, begin, end)
for i, date in enumerate(drange):
if i == 0:
start = begin
else:
if self.dest_temp_res in ['dekad', 'dekadal', 'week',
'weekly', 'month', 'monthly']:
start = drange[i - 1] + timedelta(days=1)
else:
start = date
stop = date
print('[INFO] Resampling ' + str(start) + ' to ' + str(stop))
for region in self.dest_regions:
if self.regions is not None:
if region not in self.regions:
continue
print('[INFO] resampling to region ' + region)
print('[INFO] performing spatial resampling ', end='')
self._resample_spatial(region, start, stop, delete_rawdata,
shapefile)
if self.temp_res == self.dest_temp_res:
print('[INFO] skipping temporal resampling')
else:
print('[INFO] performing temporal resampling ', end='')
self._resample_temporal(region, shapefile)
else:
print('[INFO] ' + str(begin) + '-' + str(end))
for region in self.dest_regions:
if self.regions is not None:
if region not in self.regions:
continue
print('[INFO] resampling to region ' + region)
print('[INFO] performing spatial resampling ', end='')
self._resample_spatial(region, begin, end, delete_rawdata,
shapefile)
if self.temp_res == self.dest_temp_res:
print('[INFO] skipping temporal resampling')
else:
print('[INFO] performing temporal resampling ', end='')
self._resample_temporal(region, shapefile)
if delete_rawdata:
print('[INFO] Cleaning up rawdata')
dirList = os.listdir(self.rawdata_path)
dirList.sort()
for item in dirList:
src_file = os.path.join(self.rawdata_path, item)
os.unlink(src_file)
def download_and_resample(self, download_path=None, begin=None, end=None,
delete_rawdata=False, shapefile=None):
"""Downloads and resamples data.
Parameters
----------
download_path : str, optional
Currently ignored; files are always saved to the rawdata path of
the source.
begin : datetime.date, optional
Set either to the first date of the remote repository or to the
date of the last file in the local repository.
end : datetime.date, optional
Set to today if none given.
delete_rawdata : bool, optional
Original files will be deleted from rawdata_path if set to True.
shapefile : str, optional
Path to shape file, uses "world country admin boundary shapefile"
by default.
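Examples
--------
A usage sketch, assuming source is an initialized BasicSource
instance; dates are illustrative:
>>> source.download_and_resample(begin=datetime(2014, 1, 1),
...                              end=datetime(2014, 1, 31),
...                              delete_rawdata=True)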
"""
begin, end = self._check_begin_end(begin, end)
if begin > end:
print('[INFO] everything up to date')
return '[INFO] everything up to date'
drange = dt.get_dtindex(self.dest_temp_res, begin, end)
for i, date in enumerate(drange):
if i == 0:
start = begin
else:
if self.dest_temp_res in ['dekad', 'dekadal', 'week',
'weekly', 'month', 'monthly']:
start = drange[i - 1] + timedelta(days=1)
else:
start = date
stop = date
filecheck = self.download(download_path, start, stop)
if filecheck is True:
self.resample(start, stop, delete_rawdata, shapefile, False)
else:
print('[WARNING] no data available for this date')
def read_ts(self, location, region=None, variable=None, shapefile=None,
scaled=True):
"""Gets a time series from the netCDF file for a grid point.
Parameters
----------
location : int or tuple of floats
Either a grid point index given as integer value or a
longitude/latitude pair given as tuple.
region : str, optional
Region of interest, set to first defined region if not set.
variable : str, optional
Variable to display, selects all available variables if None.
shapefile : str, optional
Path to custom shapefile.
scaled : bool, optional
If True, data will be scaled to a predefined range; if False, data
will be shown as given in the raw data file; defaults to True.
Returns
-------
df : pd.DataFrame
Timeseries for selected variables.
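Examples
--------
A usage sketch, assuming source is an initialized and resampled
BasicSource instance; the coordinates are illustrative:
>>> ts = source.read_ts((16.37, 48.21), region='AU')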
"""
if region is None:
region = self.dest_regions[0]
if isinstance(location, tuple):
if region == 'global':
grid = RegularGrid(self.dest_sp_res)
else:
grid = ShapeGrid(region, self.dest_sp_res, shapefile)
gp, _ = grid.find_nearest_gpi(location[0], location[1])
else:
gp = location
if variable is None:
variable = self.get_variables()
else:
variable = self.check_variable(variable)
variable = [variable]
source_file = self.src_file[region]
var_dates = self._check_current_date()
with Dataset(source_file, 'r', format='NETCDF4') as nc:
time = nc.variables['time']
dates = num2date(time[:], units=time.units, calendar=time.calendar)
position = np.where(nc.variables['gpi'][:] == gp)
lat_pos = position[0][0]
lon_pos = position[1][0]
df = pd.DataFrame(index=pd.DatetimeIndex(dates))
for ncvar in variable:
begin = np.where(dates == var_dates[region][ncvar][0])[0][0]
end = np.where(dates == var_dates[region][ncvar][1])[0][0]
df[ncvar] = np.nan
for i in range(begin, end + 1):
df.loc[df.index[i], ncvar] = nc.variables[ncvar][i, lat_pos,
lon_pos]
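# Undo the stored scaling: by this convention a negative
# scaling_factor attribute is undone by multiplication, a positive
# one by division (presumably mirroring how the values were scaled
# on write).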
if 'scaling_factor' in nc.variables[ncvar].ncattrs():
vvar = nc.variables[ncvar]
if vvar.getncattr('scaling_factor') < 0:
df[ncvar] = (df[ncvar] *
float(vvar.getncattr('scaling_factor')))
else:
df[ncvar] = (df[ncvar] /
float(vvar.getncattr('scaling_factor')))
if scaled:
if self.valid_range is not None:
if self.data_range is not None:
df[ncvar] = self._scale_values(df[ncvar])
return df
def read_img(self, date, region=None, variable=None, scaled=True):
"""Gets an image from the netCDF file for a certain date.
Parameters
----------
date : datetime
Date of the image.
region : str, optional
Region of interest, set to first defined region if not set.
variable : str, optional
Variable to display, selects the first available variable if None.
scaled : bool, optional
If True, data will be scaled to a predefined range; if False, data
will be shown as given in the raw data file; defaults to True.
Returns
-------
img : numpy.ndarray
Image of selected date.
lon : numpy.array
Array with longitudes.
lat : numpy.array
Array with latitudes.
metadata : dict
Dictionary containing metadata of the variable.
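Examples
--------
A usage sketch, assuming source is an initialized and resampled
BasicSource instance; the date is illustrative:
>>> img, lon, lat, meta = source.read_img(datetime(2014, 1, 10),
...                                       region='AU')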
"""
if region is None:
region = self.dest_regions[0]
if variable is None:
variable = self.get_variables()[0]
else:
# Renames variable name to SOURCE_variable
variable = self.check_variable(variable)
source_file = self.src_file[region]
# map date to the period (e.g. dekad) it belongs to
date = dt.check_period(self.dest_temp_res, date)
with Dataset(source_file, 'r', format='NETCDF4') as nc:
time = nc.variables['time']
datenum = date2num(date, units=time.units, calendar=time.calendar)
position = np.where(time[:] == datenum)[0][0]
var = nc.variables[variable]
img = var[position]
lon = nc.variables['lon'][:]
lat = nc.variables['lat'][:]
metadata = {}
for attr in var.ncattrs():
if attr[0] != '_' and attr != 'scale_factor':
metadata[attr] = var.getncattr(attr)
if not metadata:
metadata = None
if 'scaling_factor' in var.ncattrs():
if metadata['scaling_factor'] < 0:
img = img * float(metadata['scaling_factor'])
else:
img = img / float(metadata['scaling_factor'])
if scaled:
if self.valid_range is not None:
if self.data_range is not None:
img = self._scale_values(img)
return img, lon, lat, metadata
def get_variables(self):
"""
Gets all variables given in the NetCDF file.
Returns
-------
variables : list of str
Variables given in the NetCDF file.
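Examples
--------
A sketch for a hypothetical source named 'TEST' whose file stores
the default variable:
>>> source.get_variables()
['TEST_dataset']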
"""
nc_vars = []
for reg in self.dest_regions:
vari, _, _ = nc.get_properties(self.src_file[reg])
if vari is None:
continue
for v in vari:
if v not in nc_vars:
nc_vars.append(v)
variables = []
if self.variables is not None:
for var in self.variables:
if var in nc_vars:
variables.append(var)
else:
if self.name + '_' + var in nc_vars:
variables.append(self.name + '_' + var)
else:
for var in nc_vars:
if self.name + '_dataset' in var:
variables.append(var)
elif self.name in var:
variables.append(var)
return variables
def check_variable(self, variable):
"""
Checks if a variable exists in a source and returns its correct name.
Parameters
----------
variable : str
Variable to check.
Returns
-------
varname : str
Name of the variable in the source.
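Examples
--------
A sketch for a hypothetical source named 'TEST' with no explicit
variables set:
>>> source.check_variable('dataset')
'TEST_dataset'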
"""
varname = ''
if self.variables is not None:
for var in self.variables:
if variable in var:
varname = variable
break
elif self.name + '_' + variable in var:
varname = self.name + '_' + variable
break
else:
if variable == self.name + '_' + var:
varname = self.name + '_' + var
break
else:
for var in self.get_variables():
if variable == var:
varname = variable
break
else:
if self.name + '_' + variable in var:
varname = self.name + '_' + variable
break
return varname
def _check_begin_end(self, begin, end):
"""
Checks begin and end date and returns valid dates.
Parameters
----------
begin : datetime
Begin date to check.
end : datetime
End date to check.
Returns
-------
begin : datetime
Begin date.
end : datetime
End date.
"""
if begin is None:
if self.dest_start_date < self.begin_date:
begin = self.begin_date
else:
begin = self.dest_start_date
if begin < self._get_download_date():
begin = self._get_download_date()
# start one period earlier to close possible gaps
begin = begin - timedelta(days=1)
begin, _ = dt.check_period_boundaries(self.dest_temp_res, begin)
if begin < self.begin_date:
begin = self.begin_date
if begin < self.dest_start_date:
begin = self.dest_start_date
if end is None:
end = datetime.now()
return begin, end
def fill_gaps(self, begin=None, end=None):
"""
Detects gaps in data and tries to fill them by downloading and
resampling the data within these periods.
Parameters
----------
begin : datetime, optional
Begin date of the interval to check, defaults to None.
end : datetime, optional
End date of the interval to check, defaults to None.
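Examples
--------
A usage sketch, assuming source is an initialized and resampled
BasicSource instance; dates are illustrative:
>>> source.fill_gaps(begin=datetime(2014, 1, 1),
...                  end=datetime(2014, 3, 31))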
"""
gaps = []
for region in self.dest_regions:
if self.regions is not None:
if region not in self.regions:
continue
_, _, period = nc.get_properties(self.src_file[region])
if begin is None:
if self.begin_date < self.dest_start_date:
begin = self.dest_start_date
else:
begin = self.begin_date
if end is None:
end = period[1]
drange = dt.get_dtindex(self.dest_temp_res, begin, end)
for date in drange:
nonans = []
for var in self.get_variables():
img, _, _, _ = self.read_img(date, region, var)
if np.nanmean(img) is not np.ma.masked:
nonans.append(1)
if len(nonans) == 0:
if date not in gaps:
gaps.append(date)
if len(gaps) == 0:
print('[INFO] No gaps found.')
else:
print('[INFO] Found ' + str(len(gaps)) +
' gap(s), attempting to fill...')
for date in gaps:
if self.dest_temp_res in ['day', 'daily']:
begin = date
end = date
else:
begin, end = dt.check_period_boundaries(self.dest_temp_res,
date)
self.download_and_resample(begin=begin, end=end)