# Copyright (c) 2014, Vienna University of Technology (TU Wien), Department
# of Geodesy and Geoinformation (GEO).
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the Vienna University of Technology - Department of
# Geodesy and Geoinformation nor the names of its contributors may be used to
# endorse or promote products derived from this software without specific
# prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY,
# DEPARTMENT OF GEODESY AND GEOINFORMATION BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Author: Thomas Mistelbauer
# Creation date: 2014-07-29
"""
This module includes the poets base class `Poet`.
"""
from datetime import datetime
from netCDF4 import Dataset
from poets.grid.grids import ShapeGrid
from poets.io.source_base import BasicSource
import numpy as np
import os
import pandas as pd
import poets.web.app as app
# Accepted spellings of the temporal resolution; `Poet.__init__` rejects any
# value that is not listed here.
valid_temp_res = [
    'dekad', 'dekade',
    'month', 'monthly',
    'week', 'weekly',
    'day', 'daily',
]
class Poet(object):
    """POETS base class.

    Provides methods to download and resample data using parameters as
    defined in this class. Resampled output files will be saved as NetCDF4
    files.

    Parameters
    ----------
    rootpath : str
        Path to the directory where data should be stored. The directory and
        its `RAWDATA`, `DATA` and `TMP` subdirectories are created if they
        do not exist yet.
    regions : list of str, str, optional
        Identifier of the region in the shapefile. If the default shapefile
        is used, this would be the FIPS country code. Defaults to
        ``['global']`` (also used when None is given).
    spatial_resolution : float, optional
        Spatial resolution in degree, defaults to 0.25.
    temporal_resolution : str, optional
        Temporal resolution of the data; must be one of the entries of
        `valid_temp_res`. Defaults to dekad.
    start_date : datetime.datetime, optional
        First date of the dataset, defaults to 2000-01-01.
    nan_value : int
        NaN value to use, defaults to -99.
    shapefile : str, optional
        Path to shape file, uses "world country admin boundary shapefile" by
        default. Custom shapefile must use WGS84 as reference system.
    delete_rawdata : bool, optional
        Original files will be deleted from rawdata_path if set True.
        Defaults to False.
    region_names : list of str, str, optional
        Full name of the regions. If set, must have same size as regions
        parameter. Defaults to regions parameter.

    Attributes
    ----------
    rootpath : str
        Path to the directory where data should be stored.
    regions : list of str
        Identifier of the region in the shapefile.
    region_names : list of str
        Full name of the regions.
    spatial_resolution : float
        Spatial resolution in degree.
    temporal_resolution : str
        Temporal resolution of the data.
    data_path : str
        Path where resampled NetCDF file is stored.
    rawdata_path : str
        Path where original files are stored and downloaded.
    tmp_path : str
        Path where temporary files are stored.
    nan_value : int
        NaN value to use, defaults to -99.
    start_date : datetime.datetime
        First date of the dataset.
    shapefile : str
        Path to shape file.
    sources : dict of poets.io.BasicSource objects
        Sources used by poets given as BasicSource class.
    delete_rawdata : bool
        Original files will be deleted from rawdata_path if True.

    Raises
    ------
    ValueError
        If `temporal_resolution` is not listed in `valid_temp_res`.
    """

    def __init__(self, rootpath, regions=None,
                 spatial_resolution=0.25, temporal_resolution='dekad',
                 start_date=datetime(2000, 1, 1), nan_value=-99,
                 shapefile=None, delete_rawdata=False, region_names=None):

        self.rootpath = rootpath

        # A None default (instead of a list literal in the signature) plus
        # copying keeps instances from sharing one mutable list.
        if regions is None:
            regions = ['global']
        self.regions = self._as_list(regions)

        if region_names is None:
            # Copy so that editing the names never alters the identifiers.
            self.region_names = list(self.regions)
        else:
            self.region_names = self._as_list(region_names)

        self.spatial_resolution = spatial_resolution

        # Validate before anything is created on disk.
        if temporal_resolution not in valid_temp_res:
            raise ValueError("Temporal resolution must be one of " +
                             str(valid_temp_res))
        self.temporal_resolution = temporal_resolution

        self.rawdata_path = os.path.join(rootpath, 'RAWDATA')
        self.data_path = os.path.join(rootpath, 'DATA')
        self.tmp_path = os.path.join(rootpath, 'TMP')
        self.nan_value = nan_value
        self.start_date = start_date
        self.shapefile = shapefile
        self.delete_rawdata = delete_rawdata
        self.sources = {}

        # makedirs also creates a missing rootpath, which mkdir would not.
        for path in (self.rawdata_path, self.tmp_path, self.data_path):
            if not os.path.isdir(path):
                os.makedirs(path)

    @staticmethod
    def _as_list(value):
        """Returns `value` as a new list; a single string is wrapped."""
        try:
            string_types = basestring  # Python 2: covers str and unicode
        except NameError:
            string_types = str  # Python 3
        if isinstance(value, string_types):
            return [value]
        return list(value)

    def add_source(self, name, filename, filedate, temp_res, host, protocol,
                   username=None, password=None, port=22, directory=None,
                   dirstruct=None, begin_date=None, regions=None,
                   variables=None, nan_value=None, valid_range=None,
                   unit=None, ffilter=None, data_range=None, colorbar=None,
                   src_file=None):
        """Creates BasicSource class and adds it to `Poet.sources`.

        The source is stored under `name`; an existing source with the same
        name is replaced. The destination settings of the source (NaN value,
        regions, spatial and temporal resolution, start date) are taken from
        this Poet instance.

        Parameters
        ----------
        name : str
            Name of the data source.
        filename : str
            Structure/convention of the file name.
        filedate : dict
            Position of date fields in filename, given as tuple.
        temp_res : str
            Temporal resolution of the source.
        host : str
            Link to data host.
        protocol : str
            Protocol for data transfer.
        username : str, optional
            Username for data access.
        password : str, optional
            Password for data access.
        port : int, optional
            Port to data host, defaults to 22.
        directory : str, optional
            Path to data on host.
        dirstruct : list of strings, optional
            Structure of source directory, each list item represents a
            subdirectory.
        begin_date : datetime.date, optional
            Date from which on data is available.
        regions : list of str, optional
            List of regions where data from source is available. Uses all
            regions as given in Poet.regions attribute if None.
        variables : string or list of strings, optional
            Variables used from data source.
        nan_value : int, float, optional
            Nan value of the original data as given by the data provider.
        valid_range : tuple of int or float, optional
            Valid range of data, given as (minimum, maximum).
        unit : str, optional
            Unit of dataset for displaying in legend. Does not have to be set
            if unit is specified in input file metadata. Defaults to None.
        ffilter : str, optional
            Pattern that appears in filename. Can be used to select out not
            needed files if multiple files per date are provided.
        data_range : tuple of int or float, optional
            Range of the values as data given in rawdata (minimum, maximum).
            Will be scaled to valid_range.
        colorbar : str, optional
            Colorbar to use, use one from
            http://matplotlib.org/examples/color/colormaps_reference.html;
            defaults to jet.
        src_file : dict of str, optional
            Path to file that contains source. Uses default NetCDF file if
            None.
        """

        source = BasicSource(name, filename, filedate, temp_res,
                             self.rootpath, host, protocol,
                             username=username, password=password,
                             port=port, ffilter=ffilter,
                             directory=directory, dirstruct=dirstruct,
                             begin_date=begin_date, variables=variables,
                             nan_value=nan_value, valid_range=valid_range,
                             unit=unit, regions=regions,
                             data_range=data_range, colorbar=colorbar,
                             dest_nan_value=self.nan_value,
                             dest_regions=self.regions,
                             dest_sp_res=self.spatial_resolution,
                             dest_temp_res=self.temporal_resolution,
                             dest_start_date=self.start_date,
                             src_file=src_file)

        self.sources[name] = source

    def fetch_data(self, begin=None, end=None, delete_rawdata=None):
        """Starts download and resampling of input data for sources as added
        to `Poet.sources`.

        Parameters
        ----------
        begin : datetime, optional
            Start date of data to download, defaults to start date as defined
            in poets class.
        end : datetime, optional
            End date of data to download, defaults to current datetime.
        delete_rawdata : bool, optional
            Original files will be deleted from rawdata_path if set True.
            Defaults to value of delete_rawdata attribute as set in Poet
            class.
        """

        if delete_rawdata is None:
            delete_rawdata = self.delete_rawdata

        for name, src in self.sources.items():
            # Single-argument print call: same output on Python 2 and 3.
            print('[INFO] Download data for source ' + name)
            src.download_and_resample(begin=begin, end=end,
                                      shapefile=self.shapefile,
                                      delete_rawdata=delete_rawdata)

        print('[SUCCESS] Download and resampling complete!')

    def download(self, begin=None, end=None):
        """Starts download of input data for sources as added to
        `Poet.sources`.

        Parameters
        ----------
        begin : datetime, optional
            Start date of data to download, defaults to start date as defined
            in poets class.
        end : datetime, optional
            End date of data to download, defaults to current datetime.
        """

        for name, src in self.sources.items():
            print('[INFO] Download data for source ' + name)
            src.download(begin=begin, end=end)

        print('[SUCCESS] Download complete!')

    def resample(self, begin=None, end=None, delete_rawdata=None):
        """Starts resampling of input data for sources as added to
        `Poet.sources`.

        Parameters
        ----------
        begin : datetime, optional
            Start date of data to resample, defaults to start date as defined
            in poets class.
        end : datetime, optional
            End date of data to resample, defaults to current datetime.
        delete_rawdata : bool, optional
            Original files will be deleted from rawdata_path if set True.
            Defaults to value of delete_rawdata attribute as set in Poet
            class.
        """

        # Same fallback as in fetch_data, so the instance-wide setting is
        # honoured instead of handing None down to the source.
        if delete_rawdata is None:
            delete_rawdata = self.delete_rawdata

        for name, src in self.sources.items():
            print('[INFO] Resampling data for source ' + name)
            src.resample(begin=begin, end=end,
                         shapefile=self.shapefile,
                         delete_rawdata=delete_rawdata)

        print('[SUCCESS] Resampling complete!')

    def fill_gaps(self):
        """
        Detects gaps in data and tries to fill them by downloading and
        resampling the data within these periods.
        """

        for name, src in self.sources.items():
            print('[INFO] Scanning ' + name + ' for gaps')
            src.fill_gaps()

    def get_gridpoints(self):
        """Returns gridpoints of all regions.

        For the global setup (`regions == ['global']`) the points are read
        from the resampled NetCDF file in `data_path`; for any other setup
        they are taken from a `ShapeGrid` of each region.

        Returns
        -------
        gridpoints : dict of pandas.DataFrame
            Dict containing Dataframes with gridpoint index as index,
            longitudes and latitudes as columns for each region.
        """

        gridpoints = {}

        if self.regions == ['global']:
            filename = (self.regions[0] + '_' + str(self.spatial_resolution)
                        + '_' + str(self.temporal_resolution) + '.nc')
            ncfile = os.path.join(self.data_path, filename)

            # The file is only read here, so it is opened read-only.
            with Dataset(ncfile, 'r') as nc:
                gpis = nc.variables['gpi'][:]
                lons = nc.variables['lon'][:]
                lats = nc.variables['lat'][:]

            # Expand the lon/lat axes to one coordinate pair per grid point.
            lons, lats = np.meshgrid(lons, lats)

            points = pd.DataFrame(index=gpis.flatten())
            points['lon'] = lons.flatten()
            points['lat'] = lats.flatten()
            gridpoints['global'] = points
        else:
            for region in self.regions:
                grid = ShapeGrid(region, self.spatial_resolution)
                gridpoints[region] = grid.get_gridpoints()

        return gridpoints

    def read_image(self, source, date, region=None, variable=None):
        """Gets images from netCDF file for certain date.

        Parameters
        ----------
        source : str
            Data source from which image should be read.
        date : datetime
            Date of the image.
        region : str, optional
            Region of interest, set to first defined region if None.
        variable : str, optional
            Variable to display, set to first variable of source if None.

        Returns
        -------
        img : numpy.ndarray
            Image of selected date.
        lon : numpy.array
            Array with longitudes.
        lat : numpy.array
            Array with latitudes.
        metadata : dict
            Dictionary containing metadata of the variable.

        Raises
        ------
        KeyError
            If `source` has not been added to `Poet.sources`.
        """

        img, lon, lat, metadata = self.sources[source].read_img(date, region,
                                                                variable)

        return img, lon, lat, metadata

    def read_timeseries(self, source, location, region=None, variable=None):
        """
        Gets timeseries from netCDF file for a gridpoint.

        Parameters
        ----------
        source : str
            Data source from which time series should be read.
        location : int or tuple of floats
            Either Grid point index as integer value or Longitude/Latitude
            given as tuple.
        region : str, optional
            Region of interest, set to first defined region if None.
        variable : str, optional
            Variable to display, set to first variable of source if None.

        Returns
        -------
        ts : pd.DataFrame
            Timeseries for the selected data.

        Raises
        ------
        KeyError
            If `source` has not been added to `Poet.sources`.
        """

        ts = self.sources[source].read_ts(location, region, variable,
                                          shapefile=self.shapefile)

        return ts

    def get_variables(self, region=None):
        """
        Returns all variables available.

        Parameters
        ----------
        region : str, optional
            Region to check for variables. Sources whose `regions` attribute
            is None are always included; all sources are included if region
            is None.

        Returns
        -------
        variables : list of str
            Sorted list of all variables.
        """

        variables = []

        for src in self.sources.values():
            if (region is None or src.regions is None
                    or region in src.regions):
                variables.extend(src.get_variables())

        variables.sort()

        return variables

    def start_app(self, host='127.0.0.1', port=5000, r_host=None, r_port=None,
                  debug=False):
        """Starts web interface.

        Parameters
        ----------
        host : str, optional
            Host that is used by the app, defaults to 127.0.0.1.
        port : int, optional
            Port where app runs on, defaults to 5000.
        r_host : str, optional
            IP of router that is between host and internet.
        r_port : int, optional
            Port of router that is between host and internet.
        debug : bool, optional
            Starts app in debug mode if set True, defaults to False.
        """

        app.start(self, host, port, r_host, r_port, debug)