# Copyright (c) 2014, Vienna University of Technology (TU Wien), Department
# of Geodesy and Geoinformation (GEO).
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the Vienna University of Technology - Department of
# Geodesy and Geoinformation nor the names of its contributors may be used to
# endorse or promote products derived from this software without specific
# prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY,
# DEPARTMENT OF GEODESY AND GEOINFORMATION BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Author: Thomas Mistelbauer
# Creation date: 2014-07-29
"""
This module includes the poets base class `Poet`.
"""
import os
import numpy as np
import pandas as pd
from datetime import datetime
from netCDF4 import Dataset
from poets.io.source_base import BasicSource
from poets.grid.grids import ShapeGrid
# Temporal resolutions supported by `Poet`.
valid_temp_res = ['dekad', 'month']


class Poet(object):
    """POETS base class.

    Provides methods to download and resample data using parameters as
    defined in this class. Resampled outputfiles will be saved as NetCDF4
    files.

    Parameters
    ----------
    rootpath : str
        path to the directory where data should be stored
    regions : list of str, optional
        Identifier of the region in the shapefile. If the default shapefile is
        used, this would be the FIPS country code. Defaults to ['global'].
    spatial_resolution : float, optional
        spatial resolution in degree, defaults to 0.25
    temporal_resolution : str, optional
        temporal resolution of the data, possible values: month, dekad,
        defaults to dekad
    start_date : datetime.datetime, optional
        first date of the dataset, defaults to 2000-01-01
    nan_value : int
        NaN value to use, defaults to -99
    shapefile : str, optional
        Path to shape file, uses "world country admin boundary shapefile" by
        default.
    delete_rawdata : bool, optional
        Original files will be deleted from tmp_path if set True. Defaults
        to False

    Attributes
    ----------
    rootpath : str
        path to the directory where data should be stored
    regions : list of str
        Identifier of the region in the shapefile.
    spatial_resolution : float
        Spatial resolution in degree.
    temporal_resolution : str
        Temporal resolution of the data.
    tmp_path : str
        Path where temporary files and original files are stored and
        downloaded (``<rootpath>/TMP``).
    data_path : str
        Path where resampled NetCDF file is stored (``<rootpath>/DATA``).
    nan_value : int
        NaN value to use, defaults to -99.
    start_date : datetime.datetime
        First date of the dataset.
    shapefile : str
        Path to shape file.
    sources : dict of poets.io.BasicSource objects
        Sources used by poets given as BasicSource class.
    delete_rawdata : bool
        Original files will be deleted from tmp_path if True.

    Raises
    ------
    ValueError
        If `temporal_resolution` is not one of `valid_temp_res`.
    """

    def __init__(self, rootpath, regions=None,
                 spatial_resolution=0.25, temporal_resolution='dekad',
                 start_date=datetime(2000, 1, 1), nan_value=-99,
                 shapefile=None, delete_rawdata=False):

        # Validate against the module-level list so that the check and the
        # error message can never disagree.
        if temporal_resolution not in valid_temp_res:
            raise ValueError("Temporal resolution must be one of " +
                             str(valid_temp_res))

        self.rootpath = rootpath
        # A fresh list per instance; a list literal in the signature would be
        # shared between all instances created with the default.
        self.regions = ['global'] if regions is None else regions
        self.spatial_resolution = spatial_resolution
        self.temporal_resolution = temporal_resolution
        self.tmp_path = os.path.join(rootpath, 'TMP')
        self.data_path = os.path.join(rootpath, 'DATA')
        self.nan_value = nan_value
        self.start_date = start_date
        self.shapefile = shapefile
        self.delete_rawdata = delete_rawdata
        self.sources = {}

        # makedirs also creates a missing rootpath, where mkdir would fail.
        for path in (self.tmp_path, self.data_path):
            if not os.path.exists(path):
                os.makedirs(path)

    def add_source(self, name, filename, filedate, temp_res, host, protocol,
                   username=None, password=None, port=22, directory=None,
                   dirstruct=None, begin_date=datetime(2000, 1, 1),
                   variables=None, nan_value=None):
        """Creates BasicSource class and adds it to `Poet.sources`.

        A source that was previously added under the same `name` is replaced.

        Parameters
        ----------
        name : str
            Name of the data source.
        filename : str
            Structure/convention of the file name.
        filedate : dict
            Position of date fields in filename, given as tuple.
        temp_res : str
            Temporal resolution of the source.
        host : str
            Link to data host.
        protocol : str
            Protocol for data transfer.
        username : str, optional
            Username for data access.
        password : str, optional
            Password for data access.
        port : int, optional
            Port to data host, defaults to 22.
        directory : str, optional
            Path to data on host.
        dirstruct : list of strings
            Structure of source directory, each list item represents a
            subdirectory.
        begin_date : datetime.date, optional
            Date from which on data is available, defaults to 2000-01-01.
        variables : list of strings, optional
            Variables used from data source, defaults to ['dataset'].
        nan_value : int, float, optional
            Nan value of the original data as given by the data provider.
        """

        # Build the default per call so sources never share one list object.
        if variables is None:
            variables = ['dataset']

        source = BasicSource(name, filename, filedate, temp_res, self.rootpath,
                             host, protocol, username, password, port,
                             directory, dirstruct, begin_date, variables,
                             nan_value, self.nan_value, self.regions,
                             self.spatial_resolution, self.temporal_resolution,
                             self.start_date)

        self.sources[name] = source

    def fetch_data(self, begin=None, end=None, delete_rawdata=None):
        """Starts download and resampling of input data for sources as added
        to `Poet.sources`.

        Parameters
        ----------
        begin : datetime, optional
            Start date of data to download, passed on to each source.
        end : datetime, optional
            End date of data to download, passed on to each source.
        delete_rawdata : bool, optional
            Original files will be deleted from tmp_path if set True. Defaults
            to delete_rawdata attribute as set in Poet class.
        """

        # Only fall back to the instance setting when nothing was passed, so
        # that an explicit False is not overridden.
        if delete_rawdata is None:
            delete_rawdata = self.delete_rawdata
        # NOTE(review): the resolved delete_rawdata is not forwarded to
        # download_and_resample(); confirm that method's signature before
        # passing it on.

        for name, src in self.sources.items():
            # Single-argument print call behaves the same on Python 2 and 3.
            print('[INFO] Download data for source ' + name)
            src.download_and_resample(begin=begin, end=end,
                                      shapefile=self.shapefile)

        print('[SUCCESS] Download and resampling complete!')

    def get_gridpoints(self):
        """Returns gridpoints for all regions of this instance.

        For the global region the points are read from the NetCDF file
        ``global_<spatial_resolution>_<temporal_resolution>.nc`` located in
        `data_path`; for all other regions they are taken from a `ShapeGrid`.

        Returns
        -------
        gridpoints : dict of pandas.DataFrame
            Dict containing Dataframes with gridpoint index as index,
            longitudes and latitudes as columns for each region.
        """

        gridpoints = {}

        if self.regions == ['global']:
            filename = (self.regions[0] + '_' + str(self.spatial_resolution)
                        + '_' + str(self.temporal_resolution) + '.nc')
            ncfile = os.path.join(self.data_path, filename)

            # Read-only access is sufficient; nothing is written to the file.
            with Dataset(ncfile, 'r') as nc:
                gpis = nc.variables['gpi'][:]
                lons = nc.variables['lon'][:]
                lats = nc.variables['lat'][:]

            # Expand the 1D lon/lat axes to one coordinate pair per point.
            lons, lats = np.meshgrid(lons, lats)

            points = pd.DataFrame(index=gpis.flatten())
            points['lon'] = lons.flatten()
            points['lat'] = lats.flatten()
            gridpoints['global'] = points
        else:
            for region in self.regions:
                grid = ShapeGrid(region, self.spatial_resolution)
                gridpoints[region] = grid.get_gridpoints()

        return gridpoints
if __name__ == "__main__":
    # Nothing to do when the module is executed as a script.
    pass