#!/usr/bin/python
r"""Function to cross-correlate templates generated by template_gen function \
with data and output the detections. The central component of this is \
the match_template function from the openCV image processing package. This \
is a highly optimized and accurate normalized cross-correlation routine. \
The details of this code can be found here: \
http://www.cs.ubc.ca/research/deaton/remarks_ncc.html \
The matched-filter routine described here was used in a previous Matlab code \
for the Chamberlain et al. 2014 G-cubed publication.
Code written by Calum John Chamberlain of Victoria University of \
Wellington, 2015.
Copyright 2015, 2016 Calum Chamberlain
This file is part of EQcorrscan.
EQcorrscan is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
EQcorrscan is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with EQcorrscan. If not, see <http://www.gnu.org/licenses/>.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import numpy as np
import warnings
class DETECTION(object):
    r"""Information required for a full detection based on cross-channel \
    correlation sums.

    Attributes:
    :type template_name: str
    :param template_name: The name of the template for which this \
        detection was made.
    :type detect_time: :class: 'obspy.UTCDateTime'
    :param detect_time: Time of detection as an obspy UTCDateTime object
    :type no_chans: int
    :param no_chans: The number of channels for which the cross-channel \
        correlation sum was calculated over.
    :type chans: list of str
    :param chans: List of stations for the detection
    :type detect_val: float
    :param detect_val: The raw value of the cross-channel correlation sum \
        for this detection.
    :type threshold: float
    :param threshold: The value of the threshold used for this detection, \
        will be the raw threshold value related to the cccsum.
    :type typeofdet: str
    :param typeofdet: Type of detection, STA, corr, bright
    """
    # Class-wide count of DETECTION objects created so far.
    detectioncount = 0

    def __init__(self, template_name, detect_time,
                 no_chans, detect_val,
                 threshold, typeofdet,
                 chans=None):
        """Instantiate a DETECTION and increment the class counter."""
        self.template_name = template_name
        self.detect_time = detect_time
        self.no_chans = no_chans
        self.chans = chans
        self.detect_val = detect_val
        self.threshold = threshold
        self.typeofdet = typeofdet
        # BUG FIX: the original did `self.detectioncount += 1`, which reads
        # the class attribute but assigns a *new instance attribute*, so the
        # shared counter never changed (every instance saw 1).  Increment the
        # class attribute explicitly so the count is actually maintained.
        DETECTION.detectioncount += 1

    def __repr__(self):
        """Simple print."""
        return "DETECTION()"

    def __str__(self):
        """Full print."""
        print_str = "Detection on " + self.template_name + " at " + \
            str(self.detect_time)
        return print_str
def normxcorr2(template, image):
    r"""Base function to call the c++ correlation routine from the openCV \
    image processing suite. Requires you to have installed the openCV python \
    bindings, which can be downloaded on Linux machines using: \
    **sudo apt-get install python-openCV**.

    Here we use the cv2.TM_CCOEFF_NORMED method within openCV to give the \
    normalized cross-correlation. Documentation on this function can be \
    found here:
    **http://docs.opencv.org/modules/imgproc/doc/object_detection.html?highlight=matchtemplate#cv2.matchTemplate**

    :type template: :class: 'numpy.array'
    :param template: Template array
    :type image: :class: 'numpy.array'
    :param image: image to scan the template through.  The order of these \
        matters, if you put the template after the image you will get a \
        reversed correlation matrix
    :return: New :class: 'numpy.array' object of the correlation values for \
        the correlation of the image with the template.  Returns the string \
        'NaN' if the inputs are not numpy arrays.
    """
    # Check that we have been passed numpy arrays.  isinstance is the
    # idiomatic type check, and doing it before importing cv2 means the
    # cheap failure path does not require the openCV bindings at all.
    if not isinstance(template, np.ndarray) or \
            not isinstance(image, np.ndarray):
        print('You have not provided numpy arrays, I will not convert them')
        return 'NaN'
    import cv2
    # Convert numpy arrays to float 32 as required by cv2.matchTemplate
    cv_template = template.astype(np.float32)
    cv_image = image.astype(np.float32)
    ccc = cv2.matchTemplate(cv_image, cv_template, cv2.TM_CCOEFF_NORMED)
    if np.all(np.isnan(cv_image)) and np.all(np.isnan(cv_template)):
        ccc = np.zeros(len(ccc))
    if np.all(ccc == 1.0) and (np.all(np.isnan(cv_template)) or
                               np.all(np.isnan(cv_image))):
        ccc = np.zeros(len(ccc))
    # Convert an array of perfect correlations to zero cross-correlations
    # Reshape ccc to be a 1D vector as is useful for seismic data
    ccc = ccc.reshape((1, len(ccc)))
    return ccc
def _template_loop(template, chan, station, channel, debug=0, i=0):
    r"""Sister loop to handle the correlation of a single template (of \
    multiple channels) with a single channel of data.

    :type template: obspy.Stream
    :param template: Template stream; only the trace matching station and \
        channel is correlated here.
    :type chan: np.array
    :param chan: Continuous data for one station/channel (the image).
    :type station: string
    :type channel: string
    :type debug: int
    :param debug: Debug level, larger numbers give more output.
    :type i: int
    :param i: Optional argument, used to keep track of which process is being \
        run.
    :returns: tuple of (i, ccc) with ccc as an ndarray

    .. note:: This function currently assumes only one template-channel per \
        data-channel, while this is normal for a standard matched-filter \
        routine, if we wanted to implement a subspace detector, this would be \
        the function to change, I think. E.g. where I currently take only \
        the first matching channel, we could loop through all the matching \
        channels and then sum the correlation sums - however I haven't yet
        implemented detection based on that. More reading of the Harris \
        document required.
    """
    from eqcorrscan.utils.timer import Timer
    # Default ccc is all-NaN, sized to match a full correlation of the first
    # template channel against the data so downstream shapes stay consistent.
    ccc = np.array([np.nan] * (len(chan) - len(template[0].data) + 1),
                   dtype=np.float16)
    ccc = ccc.reshape((1, len(ccc)))  # Set default value for
    # cross-channel correlation in case there are no data that match our
    # channels.
    with Timer() as t:
        # While each bit of this loop isn't slow, looping through the if
        # statement when I don't need to adds up, I should work this out
        # earlier
        template_data = template.select(station=station,
                                        channel=channel)
        # I will for now assume that you only have one template per-channel
        # NOTE(review): if select() returned an empty Stream, indexing [0]
        # here raises IndexError, so the `if not template_data` guard below
        # can never see the "no match" case -- confirm intended.
        template_data = template_data[0]
        # Offset of this channel from the earliest-starting channel in the
        # template; used to shift the image so channel correlations align.
        delay = template_data.stats.starttime - \
            template.sort(['starttime'])[0].stats.starttime
        pad = np.array([0] * int(round(delay *
                                       template_data.stats.sampling_rate)))
        image = np.append(chan, pad)[len(pad):]
        ccc = (normxcorr2(template_data.data, image))
        ccc = ccc.astype(np.float16)
        # Convert to float16 to save memory for large problems - lose some
        # accuracy which will affect detections very close to threshold
    if debug >= 2 and t.secs > 4:
        print("Single if statement took %s s" % t.secs)
        if not template_data:
            print("Didn't even correlate!")
            print(station + ' ' + channel)
    elif debug >= 2:
        print("If statement without correlation took %s s" % t.secs)
    if debug >= 3:
        print('********* DEBUG: ' + station + '.' +
              channel + ' ccc MAX: ' + str(np.max(ccc[0])))
        print('********* DEBUG: ' + station + '.' +
              channel + ' ccc MEAN: ' + str(np.mean(ccc[0])))
    # Infinite mean indicates a broken correlation; optionally dump inputs
    # for offline inspection.
    if np.isinf(np.mean(ccc[0])):
        warnings.warn('Mean of ccc is infinite, check!')
        if debug >= 3:
            np.save('inf_cccmean_ccc.npy', ccc[0])
            np.save('inf_cccmean_template.npy', template_data.data)
            np.save('inf_cccmean_image.npy', image)
    if debug >= 3:
        print('shape of ccc: ' + str(np.shape(ccc)))
        print('A single ccc is using: ' + str(ccc.nbytes / 1000000) + 'MB')
        print('ccc type is: ' + str(type(ccc)))
    if debug >= 3:
        print('shape of ccc: ' + str(np.shape(ccc)))
        print("Parallel worker " + str(i) + " complete")
    return (i, ccc)
def _channel_loop(templates, stream, cores=1, debug=0):
    r"""
    Loop to generate cross channel correlation sums for a series of \
    templates; hands off the actual correlations to a sister function \
    (:func:`_template_loop`) which is run in parallel across templates.

    :type templates: list of :class: 'obspy.Stream'
    :param templates: A list of templates, where each one should be an \
        obspy.Stream object containing multiple traces of seismic data and \
        the relevant header information.
    :type stream: :class: 'obspy.Stream'
    :param stream: A single obspy.Stream object containing daylong seismic \
        data to be correlated through using the templates.  This is in \
        effect the image.
    :type cores: int
    :param cores: Number of cores to loop over
    :type debug: int
    :param debug: Debug level.

    :return: New list of :class: 'numpy.array' objects.  These will contain \
        the correlation sums for each template for this day of data.
    :return: list of ints as number of channels used for each \
        cross-correlation.
    """
    import time
    from multiprocessing import Pool
    from eqcorrscan.utils.timer import Timer
    # Never spawn more workers than there are templates to correlate.
    num_cores = cores
    if len(templates) < num_cores:
        num_cores = len(templates)
    # Initialize cccs_matrix, which will be two arrays of len(templates)
    # arrays, where the arrays cccs_matrix[0[:]] will be the cross channel
    # sum for each template.
    # Note: This requires all templates to be the same length, and all
    # channels to be the same length
    cccs_matrix = np.array([np.array([np.array([0.0] * (len(stream[0].data) -
                           len(templates[0][0].data) + 1))] *
                           len(templates))] * 2)
    # Initialize number of channels array
    no_chans = np.array([0] * len(templates))
    for tr in stream:
        tr_data = tr.data
        station = tr.stats.station
        channel = tr.stats.channel
        if debug >= 1:
            print("Starting parallel run for station " + station +
                  " channel " + channel)
        # time.clock() was deprecated and removed in Python 3.8; use
        # wall-clock time instead.
        tic = time.time()
        with Timer() as t:
            # Send off to sister function: one async task per template.
            pool = Pool(processes=num_cores)
            results = [pool.apply_async(_template_loop,
                                        args=(templates[i], tr_data, station,
                                              channel, debug, i))
                       for i in range(len(templates))]
            pool.close()
        if debug >= 1:
            print("--------- TIMER: Correlation loop took: %s s" % t.secs)
            print(" I have " + str(len(results)) + " results")
        with Timer() as t:
            cccs_list = [p.get() for p in results]
        pool.join()
        if debug >= 1:
            print("--------- TIMER: Getting results took: %s s" % t.secs)
        with Timer() as t:
            # Sort by placeholder returned from _template_loop so results
            # line up with the template order.
            cccs_list.sort(key=lambda tup: tup[0])
        if debug >= 1:
            print("--------- TIMER: Sorting took: %s s" % t.secs)
        with Timer() as t:
            cccs_list = [ccc[1] for ccc in cccs_list]
        if debug >= 1:
            print("--------- TIMER: Extracting arrays took: %s s" % t.secs)
        if debug >= 3:
            print('cccs_list is shaped: ' + str(np.shape(cccs_list)))
        with Timer() as t:
            cccs = np.concatenate(cccs_list, axis=0)
        if debug >= 1:
            print("--------- TIMER: cccs_list conversion: %s s" % t.secs)
        del cccs_list
        if debug >= 2:
            print('After looping through templates the cccs is shaped: ' +
                  str(np.shape(cccs)))
            print('cccs is using: ' + str(cccs.nbytes / 1000000) +
                  ' MB of memory')
        cccs_matrix[1] = np.reshape(cccs, (1, len(templates),
                                    max(np.shape(cccs))))
        del cccs
        if debug >= 2:
            print('cccs_matrix shaped: ' + str(np.shape(cccs_matrix)))
            print('cccs_matrix is using ' + str(cccs_matrix.nbytes / 1000000) +
                  ' MB of memory')
        # Now we have an array of arrays with the first dimensional index
        # giving the channel, the second dimensional index giving the
        # template and the third dimensional index giving the position
        # in the ccc, e.g.:
        # np.shape(cccsums)=(len(stream), len(templates), len(ccc))
        if debug >= 2:
            print('cccs_matrix as a np.array is shaped: ' +
                  str(np.shape(cccs_matrix)))
        # First work out how many channels were used
        for i in range(0, len(templates)):
            if not np.all(cccs_matrix[1][i] == 0):
                # Check that there are some real numbers in the vector rather
                # than being all 0, which is the default case for no match
                # of image and template names
                no_chans[i] += 1
        # Now sum along the channel axis for each template to give the
        # cccsum values for each template for each day
        with Timer() as t:
            cccsums = cccs_matrix.sum(axis=0).astype(np.float32)
        if debug >= 1:
            print("--------- TIMER: Summing took %s s" % t.secs)
        if debug >= 2:
            print('cccsums is shaped thus: ' + str(np.shape(cccsums)))
        # Fold the running sum back into slot 0 so the next trace's
        # correlations accumulate on top of it.
        cccs_matrix[0] = cccsums
        del cccsums
        toc = time.time()
        if debug >= 1:
            print("--------- TIMER: Trace loop took " + str(toc - tic) +
                  " s")
    if debug >= 2:
        print('cccs_matrix is shaped: ' + str(np.shape(cccs_matrix)))
    cccsums = cccs_matrix[0]
    return cccsums, no_chans
def match_filter(template_names, template_list, st, threshold,
                 threshold_type, trig_int, plotvar, plotdir='.', cores=1,
                 tempdir=False, debug=0, plot_format='jpg'):
    r"""Over-arching code to run the correlations of given templates with a \
    day of seismic data and output the detections based on a given threshold.

    :type template_names: list
    :param template_names: List of template names in the same order as \
        template_list
    :type template_list: list :class: 'obspy.Stream'
    :param template_list: A list of templates of which each template is a \
        Stream of obspy traces containing seismic data and header information.
    :type st: :class: 'obspy.Stream'
    :param st: An obspy.Stream object containing all the data available and \
        required for the correlations with templates given.  For efficiency \
        this should contain no excess traces which are not in one or more of \
        the templates.  This will now remove excess traces internally, but \
        will copy the stream and work on the copy, leaving your input stream \
        untouched.
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type: The type of threshold to be used, can be MAD, \
        absolute or av_chan_corr.  MAD threshold is calculated as the \
        threshold*(median(abs(cccsum))) where cccsum is the cross-correlation \
        sum for a given template.  absolute threshold is a true absolute \
        threshold based on the cccsum value av_chan_corr is based on the mean \
        values of single-channel cross-correlations assuming all data are \
        present as required for the template, \
        e.g. av_chan_corr_thresh=threshold*(cccsum/len(template)) where \
        template is a single template from the input and the length is the \
        number of channels within this template.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type plotvar: bool
    :param plotvar: Turn plotting on or off
    :type plotdir: str
    :param plotdir: Path to plotting folder, plots will be output here, \
        defaults to run location.
    :type tempdir: String or False
    :param tempdir: Directory to put temporary files, or False
    :type cores: int
    :param cores: Number of cores to use
    :type debug: int
    :param debug: Debug output level, the bigger the number, the more the \
        output.

    :return: :class: 'DETECTIONS' detections for each channel formatted as \
        :class: 'obspy.UTCDateTime' objects.

    .. note:: Plotting within the match-filter routine uses the Agg backend \
        with interactive plotting turned off.  This is because the function \
        is designed to work in bulk.  If you wish to turn interactive \
        plotting on you must import matplotlib in your script first, when you \
        then import match_filter you will get the warning that this call to \
        matplotlib has no effect, which will mean that match_filter has not \
        changed the plotting behaviour.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    plt.ioff()
    import copy
    from eqcorrscan.utils import plotting
    from eqcorrscan.utils import findpeaks
    from obspy import Trace
    import time
    # Copy the stream here because we will muck about with it
    stream = st.copy()
    templates = copy.deepcopy(template_list)
    # Debug option to confirm that the channel names match those in the
    # templates
    if debug >= 2:
        template_stachan = []
        data_stachan = []
        for template in templates:
            for tr in template:
                template_stachan.append(tr.stats.station + '.' +
                                        tr.stats.channel)
        for tr in stream:
            data_stachan.append(tr.stats.station + '.' + tr.stats.channel)
        template_stachan = list(set(template_stachan))
        data_stachan = list(set(data_stachan))
        if debug >= 3:
            print('I have template info for these stations:')
            print(template_stachan)
            print('I have daylong data for these stations:')
            print(data_stachan)
    # Perform a check that the daylong vectors are daylong
    for tr in stream:
        if not tr.stats.sampling_rate * 86400 == tr.stats.npts:
            msg = ' '.join(['Data are not daylong for', tr.stats.station,
                            tr.stats.channel])
            raise ValueError(msg)
    # Call the _template_loop function to do all the correlation work
    # time.clock() was deprecated and removed in Python 3.8; use wall-clock
    # time for the coarse timing reports instead.
    outtic = time.time()
    # Edit here from previous, stable, but slow match_filter
    # Would be worth testing without an if statement, but with every station
    # in the possible template stations having data, but for those without
    # real data make the data NaN to return NaN ccc_sum
    # Note: this works
    if debug >= 2:
        print('Ensuring all template channels have matches in daylong data')
    template_stachan = []
    for template in templates:
        for tr in template:
            template_stachan += [(tr.stats.station, tr.stats.channel)]
    template_stachan = list(set(template_stachan))
    # Copy this here to keep it safe
    for stachan in template_stachan:
        if not stream.select(station=stachan[0], channel=stachan[1]):
            # Remove template traces rather than adding NaN data
            for template in templates:
                if template.select(station=stachan[0], channel=stachan[1]):
                    for tr in template.select(station=stachan[0],
                                              channel=stachan[1]):
                        template.remove(tr)
    # Remove un-needed channels
    for tr in stream:
        if not (tr.stats.station, tr.stats.channel) in template_stachan:
            stream.remove(tr)
    # Also pad out templates to have all channels
    for template in templates:
        for stachan in template_stachan:
            if not template.select(station=stachan[0], channel=stachan[1]):
                nulltrace = Trace()
                nulltrace.stats.station = stachan[0]
                nulltrace.stats.channel = stachan[1]
                nulltrace.stats.sampling_rate = template[0].stats.sampling_rate
                nulltrace.stats.starttime = template[0].stats.starttime
                # np.nan rather than np.NaN: the upper-case alias was removed
                # in NumPy 2.0; both name the same object on older NumPy.
                nulltrace.data = np.array([np.nan] * len(template[0].data),
                                          dtype=np.float32)
                template += nulltrace
    if debug >= 2:
        print('Starting the correlation run for this day')
    [cccsums, no_chans] = _channel_loop(templates, stream, cores, debug)
    if len(cccsums[0]) == 0:
        raise ValueError('Correlation has not run, zero length cccsum')
    outtoc = time.time()
    print(' '.join(['Looping over templates and streams took:',
                    str(outtoc - outtic), 's']))
    if debug >= 2:
        print(' '.join(['The shape of the returned cccsums is:',
                        str(np.shape(cccsums))]))
        print(' '.join(['This is from', str(len(templates)), 'templates']))
        print(' '.join(['Correlated with', str(len(stream)),
                        'channels of data']))
    detections = []
    for i, cccsum in enumerate(cccsums):
        template = templates[i]
        if threshold_type == 'MAD':
            rawthresh = threshold * np.median(np.abs(cccsum))
        elif threshold_type == 'absolute':
            rawthresh = threshold
        elif threshold_type == 'av_chan_corr':
            # BUG FIX: this previously tested `threshold == 'av_chan_corr'`,
            # comparing the numeric threshold to a string, so this branch was
            # unreachable and av_chan_corr silently fell through to the
            # default below.
            rawthresh = threshold * (cccsum / len(template))
        else:
            print('You have not selected the correct threshold type, I will' +
                  'use MAD as I like it')
            # NOTE(review): this fallback uses mean rather than the median
            # used by the MAD branch above -- confirm intended.
            rawthresh = threshold * np.mean(np.abs(cccsum))
        # Findpeaks returns a list of tuples in the form [(cccsum, sample)]
        print(' '.join(['Threshold is set at:', str(rawthresh)]))
        print(' '.join(['Max of data is:', str(max(cccsum))]))
        print(' '.join(['Mean of data is:', str(np.mean(cccsum))]))
        if np.abs(np.mean(cccsum)) > 0.05:
            warnings.warn('Mean is not zero! Check this!')
        # Set up a trace object for the cccsum as this is easier to plot and
        # maintains timing
        if plotvar:
            stream_plot = copy.deepcopy(stream[0])
            # Downsample for plotting
            stream_plot.decimate(int(stream[0].stats.sampling_rate / 10))
            cccsum_plot = Trace(cccsum)
            cccsum_plot.stats.sampling_rate = stream[0].stats.sampling_rate
            # Resample here to maintain shape better
            cccsum_hist = cccsum_plot.copy()
            cccsum_hist = cccsum_hist.decimate(
                int(stream[0].stats.sampling_rate / 10)).data
            cccsum_plot = plotting.chunk_data(cccsum_plot, 10,
                                              'Maxabs').data
            # Enforce same length
            stream_plot.data = stream_plot.data[0:len(cccsum_plot)]
            cccsum_plot = cccsum_plot[0:len(stream_plot.data)]
            cccsum_hist = cccsum_hist[0:len(stream_plot.data)]
            plotting.triple_plot(cccsum_plot, cccsum_hist,
                                 stream_plot, rawthresh, True,
                                 plotdir + '/cccsum_plot_' +
                                 template_names[i] + '_' +
                                 stream[0].stats.starttime.
                                 datetime.strftime('%Y-%m-%d') +
                                 '.' + plot_format)
        if debug >= 4:
            print(' '.join(['Saved the cccsum to:', template_names[i],
                            stream[0].stats.starttime.datetime.
                            strftime('%Y%j')]))
            np.save(template_names[i] +
                    stream[0].stats.starttime.datetime.strftime('%Y%j'),
                    cccsum)
        tic = time.time()
        if debug >= 4:
            np.save('cccsum_' + str(i) + '.npy', cccsum)
        if debug >= 3 and max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(
                cccsum, rawthresh,
                trig_int * stream[0].stats.sampling_rate, debug,
                stream[0].stats.starttime,
                stream[0].stats.sampling_rate)
        elif max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(
                cccsum, rawthresh,
                trig_int * stream[0].stats.sampling_rate, debug)
        else:
            print('No peaks found above threshold')
            peaks = False
        toc = time.time()
        if debug >= 1:
            print(' '.join(['Finding peaks took:', str(toc - tic), 's']))
        if peaks:
            for peak in peaks:
                # Convert the peak sample offset to an absolute time
                detecttime = stream[0].stats.starttime +\
                    peak[1] / stream[0].stats.sampling_rate
                detections.append(DETECTION(template_names[i],
                                            detecttime,
                                            no_chans[i], peak[0], rawthresh,
                                            'corr'))
    # Release the working copies made at the top of the function
    del stream, templates
    return detections
if __name__ == "__main__":
    # Run the module's embedded doctests when executed directly as a script.
    import doctest
    doctest.testmod()