from collections import namedtuple
import numpy as np
import pandas as pd
Occurrence = namedtuple('Occurrence', 'start stop slice duration')


class Events(object):
    """Object to represent events found in time series data.

    A representation of events based off a ``bool`` conditional array.
    Each event is described by a half-open index range ``[start, stop)``
    into the original condition array.

    Attributes:
        name (``str``): User provided name for events.
        _starts (``np.array`` of ``int``): The index for event starts
        _stops (``np.array`` of ``int``): The index one past the last
            active sample of each event (exclusive stop).
        _period (``float``): Time between each value of the original
            condition array
        _condition_size (``int``): The size of the original condition array
    """

    def __init__(self, starts, stops, period, name, condition_size):
        self.name = name
        self._starts = starts
        self._stops = stops
        self._period = period
        self._condition_size = condition_size

    def _make_occurrence(self, start, stop):
        """Build the :class:`.Occurrence` for one ``[start, stop)`` range.

        ``Occurrence.stop`` is the index of the last active sample
        (inclusive), while ``Occurrence.slice`` keeps the exclusive stop so
        it can be used directly to index the original array.
        """
        return Occurrence(
            start=start,
            stop=stop - 1,
            slice=slice(start, stop),
            duration=(stop - start) * self._period,
        )

    @property
    def durations(self):
        """Return a ``numpy.ndarray`` of event durations.

        Each duration is the number of samples in the event multiplied by
        the period, so it is expressed in the same time unit as the period
        (:meth:`__str__` reports it as seconds).

        Examples:
            >>> import trouve as tr
            >>> x = np.array([2, 2, 4, 5, 3, 2])
            >>> condition = x == 2
            >>> events = tr.find_events(condition, period=1)
            >>> events.to_array() # doctest: +SKIP
            array([1, 1, 0, 0, 0, 1], dtype=int8)
            >>> print(events.durations)
            [2 1]
        """
        return (self._stops - self._starts) * self._period

    def to_array(self, inactive_value=0, active_value=1, dtype=None, order='C'):
        """Returns a ``numpy.ndarray`` identifying found events

        Useful for plotting or building another mask based on identified
        events.

        Parameters:
            inactive_value (``float``, optional): Default is 0.
                Value of array where events are not active.
            active_value (``float``, optional): Default is 1.
                Value of array where events are active.
            dtype (``numpy.dtype``, optional): The datatype of returned
                array. If ``None``, ``numpy.int8`` is used for the default
                0/1 coding and ``numpy.float64`` for any other pair of
                values.
            order (``str``, optional): Default is 'C'. {'C', 'F'} whether to
                store multidimensional data in C- or Fortran-contiguous (row-
                or column-wise) order in memory.

        Returns:
            ``numpy.ndarray``: An array where values are coded to
            identify when events are active or inactive.

        Examples:
            >>> import trouve as tr
            >>> x = np.array([2, 2, 4, 5, 3, 2])
            >>> condition = x > 2
            >>> print(condition)
            [False False  True  True  True False]
            >>> events = tr.find_events(condition, period=1)
            >>> events.to_array() # doctest: +SKIP
            array([0, 0, 1, 1, 1, 0], dtype=int8)
        """
        if dtype is None:
            if inactive_value == 0 and active_value == 1:
                # Compact representation for the plain 0/1 mask.
                dtype = np.int8
            else:
                # Resolve the dtype before allocating: letting numpy infer it
                # from ``inactive_value`` alone would truncate a fractional
                # (or NaN) ``active_value`` written into an integer array.
                dtype = np.float64
        output = np.full(self._condition_size, inactive_value,
                         dtype=dtype, order=order)
        for event in self:
            output[event.slice] = active_value
        return output

    def to_series(self, inactive_value=0, active_value=1,
                  index=None, dtype=None, name=None):
        """Returns a ``pandas.Series`` identifying found events

        Useful for plotting and for filtering a ``pandas.DataFrame``

        Parameters:
            inactive_value(``float``, optional): Default is 0.
                Value of array where events are not active.
            active_value (``float``, optional): Default is 1.
                Value of array where events are active.
            index (``array-like`` or ``Index`` (1d)): Values must be
                hashable and have the same length as data. Non-unique
                index values are allowed. Will default to
                RangeIndex(len(data)) if not provided.
            dtype (``numpy.dtype`` or ``None``): If ``None``, the same
                default as :meth:`to_array` applies (``numpy.int8`` for
                the default 0/1 coding, ``numpy.float64`` otherwise).
            name (``str``, optional): Default is :attr:`Events.name`.
                Name of series.

        Returns:
            ``pandas.Series``:
            A series where values are coded to identify when events are
            active or inactive.

        Examples:
            >>> import trouve as tr
            >>> x = np.array([2, 2, 4, 5, 3, 2])
            >>> condition = x > 2
            >>> print(condition)
            [False False  True  True  True False]
            >>> events = tr.find_events(condition, period=1)
            >>> events.to_series()
            0    0
            1    0
            2    1
            3    1
            4    1
            5    0
            Name: events, dtype: int8
        """
        if name is None:
            name = self.name
        # dtype defaulting lives in to_array so both exports stay in sync.
        data = self.to_array(inactive_value=inactive_value,
                             active_value=active_value, dtype=dtype)
        return pd.Series(data=data, index=index, name=name)

    def __iter__(self):
        """Yield one :class:`.Occurrence` per event, in stored order."""
        for start, stop in zip(self._starts, self._stops):
            yield self._make_occurrence(start, stop)

    def __getitem__(self, item):
        """Get a specific :class:`.Occurrence`, or a subset of events

        Parameters:
            item (``int`` or ``slice``): An integer selects a single
                event; a ``slice`` selects a range of events.

        Returns:
            :class:`.Occurrence` for an integer ``item``. For a ``slice``,
            a new :class:`Events` holding only the selected events, with
            the same name, period and condition size.

        Examples:
            >>> import numpy as np
            >>> import trouve as tr
            >>> x = np.array([0, 1, 1, 0, 1, 0])
            >>> example = tr.find_events(x, period=1, name='example')
            >>> first_event = example[0]
            >>> print(first_event)
            Occurrence(start=1, stop=2, slice=slice(1, 3, None), duration=2)
        """
        if isinstance(item, slice):
            return type(self)(
                starts=self._starts[item],
                stops=self._stops[item],
                period=self._period,
                name=self.name,
                condition_size=self._condition_size,
            )
        return self._make_occurrence(self._starts[item], self._stops[item])

    def __len__(self):
        """Returns the number of events found

        Redirects to :any:`Events._starts` and returns its length.

        Examples:
            >>> import numpy as np
            >>> import trouve as tr
            >>> x = np.array([0, 1, 1, 0, 1, 0])
            >>> example = tr.find_events(x, period=1, name='example')
            >>> len(example)
            2
        """
        return len(self._starts)

    def __repr__(self):
        """Return a constructor-like description of the stored state."""
        return (
            '{cls}(_starts={starts!r}, '
            '_stops={stops!r}, '
            '_period={period!r}, '
            'name={name!r}, '
            '_condition_size={size!r})'
        ).format(
            cls=self.__class__.__name__,
            starts=self._starts,
            stops=self._stops,
            period=self._period,
            name=self.name,
            size=self._condition_size,
        )

    def __str__(self):
        """Prints a summary of the events

        When no events were found the duration statistics are reported as
        ``N/A`` because min/max/mean of an empty array are undefined.

        Examples:
            >>> import numpy as np
            >>> import trouve as tr
            >>> x = np.array([0, 1, 1, 0, 1, 0])
            >>> example = tr.find_events(x, period=1, name='example')
            >>> print(example)
            example
            Number of events: 2
            Min, Max, Mean Duration: 1.000s, 2.000s, 1.500s
        """
        count = len(self)
        summary = '{}\nNumber of events: {}'.format(self.name, count)
        if count == 0:
            return summary + '\nMin, Max, Mean Duration: N/A'
        durations = self.durations
        return summary + (
            '\nMin, Max, Mean Duration: {:.3f}s, {:.3f}s, {:.3f}s'
        ).format(np.min(durations), np.max(durations), np.mean(durations))

    def __eq__(self, other):
        """Determine if two Events objects are identical

        Compares :attr:`Events._starts`, :attr:`Events._stops`,
        :attr:`Events._period` and :attr:`Events._condition_size` to
        determine equality of two events. Events objects can have
        different names and still be equal. Comparison against an object
        that is not an :class:`Events` is delegated to the other operand
        (``NotImplemented``).

        Examples:
            >>> import numpy as np
            >>> import trouve as tr
            >>> x = np.array([0, 1, 1, 0, 1, 0])
            >>> example = tr.find_events(x, period=1, name='example')
            >>> other = tr.find_events(x, period=1, name='other')
            >>> id(example) # doctest: +SKIP
            2587452050568
            >>> id(other) # doctest: +SKIP
            2587452084352
            >>> example == other
            True
            >>> example != other
            False
        """
        if not isinstance(other, Events):
            return NotImplemented
        # array_equal checks shape first, so a different number of events is
        # simply "not equal" instead of a broadcast or a ValueError.
        return bool(
            np.array_equal(self._starts, other._starts)
            and np.array_equal(self._stops, other._stops)
            and self._period == other._period
            and self._condition_size == other._condition_size
        )

    def __hash__(self):
        """Hash the same fields that :meth:`__eq__` compares.

        Numpy arrays aren't hashable, so the start/stop indexes are
        converted to tuples of Python scalars. This keeps the invariant
        that equal objects share a hash. The hash reflects the current
        contents, so the arrays must not be mutated while the object is
        used as a dict key or set member.
        """
        return hash((
            tuple(np.asarray(self._starts).tolist()),
            tuple(np.asarray(self._stops).tolist()),
            self._period,
            self._condition_size,
        ))