Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ numpy
pytest
pytest-cov
codecov
sklearn
scikit-learn
scipy
118 changes: 28 additions & 90 deletions saxpy/alphabet.py
Original file line number Diff line number Diff line change
@@ -1,98 +1,36 @@
"""Implements Alphabet cuts."""
import numpy as np

import scipy

def cuts_for_asize(a_size):
"""Generate a set of alphabet cuts for its size."""
""" Typically, we generate cuts in R as follows:
"""Generate a set of alphabet cuts for its size.

Typically, we generate cuts in R as follows:
get_cuts_for_num <- function(num) {
cuts = c(-Inf)
for (i in 1:(num-1)) {
cuts = c(cuts, qnorm(i * 1/num))
}
cuts
cuts = c(-Inf)
for (i in 1:(num-1)) {
cuts = c(cuts, qnorm(i * 1/num))
}
cuts
}

get_cuts_for_num(3) """
options = {
2: np.array([-np.inf, 0.00]),
3: np.array([-np.inf, -0.4307273, 0.4307273]),
4: np.array([-np.inf, -0.6744898, 0, 0.6744898]),
5: np.array([-np.inf, -0.841621233572914, -0.2533471031358,
0.2533471031358, 0.841621233572914]),
6: np.array([-np.inf, -0.967421566101701, -0.430727299295457, 0,
0.430727299295457, 0.967421566101701]),
7: np.array([-np.inf, -1.06757052387814, -0.565948821932863,
-0.180012369792705, 0.180012369792705, 0.565948821932863,
1.06757052387814]),
8: np.array([-np.inf, -1.15034938037601, -0.674489750196082,
-0.318639363964375, 0, 0.318639363964375,
0.674489750196082, 1.15034938037601]),
9: np.array([-np.inf, -1.22064034884735, -0.764709673786387,
-0.430727299295457, -0.139710298881862, 0.139710298881862,
0.430727299295457, 0.764709673786387, 1.22064034884735]),
10: np.array([-np.inf, -1.2815515655446, -0.841621233572914,
-0.524400512708041, -0.2533471031358, 0, 0.2533471031358,
0.524400512708041, 0.841621233572914, 1.2815515655446]),
11: np.array([-np.inf, -1.33517773611894, -0.908457868537385,
-0.604585346583237, -0.348755695517045,
-0.114185294321428, 0.114185294321428, 0.348755695517045,
0.604585346583237, 0.908457868537385, 1.33517773611894]),
12: np.array([-np.inf, -1.38299412710064, -0.967421566101701,
-0.674489750196082, -0.430727299295457,
-0.210428394247925, 0, 0.210428394247925,
0.430727299295457, 0.674489750196082, 0.967421566101701,
1.38299412710064]),
13: np.array([-np.inf, -1.42607687227285, -1.0200762327862,
-0.736315917376129, -0.502402223373355,
-0.293381232121193, -0.0965586152896391,
0.0965586152896394, 0.293381232121194, 0.502402223373355,
0.73631591737613, 1.0200762327862, 1.42607687227285]),
14: np.array([-np.inf, -1.46523379268552, -1.06757052387814,
-0.791638607743375, -0.565948821932863, -0.36610635680057,
-0.180012369792705, 0, 0.180012369792705,
0.36610635680057, 0.565948821932863, 0.791638607743375,
1.06757052387814, 1.46523379268552]),
15: np.array([-np.inf, -1.50108594604402, -1.11077161663679,
-0.841621233572914, -0.622925723210088,
-0.430727299295457, -0.2533471031358, -0.0836517339071291,
0.0836517339071291, 0.2533471031358, 0.430727299295457,
0.622925723210088, 0.841621233572914, 1.11077161663679,
1.50108594604402]),
16: np.array([-np.inf, -1.53412054435255, -1.15034938037601,
-0.887146559018876, -0.674489750196082,
-0.488776411114669, -0.318639363964375,
-0.157310684610171, 0, 0.157310684610171,
0.318639363964375, 0.488776411114669, 0.674489750196082,
0.887146559018876, 1.15034938037601, 1.53412054435255]),
17: np.array([-np.inf, -1.5647264713618, -1.18683143275582,
-0.928899491647271, -0.721522283982343,
-0.541395085129088, -0.377391943828554,
-0.223007830940367, -0.0737912738082727,
0.0737912738082727, 0.223007830940367, 0.377391943828554,
0.541395085129088, 0.721522283982343, 0.928899491647271,
1.18683143275582, 1.5647264713618]),
18: np.array([-np.inf, -1.59321881802305, -1.22064034884735,
-0.967421566101701, -0.764709673786387,
-0.589455797849779, -0.430727299295457,
-0.282216147062508, -0.139710298881862, 0,
0.139710298881862, 0.282216147062508, 0.430727299295457,
0.589455797849779, 0.764709673786387, 0.967421566101701,
1.22064034884735, 1.59321881802305]),
19: np.array([-np.inf, -1.61985625863827, -1.25211952026522,
-1.00314796766253, -0.8045963803603, -0.633640000779701,
-0.47950565333095, -0.336038140371823, -0.199201324789267,
-0.0660118123758407, 0.0660118123758406,
0.199201324789267, 0.336038140371823, 0.47950565333095,
0.633640000779701, 0.8045963803603, 1.00314796766253,
1.25211952026522, 1.61985625863827]),
20: np.array([-np.inf, -1.64485362695147, -1.2815515655446,
-1.03643338949379, -0.841621233572914, -0.674489750196082,
-0.524400512708041, -0.385320466407568, -0.2533471031358,
-0.125661346855074, 0, 0.125661346855074, 0.2533471031358,
0.385320466407568, 0.524400512708041, 0.674489750196082,
0.841621233572914, 1.03643338949379, 1.2815515655446,
1.64485362695147]),
}
get_cuts_for_num(3)

Args:
a_size (int): Number of sections to cut into
(Must be between 2 and 26)

Returns:
numpy.ndarray: Array of the cut thresholds, starting with -inf
"""

# Limit to 26 sections because there are only 26 letters in the alphabet
if a_size > 26 or a_size < 2:
raise ValueError("'a_size must be between 2 and 26'")

break_points = [-np.inf]

return options[a_size]
for i in range(1, a_size):
break_points.append(scipy.stats.norm.ppf(i/a_size))

return np.array(break_points)
2 changes: 1 addition & 1 deletion saxpy/hotsax.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def find_best_discord_hotsax(series, win_size, global_registry, sax_data, magic_

distance_calls = 0

visit_array = np.zeros(len(series), dtype=np.int)
visit_array = np.zeros(len(series), dtype=np.int_)

"""[4.0] and we are off iterating over the magic array entries"""
for entry in magic_array:
Expand Down
2 changes: 1 addition & 1 deletion saxpy/strfunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

def idx2letter(idx):
"""Convert a numerical index to a char."""
if 0 <= idx < 20:
if 0 <= idx < 26:
return chr(97 + idx)
else:
raise ValueError('A wrong idx value supplied.')
2 changes: 1 addition & 1 deletion tests/test_cuts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@

def test_sizing():
"""Test alphabet sizes."""
for s in range(2, 20):
for s in range(2, 26):
assert len(alphabet.cuts_for_asize(s)) == s
3 changes: 2 additions & 1 deletion tests/test_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@ def test_sizing():
assert 'a' == idx2letter(0)
assert 'h' == idx2letter(7)
assert 't' == idx2letter(19)
assert 'z' == idx2letter(25)

with pytest.raises(ValueError, match=r'.* idx'):
idx2letter(-1)

with pytest.raises(ValueError, match=r'.* idx .*'):
idx2letter(20)
idx2letter(26)
21 changes: 15 additions & 6 deletions tests/test_ts2string.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,27 @@

def test_stringing():
"""Test string conversion."""
# 11: np.array([-np.inf, -1.33517773611894, -0.908457868537385,
# -0.604585346583237, -0.348755695517045,
# -0.114185294321428, 0.114185294321428, 0.348755695517045,
# 0.604585346583237, 0.908457868537385, 1.33517773611894]),
ab = sax.ts_to_string(np.array([-1.33517773611895, -1.33517773611894]),
# 11: np.array([-inf, -1.33517774, -0.90845787, -0.60458535,
# -0.3487557, -0.11418529, 0.11418529, 0.3487557,
# 0.60458535, 0.90845787, 1.33517774]),

ab = sax.ts_to_string(np.array([-1.33517775 , -1.33517773]),
alphabet.cuts_for_asize(11))
assert 'ab' == ab

kj = sax.ts_to_string(np.array([1.33517773611895, 1.33517773611894]),
kj = sax.ts_to_string(np.array([1.33517775, 1.33517773]),
alphabet.cuts_for_asize(11))
assert 'kj' == kj

# Test handling of cuts for an alphabet of size 26
print(alphabet.cuts_for_asize(26))

yz = sax.ts_to_string(np.array([1.76882503, 1.76882505]),
alphabet.cuts_for_asize(26))

assert 'yz' == yz



def test_mindist():
"""Test MINDIST."""
Expand Down