Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 57 additions & 57 deletions tests/test_gbsketch.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@

import sourmash
from sourmash import sourmash_args
from sourmash.signature import load_one_signature_from_json
import sourmash_tst_utils as utils
from sourmash_tst_utils import SourmashCommandFailed



def get_test_data(filename):
    """Return the path to *filename* inside this module's ``test-data`` directory.

    The directory is resolved relative to the location of this test file
    (via ``__file__``), so the result does not depend on the current
    working directory. The file is not checked for existence.
    """
    thisdir = os.path.dirname(__file__)
    return os.path.join(thisdir, 'test-data', filename)
Expand All @@ -35,10 +35,10 @@ def test_gbsketch_simple(runtmp, capfd):
sig1 = get_test_data('GCA_000175535.1.sig.gz')
sig2 = get_test_data('GCA_000961135.2.sig.gz')
sig3 = get_test_data('GCA_000961135.2.protein.sig.gz')
ss1 = sourmash.load_one_signature(sig1, ksize=31)
ss2 = sourmash.load_one_signature(sig2, ksize=31)
ss1 = load_one_signature_from_json(sig1, ksize=31)
ss2 = load_one_signature_from_json(sig2, ksize=31)
# TODO: why does this need ksize=30 and not ksize=10?
ss3 = sourmash.load_one_signature(sig3, ksize=30, select_moltype='protein')
ss3 = load_one_signature_from_json(sig3, ksize=30, select_moltype='protein')

runtmp.sourmash('scripts', 'gbsketch', acc_csv, '-o', output,
'--failed', failed, '-r', '3', '--checksum-fail', ch_fail,
Expand Down Expand Up @@ -89,10 +89,10 @@ def test_gbsketch_simple_default_failed(runtmp, capfd):
sig1 = get_test_data('GCA_000175535.1.sig.gz')
sig2 = get_test_data('GCA_000961135.2.sig.gz')
sig3 = get_test_data('GCA_000961135.2.protein.sig.gz')
ss1 = sourmash.load_one_signature(sig1, ksize=31)
ss2 = sourmash.load_one_signature(sig2, ksize=31)
ss1 = load_one_signature_from_json(sig1, ksize=31)
ss2 = load_one_signature_from_json(sig2, ksize=31)
# TODO: why does this need ksize=30 and not ksize=10?
ss3 = sourmash.load_one_signature(sig3, ksize=30, select_moltype='protein')
ss3 = load_one_signature_from_json(sig3, ksize=30, select_moltype='protein')

runtmp.sourmash('scripts', 'gbsketch', acc_csv, '-o', output,
'-r', '3',
Expand Down Expand Up @@ -129,10 +129,10 @@ def test_gbsketch_manifest(runtmp, capfd):
sig1 = get_test_data('GCA_000175535.1.sig.gz')
sig2 = get_test_data('GCA_000961135.2.sig.gz')
sig3 = get_test_data('GCA_000961135.2.protein.sig.gz')
ss1 = sourmash.load_one_signature(sig1, ksize=31)
ss2 = sourmash.load_one_signature(sig2, ksize=31)
ss1 = load_one_signature_from_json(sig1, ksize=31)
ss2 = load_one_signature_from_json(sig2, ksize=31)
# TODO: why does this need ksize=30 and not ksize=10?
ss3 = sourmash.load_one_signature(sig3, ksize=30, select_moltype='protein')
ss3 = load_one_signature_from_json(sig3, ksize=30, select_moltype='protein')

runtmp.sourmash('scripts', 'gbsketch', acc_csv, '-o', output,
'--failed', failed, '-r', '3', '--checksum-fail', ch_fail,
Expand Down Expand Up @@ -170,8 +170,8 @@ def test_gbsketch_genomes_only(runtmp):

sig1 = get_test_data('GCA_000175535.1.sig.gz')
sig2 = get_test_data('GCA_000961135.2.sig.gz')
ss1 = sourmash.load_one_signature(sig1, ksize=31)
ss2 = sourmash.load_one_signature(sig2, ksize=31)
ss1 = load_one_signature_from_json(sig1, ksize=31)
ss2 = load_one_signature_from_json(sig2, ksize=31)

runtmp.sourmash('scripts', 'gbsketch', acc_csv, '-o', output,
'--failed', failed, '-r', '3', '--genomes-only',
Expand Down Expand Up @@ -202,7 +202,7 @@ def test_gbsketch_proteomes_only(runtmp):

sig3 = get_test_data('GCA_000961135.2.protein.sig.gz')
# TODO: why does this need ksize=30 and not ksize=10?
ss3 = sourmash.load_one_signature(sig3, ksize=30, select_moltype='protein')
ss3 = load_one_signature_from_json(sig3, ksize=30, select_moltype='protein')

runtmp.sourmash('scripts', 'gbsketch', acc_csv, '-o', output,
'--failed', failed, '-r', '3', '--proteomes-only',
Expand All @@ -229,8 +229,8 @@ def test_gbsketch_genomes_only_via_params(runtmp, capfd):

sig1 = get_test_data('GCA_000175535.1.sig.gz')
sig2 = get_test_data('GCA_000961135.2.sig.gz')
ss1 = sourmash.load_one_signature(sig1, ksize=31)
ss2 = sourmash.load_one_signature(sig2, ksize=31)
ss1 = load_one_signature_from_json(sig1, ksize=31)
ss2 = load_one_signature_from_json(sig2, ksize=31)

runtmp.sourmash('scripts', 'gbsketch', acc_csv, '-o', output,
'--failed', failed, '-r', '3', '--checksum-fail', ch_fail,
Expand Down Expand Up @@ -263,7 +263,7 @@ def test_gbsketch_proteomes_only_via_params(runtmp, capfd):

sig3 = get_test_data('GCA_000961135.2.protein.sig.gz')
# TODO: why does this need ksize=30 and not ksize=10?
ss3 = sourmash.load_one_signature(sig3, ksize=30, select_moltype='protein')
ss3 = load_one_signature_from_json(sig3, ksize=30, select_moltype='protein')

runtmp.sourmash('scripts', 'gbsketch', acc_csv, '-o', output,
'--failed', failed, '-r', '3',
Expand Down Expand Up @@ -296,10 +296,10 @@ def test_gbsketch_save_fastas(runtmp):
sig1 = get_test_data('GCA_000175535.1.sig.gz')
sig2 = get_test_data('GCA_000961135.2.sig.gz')
sig3 = get_test_data('GCA_000961135.2.protein.sig.gz')
ss1 = sourmash.load_one_signature(sig1, ksize=31)
ss2 = sourmash.load_one_signature(sig2, ksize=31)
ss1 = load_one_signature_from_json(sig1, ksize=31)
ss2 = load_one_signature_from_json(sig2, ksize=31)
# TODO: why does this need ksize=30 and not ksize=10?
ss3 = sourmash.load_one_signature(sig3, ksize=30, select_moltype='protein')
ss3 = load_one_signature_from_json(sig3, ksize=30, select_moltype='protein')

runtmp.sourmash('scripts', 'gbsketch', acc_csv, '-o', output,
'--failed', failed, '-r', '3', '--fastas', out_dir, '--keep-fasta',
Expand Down Expand Up @@ -433,10 +433,10 @@ def test_gbsketch_bad_acc(runtmp):
sig1 = get_test_data('GCA_000175535.1.sig.gz')
sig2 = get_test_data('GCA_000961135.2.sig.gz')
sig3 = get_test_data('GCA_000961135.2.protein.sig.gz')
ss1 = sourmash.load_one_signature(sig1, ksize=31)
ss2 = sourmash.load_one_signature(sig2, ksize=31)
ss1 = load_one_signature_from_json(sig1, ksize=31)
ss2 = load_one_signature_from_json(sig2, ksize=31)
# TODO: why does this need ksize=30 and not ksize=10?
ss3 = sourmash.load_one_signature(sig3, ksize=30, select_moltype='protein')
ss3 = load_one_signature_from_json(sig3, ksize=30, select_moltype='protein')

runtmp.sourmash('scripts', 'gbsketch', acc_mod, '-o', output,
'--failed', failed, '-r', '3', #'--fastas', output_fastas,
Expand Down Expand Up @@ -600,7 +600,7 @@ def test_gbsketch_version_bug(runtmp):
ch_fail = runtmp.output('checksum_dl_failed.csv')

sig1 = get_test_data('GCA_000193795.2.sig.gz')
ss1 = sourmash.load_one_signature(sig1, ksize=31)
ss1 = load_one_signature_from_json(sig1, ksize=31)

runtmp.sourmash('scripts', 'gbsketch', acc_csv, '-o', output,
'--failed', failed, '-r', '3', '--checksum-fail', ch_fail,
Expand Down Expand Up @@ -662,9 +662,9 @@ def test_zip_file_permissions(runtmp):
sig1 = get_test_data('GCA_000175535.1.sig.gz')
sig2 = get_test_data('GCA_000961135.2.sig.gz')
sig3 = get_test_data('GCA_000961135.2.protein.sig.gz')
ss1 = sourmash.load_one_signature(sig1, ksize=31)
ss2 = sourmash.load_one_signature(sig2, ksize=31)
ss3 = sourmash.load_one_signature(sig3, ksize=30, select_moltype='protein')
ss1 = load_one_signature_from_json(sig1, ksize=31)
ss2 = load_one_signature_from_json(sig2, ksize=31)
ss3 = load_one_signature_from_json(sig3, ksize=30, select_moltype='protein')

runtmp.sourmash('scripts', 'gbsketch', acc_csv, '-o', output,
'--failed', failed, '-r', '3', '--checksum-fail', ch_fail,
Expand Down Expand Up @@ -693,9 +693,9 @@ def test_gbsketch_protein_dayhoff_hp(runtmp):
sig1 = get_test_data('GCA_000961135.2.protein.sig.gz')
sig2 = get_test_data('GCA_000961135.2.dayhoff.sig.gz')
sig3 = get_test_data('GCA_000961135.2.hp.sig.gz')
ss1 = sourmash.load_one_signature(sig1, ksize=30, select_moltype='protein')
ss2 = sourmash.load_one_signature(sig2, ksize=30, select_moltype='dayhoff')
ss3 = sourmash.load_one_signature(sig3, ksize=30, select_moltype='hp')
ss1 = load_one_signature_from_json(sig1, ksize=30, select_moltype='protein')
ss2 = load_one_signature_from_json(sig2, ksize=30, select_moltype='dayhoff')
ss3 = load_one_signature_from_json(sig3, ksize=30, select_moltype='hp')

runtmp.sourmash('scripts', 'gbsketch', acc_csv, '-o', output,
'--failed', failed, '-r', '3',
Expand Down Expand Up @@ -756,8 +756,8 @@ def test_gbsketch_simple_batched_single_acc(runtmp, capfd):

sig1 = get_test_data('GCA_000961135.2.sig.gz')
sig2 = get_test_data('GCA_000961135.2.protein.sig.gz')
ss1 = sourmash.load_one_signature(sig1, ksize=31)
ss2 = sourmash.load_one_signature(sig2, ksize=30, select_moltype='protein')
ss1 = load_one_signature_from_json(sig1, ksize=31)
ss2 = load_one_signature_from_json(sig2, ksize=30, select_moltype='protein')

runtmp.sourmash('scripts', 'gbsketch', acc1, '-o', output,
'--failed', failed, '-r', '3', '--checksum-fail', ch_fail,
Expand Down Expand Up @@ -801,9 +801,9 @@ def test_gbsketch_simple_batched_multiple(runtmp, capfd):
sig1 = get_test_data('GCA_000175535.1.sig.gz')
sig2 = get_test_data('GCA_000961135.2.sig.gz')
sig3 = get_test_data('GCA_000961135.2.protein.sig.gz')
ss1 = sourmash.load_one_signature(sig1, ksize=31)
ss2 = sourmash.load_one_signature(sig2, ksize=31)
ss3 = sourmash.load_one_signature(sig3, ksize=30, select_moltype='protein')
ss1 = load_one_signature_from_json(sig1, ksize=31)
ss2 = load_one_signature_from_json(sig2, ksize=31)
ss3 = load_one_signature_from_json(sig3, ksize=30, select_moltype='protein')

runtmp.sourmash('scripts', 'gbsketch', acc_csv, '-o', output,
'--failed', failed, '-r', '3', '--checksum-fail', ch_fail,
Expand Down Expand Up @@ -855,11 +855,11 @@ def test_gbsketch_simple_batch_restart(runtmp, capfd):
sig1 = get_test_data('GCA_000175535.1.sig.gz')
sig2 = get_test_data('GCA_000961135.2.sig.gz')
sig3 = get_test_data('GCA_000961135.2.protein.sig.gz')
ss1 = sourmash.load_one_signature(sig1, ksize=31)
ss2 = sourmash.load_one_signature(sig2, ksize=31)
ss3 = sourmash.load_one_signature(sig2, ksize=21)
ss1 = load_one_signature_from_json(sig1, ksize=31)
ss2 = load_one_signature_from_json(sig2, ksize=31)
ss3 = load_one_signature_from_json(sig2, ksize=21)
# TODO: why does this need ksize=30 and not ksize=10?
ss4 = sourmash.load_one_signature(sig3, ksize=30, select_moltype='protein')
ss4 = load_one_signature_from_json(sig3, ksize=30, select_moltype='protein')

# first, cat sig2 into an output file that will trick gbsketch into thinking it's a prior batch
runtmp.sourmash('sig', 'cat', sig2, '-o', out1)
Expand Down Expand Up @@ -971,11 +971,11 @@ def test_gbsketch_simple_batch_restart_sig_zip(runtmp, capfd):
sig1 = get_test_data('GCA_000175535.1.sig.gz')
sig2 = get_test_data('GCA_000961135.2.sig.gz')
sig3 = get_test_data('GCA_000961135.2.protein.sig.gz')
ss1 = sourmash.load_one_signature(sig1, ksize=31)
ss2 = sourmash.load_one_signature(sig2, ksize=31)
ss3 = sourmash.load_one_signature(sig2, ksize=21)
ss1 = load_one_signature_from_json(sig1, ksize=31)
ss2 = load_one_signature_from_json(sig2, ksize=31)
ss3 = load_one_signature_from_json(sig2, ksize=21)
# TODO: why does this need ksize=30 and not ksize=10?
ss4 = sourmash.load_one_signature(sig3, ksize=30, select_moltype='protein')
ss4 = load_one_signature_from_json(sig3, ksize=30, select_moltype='protein')

# first, cat sig2 into an output file that will trick gbsketch into thinking it's a prior batch
runtmp.sourmash('sig', 'cat', sig2, '-o', out1)
Expand Down Expand Up @@ -1034,11 +1034,11 @@ def test_gbsketch_simple_batch_restart_incomplete_sig_zip(runtmp, capfd):
sig1 = get_test_data('GCA_000175535.1.sig.gz')
sig2 = get_test_data('GCA_000961135.2.sig.gz')
sig3 = get_test_data('GCA_000961135.2.protein.sig.gz')
ss1 = sourmash.load_one_signature(sig1, ksize=31)
ss2 = sourmash.load_one_signature(sig2, ksize=31)
ss3 = sourmash.load_one_signature(sig2, ksize=21)
ss1 = load_one_signature_from_json(sig1, ksize=31)
ss2 = load_one_signature_from_json(sig2, ksize=31)
ss3 = load_one_signature_from_json(sig2, ksize=21)
# TODO: why does this need ksize=30 and not ksize=10?
ss4 = sourmash.load_one_signature(sig3, ksize=30, select_moltype='protein')
ss4 = load_one_signature_from_json(sig3, ksize=30, select_moltype='protein')

# first, cat sig2 into an output file that will trick gbsketch into thinking it's a prior batch
runtmp.sourmash('sig', 'cat', sig2, '-o', out1)
Expand Down Expand Up @@ -1100,9 +1100,9 @@ def test_gbsketch_simple_batch_restart_skipcount(runtmp, capfd):

sig1 = get_test_data('GCA_000175535.1.sig.gz')
sig2 = get_test_data('GCA_000961135.2.sig.gz')
ss1 = sourmash.load_one_signature(sig1, ksize=31)
ss2 = sourmash.load_one_signature(sig2, ksize=31)
ss3 = sourmash.load_one_signature(sig2, ksize=21)
ss1 = load_one_signature_from_json(sig1, ksize=31)
ss2 = load_one_signature_from_json(sig2, ksize=31)
ss3 = load_one_signature_from_json(sig2, ksize=21)

# first, cat sig2 into an output file that will trick gbsketch into thinking it's a prior batch
runtmp.sourmash('sig', 'cat', sig2, '-o', out1)
Expand Down Expand Up @@ -1176,10 +1176,10 @@ def test_gbsketch_simple_batch_restart_with_incomplete_zip(runtmp, capfd):
sig1 = get_test_data('GCA_000175535.1.sig.gz')
sig2 = get_test_data('GCA_000961135.2.sig.gz')
sig3 = get_test_data('GCA_000961135.2.protein.sig.gz')
ss1 = sourmash.load_one_signature(sig1, ksize=31)
ss2 = sourmash.load_one_signature(sig2, ksize=31)
ss3 = sourmash.load_one_signature(sig2, ksize=21)
ss4 = sourmash.load_one_signature(sig3, ksize=30, select_moltype='protein')
ss1 = load_one_signature_from_json(sig1, ksize=31)
ss2 = load_one_signature_from_json(sig2, ksize=31)
ss3 = load_one_signature_from_json(sig2, ksize=21)
ss4 = load_one_signature_from_json(sig3, ksize=30, select_moltype='protein')

# first, cat sig2 into an output file that will trick gbsketch into thinking it's a prior batch
runtmp.sourmash('sig', 'cat', sig2, '-o', out1)
Expand Down Expand Up @@ -1260,10 +1260,10 @@ def test_gbsketch_overwrite(runtmp, capfd):
sig1 = get_test_data('GCA_000175535.1.sig.gz')
sig2 = get_test_data('GCA_000961135.2.sig.gz')
sig3 = get_test_data('GCA_000961135.2.protein.sig.gz')
ss1 = sourmash.load_one_signature(sig1, ksize=31)
ss2 = sourmash.load_one_signature(sig2, ksize=31)
# ss3 = sourmash.load_one_signature(sig2, ksize=21)
ss4 = sourmash.load_one_signature(sig3, ksize=30, select_moltype='protein')
ss1 = load_one_signature_from_json(sig1, ksize=31)
ss2 = load_one_signature_from_json(sig2, ksize=31)
# ss3 = load_one_signature_from_json(sig2, ksize=21)
ss4 = load_one_signature_from_json(sig3, ksize=30, select_moltype='protein')

# run the workflow once - write all to single output
runtmp.sourmash('scripts', 'gbsketch', acc_csv, '-o', output,
Expand Down
Loading
Loading