Skip to content

Commit eaecacf

Browse files
committed
Fix issues with some awks (mawk) printing large ints as floats
1 parent 91462b1 commit eaecacf

File tree

4 files changed

+5
-5
lines changed

4 files changed

+5
-5
lines changed

data/workflow/cascaded_clustering.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,8 @@ if [ -n "$REASSIGN" ]; then
159159
if notExists "${TMP_PATH}/seq_wrong_assigned_pref.dbtype"; then
160160
if notExists "${TMP_PATH}/seq_seeds.merged.dbtype"; then
161161
# combine seq dbs
162-
MAXOFFSET=$(awk '($2+$3) > max{max=$2+$3}END{print max}' "${TMP_PATH}/seq_seeds.index")
163-
awk -v OFFSET="${MAXOFFSET}" 'FNR==NR{print $0; next}{print $1"\t"$2+OFFSET"\t"$3}' "${TMP_PATH}/seq_seeds.index" \
162+
MAXOFFSET=$(awk '($2+$3) > max { max = $2+$3 } END { printf("%.0f\n", max); }' "${TMP_PATH}/seq_seeds.index")
163+
awk -v OFFSET="${MAXOFFSET}" 'FNR == NR { print $0; next } { printf("%s\t%.0f\t%s\n", $1, $2+OFFSET, $3); }' "${TMP_PATH}/seq_seeds.index" \
164164
"${TMP_PATH}/seq_wrong_assigned.index" > "${TMP_PATH}/seq_seeds.merged.index"
165165
ln -s "$(abspath "${TMP_PATH}/seq_seeds")" "${TMP_PATH}/seq_seeds.merged.0"
166166
ln -s "$(abspath "${TMP_PATH}/seq_wrong_assigned")" "${TMP_PATH}/seq_seeds.merged.1"

data/workflow/databases.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ case "${SELECTION}" in
165165
if notExists "${TMP_PATH}/msa.index"; then
166166
date "+%s" > "${TMP_PATH}/version"
167167
downloadFile "http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/pdb70_from_mmcif_latest.tar.gz" "${TMP_PATH}/pdb70.tar.gz"
168-
tar -xOzf "${TMP_PATH}/pdb70.tar.gz" pdb70_a3m.ffdata | tr -d '\000' | awk -v outfile="${TMP_PATH}/msa" 'function writeEntry() { printf "%s\0", data >> outfile; size = length(data) + 1; data=""; print id"\t"offset"\t"size >> outindex; offset = offset + size; } BEGIN { data = ""; offset = 0; id = 1; if(length(outfile) == 0) { outfile="output"; } outindex = outfile".index"; printf("") > outfile; printf("") > outindex; printf("%c%c%c%c",11,0,0,0) > outfile".dbtype"; } /^>ss_/ { inss = 1; entry = 0; next; } inss == 1 { inss = 0; next; } /^>/ && entry == 0 { if (id > 1) { writeEntry(); } id = id + 1; data = ">"substr($1, 2)"\n"; entry = entry + 1; next; } entry > 0 { data = data""$0"\n"; entry = entry + 1; next; } END { writeEntry(); close(outfile); close(outfile".index"); }'
168+
tar -xOzf "${TMP_PATH}/pdb70.tar.gz" pdb70_a3m.ffdata | tr -d '\000' | awk -v outfile="${TMP_PATH}/msa" 'function writeEntry() { printf "%s\0", data >> outfile; size = length(data) + 1; data=""; printf("%s\t%.0f\t%s\n", id, offset, size) >> outindex; offset = offset + size; } BEGIN { data = ""; offset = 0; id = 1; if(length(outfile) == 0) { outfile="output"; } outindex = outfile".index"; printf("") > outfile; printf("") > outindex; printf("%c%c%c%c",11,0,0,0) > outfile".dbtype"; } /^>ss_/ { inss = 1; entry = 0; next; } inss == 1 { inss = 0; next; } /^>/ && entry == 0 { if (id > 1) { writeEntry(); } id = id + 1; data = ">"substr($1, 2)"\n"; entry = entry + 1; next; } entry > 0 { data = data""$0"\n"; entry = entry + 1; next; } END { writeEntry(); close(outfile); close(outfile".index"); }'
169169
rm -f "${TMP_PATH}/pdb70.tar.gz"
170170
fi
171171
INPUT_TYPE="A3M"

data/workflow/searchslicedtargetprofile.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ while [ "${FIRST_INDEX_LINE}" -le "${TOTAL_NUM_PROFILES}" ]; do
118118
|| fail "result2stats died"
119119
fi
120120
# update the starting point for the next step and the total number of pref results
121-
NUM_PREF_RESULTS_IN_STEP=$(awk '{sum+=$1;} END{print sum;}' "${TMP_PATH}/pref_count.tsv")
121+
NUM_PREF_RESULTS_IN_STEP=$(awk '{sum+=$1;} END { printf("%.0f\n", sum); }' "${TMP_PATH}/pref_count.tsv")
122122
rm -f "${TMP_PATH}/pref_count.tsv"
123123

124124
NUM_PREF_RESULTS_IN_ALL_PREV_STEPS="$((NUM_PREF_RESULTS_IN_ALL_PREV_STEPS+NUM_PREF_RESULTS_IN_STEP))"

data/workflow/update_clustering.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ fi
131131
if notExists "${TMP_PATH}/newMappingSeqs"; then
132132
log "=== Update new sequences with old keys"
133133
MAXID="$(awk '$1 > max { max = $1 } END { print max }' "${OLDDB}.index" "${NEWDB}.index")"
134-
awk -v highest="$MAXID" 'BEGIN { start=highest+1 } { print $1"\t"start; start=start+1; }' \
134+
awk -v highest="$MAXID" 'BEGIN { start=highest+1 } { printf("%s\t%.0f\n", $1, start); start=start+1; }' \
135135
"${TMP_PATH}/newSeqs" > "${TMP_PATH}/newSeqs.mapped"
136136
awk '{ print $2"\t"$1 }' "${TMP_PATH}/mappingSeqs" > "${TMP_PATH}/mappingSeqs.reverse"
137137
cat "${TMP_PATH}/mappingSeqs.reverse" "${TMP_PATH}/newSeqs.mapped" > "${TMP_PATH}/newMappingSeqs"

0 commit comments

Comments (0)