Skip to content

Commit dacdbb2

Browse files
authored
chore: use requests session to reuse tcp connection (#397) (#405)
That's a bit faster.
1 parent 17f317c commit dacdbb2

File tree

1 file changed

+4
-3
lines changed
  • catalog/build/py/package/catalog_build

1 file changed

+4
-3
lines changed

catalog/build/py/package/catalog_build/build.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -176,10 +176,10 @@ def get_genomes_and_primarydata_df(accessions):
176176
pd.DataFrame(data=[get_biosample_data(info) for info in genomes_info if 'biosample' in info['assembly_info']]))
177177

178178

179-
def _id_to_gene_model_url(asm_id):
179+
def _id_to_gene_model_url(asm_id: str, session: requests.Session):
180180
ucsc_files_endpoint = "https://genome.ucsc.edu/list/files"
181181
download_base_url = "https://hgdownload.soe.ucsc.edu"
182-
response = requests.get(ucsc_files_endpoint, params={"genome": asm_id})
182+
response = session.get(ucsc_files_endpoint, params={"genome": asm_id})
183183
try:
184184
response.raise_for_status()
185185
except Exception:
@@ -202,7 +202,8 @@ def _id_to_gene_model_url(asm_id):
202202

203203
def add_gene_model_url(genomes_df: pd.DataFrame):
204204
print("Fetching gene model URLs")
205-
return pd.concat([genomes_df, genomes_df["accession"].apply(_id_to_gene_model_url).rename("geneModelUrl")], axis="columns")
205+
session = requests.Session()
206+
return pd.concat([genomes_df, genomes_df["accession"].apply(partial(_id_to_gene_model_url, session=session)).rename("geneModelUrl")], axis="columns")
206207

207208

208209
def report_missing_values_from(values_name, message_predicate, all_values_series, *partial_values_series):

0 commit comments

Comments
 (0)