3
3
import requests
4
4
import urllib
5
5
import time
6
+ from functools import partial
6
7
from bs4 import BeautifulSoup
7
8
import logging
8
9
11
12
log = logging .getLogger (__name__ )
12
13
13
14
15
def rate_limit_handler(request_call, default_retry_after=60):
    """Execute an HTTP request, retrying once if the server rate-limits us.

    Args:
        request_call: Zero-argument callable that performs the request and
            returns a ``requests.Response`` (e.g. ``partial(requests.get, url)``).
        default_retry_after: Seconds to wait before retrying when the 429
            response has no usable ``Retry-After`` header.

    Returns:
        The successful ``requests.Response``.

    Raises:
        requests.HTTPError: For any non-429 error status, or if the single
            retry after a 429 fails again.
    """
    try:
        response = request_call()
        response.raise_for_status()
        return response
    except requests.HTTPError as error:
        # Only a 429 (Too Many Requests) is retried; everything else re-raises.
        if error.response is None or error.response.status_code != 429:
            raise
        # Honor the server-suggested delay. The header may be missing
        # (KeyError -> int(None) crash in the old code) or an HTTP-date
        # (ValueError); fall back to a fixed wait in either case.
        try:
            retry_after = int(error.response.headers["Retry-After"])
        except (KeyError, ValueError):
            retry_after = default_retry_after
        print(f"Rate limited, waiting {retry_after} seconds")
        time.sleep(retry_after)
        response = request_call()
        response.raise_for_status()
        return response
29
+
14
30
def read_assemblies(assemblies_path):
    """Load the "assemblies" section of a YAML file as a DataFrame.

    Args:
        assemblies_path: Path to a YAML file whose top-level mapping
            contains an "assemblies" key holding a list of records.

    Returns:
        A ``pandas.DataFrame`` with one row per assembly record.
    """
    with open(assemblies_path) as assemblies_file:
        parsed = yaml.safe_load(assemblies_file)
    return pd.DataFrame(parsed["assemblies"])
@@ -23,7 +39,8 @@ def get_paginated_ncbi_results(base_url, query_description):
23
39
while next_page_token or page == 1 :
24
40
print (f"Requesting page { page } of { query_description } " )
25
41
request_url = f"{ base_url } ?page_size=1000{ "&page_token=" + next_page_token if next_page_token else "" } "
26
- page_data = requests .get (request_url ).json ()
42
+ response = rate_limit_handler (partial (requests .get , request_url ))
43
+ page_data = response .json ()
27
44
if len (page_data ["reports" ][0 ].get ("errors" , [])) > 0 :
28
45
raise Exception (page_data ["reports" ][0 ])
29
46
results += page_data ["reports" ]
0 commit comments