Skip to content

Commit 1cb2da6

Browse files
committed
Merge branch 'main' of github-datawhores:datawhores/OF-Scraper
2 parents 084fb21 + 61bfdee commit 1cb2da6

File tree

2 files changed

+19
-45
lines changed

2 files changed

+19
-45
lines changed

ofscraper/data/api/subscriptions/lists.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -214,20 +214,18 @@ async def scrape_list_members(c, item, offset=0):
214214
log.debug(
215215
f"{log_id} -> hasMore value in json {data.get('hasMore','undefined') }"
216216
)
217+
log.debug(
218+
f"{log_id} -> nextOffset value in json {data.get('nextOffset','undefined') }"
219+
)
217220
log.debug(
218221
f"usernames {log_id} : usernames retrived -> {list(map(lambda x:x.get('username'),users))}"
219222
)
220223
name = f"API {item.get('name')}"
221224
trace_progress_log(name, data, offset=offset)
222225

223-
if (
224-
data.get("hasMore")
225-
and len(users) > 0
226-
and offset != data.get("nextOffset")
227-
):
228-
offset += len(users)
226+
if data.get("hasMore"):
229227
new_tasks.append(
230-
asyncio.create_task(scrape_list_members(c, item, offset=offset))
228+
asyncio.create_task(scrape_list_members(c, item, offset=data["nextOffset"]))
231229
)
232230
except asyncio.TimeoutError:
233231
raise Exception(f"Task timed out {url}")

ofscraper/data/api/subscriptions/subscriptions.py

Lines changed: 14 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,8 @@ async def get_all_activive_subscriptions(subscribe_count):
6060
sem_count=of_env.getattr("SUBSCRIPTION_SEMS"),
6161
) as c:
6262
tasks = [
63-
asyncio.create_task(funct(c, offset))
64-
for offset in range(0, subscribe_count + 1, 10)
63+
asyncio.create_task(funct(c))
6564
]
66-
tasks.extend([asyncio.create_task(funct(c, subscribe_count + 1, recur=True))])
6765
return await process_task(tasks)
6866

6967

@@ -73,10 +71,8 @@ async def get_all_expired_subscriptions(subscribe_count):
7371
sem_count=of_env.getattr("SUBSCRIPTION_SEMS"),
7472
) as c:
7573
tasks = [
76-
asyncio.create_task(funct(c, offset))
77-
for offset in range(0, subscribe_count + 1, 10)
74+
asyncio.create_task(funct(c))
7875
]
79-
tasks.extend([asyncio.create_task(funct(c, subscribe_count + 1, recur=True))])
8076
return await process_task(tasks)
8177

8278

@@ -112,10 +108,8 @@ async def activeHelper(subscribe_count, c):
112108
funct = scrape_subscriptions_active
113109

114110
tasks = [
115-
asyncio.create_task(funct(c, offset))
116-
for offset in range(0, subscribe_count + 1, 10)
111+
asyncio.create_task(funct(c))
117112
]
118-
tasks.extend([asyncio.create_task(funct(c, subscribe_count + 1, recur=True))])
119113
return await process_task(tasks)
120114

121115

@@ -151,11 +145,8 @@ async def expiredHelper(subscribe_count, c):
151145
funct = scrape_subscriptions_disabled
152146

153147
tasks = [
154-
asyncio.create_task(funct(c, offset))
155-
for offset in range(0, subscribe_count + 1, 10)
148+
asyncio.create_task(funct(c))
156149
]
157-
tasks.extend([asyncio.create_task(funct(c, subscribe_count + 1, recur=True))])
158-
159150
return await process_task(tasks)
160151

161152

@@ -182,29 +173,22 @@ async def process_task(tasks):
182173
return output
183174

184175

185-
async def scrape_subscriptions_active(c, offset=0, num=0, recur=False) -> list:
176+
async def scrape_subscriptions_active(c, offset=0):
186177
with progress_utils.setup_live("user_list"):
187178
new_tasks = []
188179
url = of_env.getattr("subscriptionsActiveEP").format(offset)
189180
try:
190181
log.debug(f"usernames active offset {offset}")
191182
async with c.requests_async(url=url) as r:
192-
subscriptions = (await r.json_())["list"]
183+
response = await r.json_()
184+
subscriptions = response["list"]
193185
log.debug(
194186
f"usernames retrived -> {list(map(lambda x:x.get('username'),subscriptions))}"
195187
)
196-
if len(subscriptions) == 0:
197-
return subscriptions, new_tasks
198-
elif recur is False:
199-
pass
200-
elif (await r.json_())["hasMore"] is True:
188+
if response["hasMore"] is True:
201189
new_tasks.append(
202190
asyncio.create_task(
203-
scrape_subscriptions_active(
204-
c,
205-
recur=True,
206-
offset=offset + len(subscriptions),
207-
)
191+
scrape_subscriptions_active(c, offset=offset + 10)
208192
)
209193
)
210194
return subscriptions, new_tasks
@@ -217,30 +201,22 @@ async def scrape_subscriptions_active(c, offset=0, num=0, recur=False) -> list:
217201
raise E
218202

219203

220-
async def scrape_subscriptions_disabled(c, offset=0, num=0, recur=False) -> list:
204+
async def scrape_subscriptions_disabled(c, offset=0):
221205
with progress_utils.setup_live("user_list"):
222206
new_tasks = []
223207
url = of_env.getattr("subscriptionsExpiredEP").format(offset)
224208
try:
225209
log.debug(f"usernames offset expired {offset}")
226210
async with c.requests_async(url=url) as r:
227-
subscriptions = (await r.json_())["list"]
211+
response = await r.json_()
212+
subscriptions = response["list"]
228213
log.debug(
229214
f"usernames retrived -> {list(map(lambda x:x.get('username'),subscriptions))}"
230215
)
231-
232-
if len(subscriptions) == 0:
233-
return subscriptions, new_tasks
234-
elif recur is False:
235-
pass
236-
elif (await r.json_())["hasMore"] is True:
216+
if response["hasMore"] is True:
237217
new_tasks.append(
238218
asyncio.create_task(
239-
scrape_subscriptions_disabled(
240-
c,
241-
recur=True,
242-
offset=offset + len(subscriptions),
243-
)
219+
scrape_subscriptions_disabled(c, offset=offset + 10)
244220
)
245221
)
246222

0 commit comments

Comments
 (0)