Skip to content

Commit 586b557

Browse files
subrat-lima authored and bashonly committed
[ie/jiosaavn:artist] Add extractor (#12803)
Closes #10823
Authored by: subrat-lima
1 parent 317f4b8 commit 586b557

File tree

2 files changed

+95
-52
lines changed

2 files changed

+95
-52
lines changed

yt_dlp/extractor/_extractors.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -928,6 +928,7 @@
928928
)
929929
from .jiosaavn import (
930930
JioSaavnAlbumIE,
931+
JioSaavnArtistIE,
931932
JioSaavnPlaylistIE,
932933
JioSaavnShowIE,
933934
JioSaavnShowPlaylistIE,

yt_dlp/extractor/jiosaavn.py

Lines changed: 94 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import functools
2+
import itertools
23
import math
34
import re
45

@@ -11,6 +12,7 @@
1112
int_or_none,
1213
js_to_json,
1314
make_archive_id,
15+
orderedSet,
1416
smuggle_url,
1517
unified_strdate,
1618
unified_timestamp,
@@ -19,6 +21,7 @@
1921
url_or_none,
2022
urlencode_postdata,
2123
urljoin,
24+
variadic,
2225
)
2326
from ..utils.traversal import traverse_obj
2427

@@ -78,61 +81,50 @@ def _call_api(self, type_, token, note='API', params={}):
7881
})
7982

8083
@staticmethod
81-
def _extract_common_info(data):
82-
return traverse_obj(data, {
84+
def _extract_song(song_data, url=None):
85+
info = traverse_obj(song_data, {
8386
'id': ('id', {str}),
87+
'title': (('song', 'title'), {clean_html}, any),
88+
'album': ((None, 'more_info'), 'album', {clean_html}, any),
89+
'duration': ((None, 'more_info'), 'duration', {int_or_none}, any),
90+
'channel': ((None, 'more_info'), 'label', {str}, any),
91+
'channel_id': ((None, 'more_info'), 'label_id', {str}, any),
92+
'channel_url': ((None, 'more_info'), 'label_url', {urljoin('https://www.jiosaavn.com/')}, any),
93+
'release_date': ((None, 'more_info'), 'release_date', {unified_strdate}, any),
94+
'release_year': ('year', {int_or_none}),
8495
'thumbnail': ('image', {url_or_none}, {lambda x: re.sub(r'-\d+x\d+\.', '-500x500.', x)}),
8596
'view_count': ('play_count', {int_or_none}),
86-
'release_year': ('year', {int_or_none}),
8797
'language': ('language', {lambda x: ISO639Utils.short2long(x.casefold()) or 'und'}),
8898
'webpage_url': ('perma_url', {url_or_none}),
99+
'artists': ('more_info', 'artistMap', 'primary_artists', ..., 'name', {str}, filter, all),
89100
})
90-
91-
@staticmethod
92-
def _extract_song(song_data, url=None):
93-
info = JioSaavnBaseIE._extract_common_info(song_data)
94-
info.update(traverse_obj(song_data, {
95-
'title': ('song', {clean_html}),
96-
'album': ('album', {clean_html}),
97-
'duration': ('duration', {int_or_none}),
98-
'release_date': ('release_date', {unified_strdate}),
99-
'channel': ('label', {str}),
100-
'channel_id': ('label_id', {str}),
101-
'channel_url': ('label_url', {urljoin('https://www.jiosaavn.com/')}),
102-
'artists': ('primary_artists', {lambda x: x.split(', ') if x else None}),
103-
}))
104101
if webpage_url := info.get('webpage_url') or url:
105102
info['display_id'] = url_basename(webpage_url)
106103
info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, info['display_id'])]
107104

105+
if primary_artists := traverse_obj(song_data, ('primary_artists', {lambda x: x.split(', ') if x else None})):
106+
info['artists'].extend(primary_artists)
107+
if featured_artists := traverse_obj(song_data, ('featured_artists', {str}, filter)):
108+
info['artists'].extend(featured_artists.split(', '))
109+
info['artists'] = orderedSet(info['artists']) or None
110+
108111
return info
109112

110113
@staticmethod
111114
def _extract_episode(episode_data, url=None):
112-
info = JioSaavnBaseIE._extract_common_info(episode_data)
115+
info = JioSaavnBaseIE._extract_song(episode_data, url)
116+
info.pop('_old_archive_ids', None)
113117
info.update(traverse_obj(episode_data, {
114-
'title': ('title', {clean_html}),
115118
'description': ('more_info', 'description', {str}),
116-
'duration': ('more_info', 'duration', {int_or_none}),
117119
'timestamp': ('more_info', 'release_time', {unified_timestamp}),
118-
'channel': ('more_info', 'label', {str}),
119-
'channel_id': ('more_info', 'label_id', {str}),
120-
'channel_url': ('more_info', 'label_url', {urljoin('https://www.jiosaavn.com/')}),
121120
'series': ('more_info', 'show_title', {str}),
122121
'series_id': ('more_info', 'show_id', {str}),
123122
'season': ('more_info', 'season_title', {str}),
124123
'season_number': ('more_info', 'season_no', {int_or_none}),
125124
'season_id': ('more_info', 'season_id', {str}),
126125
'episode_number': ('more_info', 'episode_number', {int_or_none}),
127126
'cast': ('starring', {lambda x: x.split(', ') if x else None}),
128-
'artists': ('more_info', 'artistMap', 'primary_artists', ..., 'name', {str}),
129127
}))
130-
if webpage_url := info.get('webpage_url') or url:
131-
info['display_id'] = url_basename(webpage_url)
132-
133-
if featured_artists := traverse_obj(episode_data, ('featured_artists', {str})):
134-
info.setdefault('artists', []).extend(featured_artists.split(', '))
135-
136128
return info
137129

138130
def _extract_jiosaavn_result(self, url, endpoint, response_key, parse_func):
@@ -152,14 +144,20 @@ def _extract_jiosaavn_result(self, url, endpoint, response_key, parse_func):
152144
result['formats'] = list(self._extract_formats(data))
153145
return result
154146

155-
def _yield_songs(self, playlist_data):
156-
for song_data in traverse_obj(playlist_data, ('songs', lambda _, v: v['id'] and v['perma_url'])):
157-
song_info = self._extract_song(song_data)
158-
url = smuggle_url(song_info['webpage_url'], {
159-
'id': song_data['id'],
160-
'encrypted_media_url': song_data['encrypted_media_url'],
161-
})
162-
yield self.url_result(url, JioSaavnSongIE, url_transparent=True, **song_info)
147+
def _yield_items(self, playlist_data, keys=None, parse_func=None):
    """Yield one url_transparent result per playable item in `playlist_data`.

    Subclasses using this method must set _ENTRY_IE

    @param playlist_data  API response (or part of one) holding the items
    @param keys           traversal key(s) leading to the item list; wrapped
                          with variadic() so a single key or a sequence of
                          keys are both accepted
    @param parse_func     callable turning one item into an info dict;
                          falls back to self._extract_song when None
    """
    parse = self._extract_song if parse_func is None else parse_func

    # Only items with a truthy 'id' and 'perma_url' are selected
    items_path = (
        *variadic(keys, (str, bytes, dict, set)),
        lambda _, v: v['id'] and v['perma_url'],
    )
    # Fields smuggled into the entry URL; the media URL is looked up on the
    # item itself and under its 'more_info' mapping, first match wins
    smuggled_fields = {
        'id': ('id', {str}),
        'encrypted_media_url': ((None, 'more_info'), 'encrypted_media_url', {str}, any),
    }

    for item_data in traverse_obj(playlist_data, items_path):
        info = parse(item_data)
        webpage_url = info['webpage_url']
        entry_url = smuggle_url(webpage_url, traverse_obj(item_data, smuggled_fields))
        yield self.url_result(entry_url, self._ENTRY_IE, url_transparent=True, **info)
163161

164162

165163
class JioSaavnSongIE(JioSaavnBaseIE):
@@ -244,6 +242,7 @@ class JioSaavnShowIE(JioSaavnBaseIE):
244242
'channel_id': '1953876',
245243
'episode_number': 1,
246244
'upload_date': '20211227',
245+
'release_date': '20211227',
247246
},
248247
}, {
249248
'url': 'https://www.jiosaavn.com/shows/himesh-reshammiya/Kr8fmfSN4vo_',
@@ -265,13 +264,14 @@ class JioSaavnAlbumIE(JioSaavnBaseIE):
265264
},
266265
'playlist_count': 10,
267266
}]
267+
_ENTRY_IE = JioSaavnSongIE
268268

269269
def _real_extract(self, url):
270270
display_id = self._match_id(url)
271271
album_data = self._call_api('album', display_id)
272272

273273
return self.playlist_result(
274-
self._yield_songs(album_data), display_id, traverse_obj(album_data, ('title', {str})))
274+
self._yield_items(album_data, 'songs'), display_id, traverse_obj(album_data, ('title', {str})))
275275

276276

277277
class JioSaavnPlaylistIE(JioSaavnBaseIE):
@@ -290,15 +290,16 @@ class JioSaavnPlaylistIE(JioSaavnBaseIE):
290290
'id': 'DVR,pFUOwyXqIp77B1JF,A__',
291291
'title': 'Mood Hindi',
292292
},
293-
'playlist_mincount': 801,
293+
'playlist_mincount': 750,
294294
}, {
295295
'url': 'https://www.jiosaavn.com/featured/taaza-tunes/Me5RridRfDk_',
296296
'info_dict': {
297297
'id': 'Me5RridRfDk_',
298298
'title': 'Taaza Tunes',
299299
},
300-
'playlist_mincount': 301,
300+
'playlist_mincount': 50,
301301
}]
302+
_ENTRY_IE = JioSaavnSongIE
302303
_PAGE_SIZE = 50
303304

304305
def _fetch_page(self, token, page):
@@ -307,7 +308,7 @@ def _fetch_page(self, token, page):
307308

308309
def _entries(self, token, first_page_data, page):
309310
page_data = first_page_data if not page else self._fetch_page(token, page + 1)
310-
yield from self._yield_songs(page_data)
311+
yield from self._yield_items(page_data, 'songs')
311312

312313
def _real_extract(self, url):
313314
display_id = self._match_id(url)
@@ -330,6 +331,7 @@ class JioSaavnShowPlaylistIE(JioSaavnBaseIE):
330331
},
331332
'playlist_mincount': 11,
332333
}]
334+
_ENTRY_IE = JioSaavnShowIE
333335
_PAGE_SIZE = 10
334336

335337
def _fetch_page(self, show_id, season_id, page):
@@ -342,18 +344,9 @@ def _fetch_page(self, show_id, season_id, page):
342344
'sort_order': 'desc',
343345
})
344346

345-
def _yield_episodes(self, playlist_data):
346-
for episode_data in playlist_data:
347-
episode_info = self._extract_episode(episode_data)
348-
url = smuggle_url(episode_info['webpage_url'], {
349-
'id': episode_data['id'],
350-
'encrypted_media_url': episode_data['more_info']['encrypted_media_url'],
351-
})
352-
yield self.url_result(url, JioSaavnShowIE, url_transparent=True, **episode_info)
353-
354347
def _entries(self, show_id, season_id, page):
355348
page_data = self._fetch_page(show_id, season_id, page + 1)
356-
yield from self._yield_episodes(page_data)
349+
yield from self._yield_items(page_data, keys=None, parse_func=self._extract_episode)
357350

358351
def _real_extract(self, url):
359352
show_slug, season_id = self._match_valid_url(url).group('show', 'season')
@@ -368,3 +361,52 @@ def _real_extract(self, url):
368361
entries = OnDemandPagedList(functools.partial(self._entries, show_id, season_id), self._PAGE_SIZE)
369362
return self.playlist_result(
370363
entries, playlist_id, traverse_obj(show_info, ('show', 'title', 'text', {str})))
364+
365+
366+
class JioSaavnArtistIE(JioSaavnBaseIE):
    # Playlist extractor for JioSaavn artist URLs. Entries come from the
    # 'topSongs' list of each artist API page and are delegated to _ENTRY_IE.
    IE_NAME = 'jiosaavn:artist'
    _VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/artist/[^/?#]+/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.jiosaavn.com/artist/krsna-songs/rYLBEve2z3U_',
        'info_dict': {
            'id': 'rYLBEve2z3U_',
            'title': 'KR$NA',
        },
        'playlist_mincount': 38,
    }, {
        'url': 'https://www.jiosaavn.com/artist/sanam-puri-songs/SkNEv3qRhDE_',
        'info_dict': {
            'id': 'SkNEv3qRhDE_',
            'title': 'Sanam Puri',
        },
        'playlist_mincount': 51,
    }]
    _ENTRY_IE = JioSaavnSongIE
    _PAGE_SIZE = 50

    def _fetch_page(self, artist_id, page):
        """Request one page of artist data from the API.

        `page` is passed through unchanged as the 'p' query parameter, while
        the progress note displays it one-based.
        """
        query = {
            'p': page,
            'n_song': self._PAGE_SIZE,
            'n_album': self._PAGE_SIZE,
            'sub_type': '',
            'includeMetaTags': '',
            'api_version': '4',
            'category': 'alphabetical',
            'sort_order': 'asc',
        }
        return self._call_api('artist', artist_id, f'artist page {page + 1}', query)

    def _entries(self, artist_id, first_page):
        """Yield song entries page by page, starting from the prefetched `first_page`.

        Iteration ends at the first page whose 'topSongs' holds no dict items.
        """
        page_data = first_page
        for next_page in itertools.count(1):
            if not traverse_obj(page_data, ('topSongs', ..., {dict})):
                return
            yield from self._yield_items(page_data, 'topSongs')
            # The following page is only requested once the current one is exhausted
            page_data = self._fetch_page(artist_id, next_page)

    def _real_extract(self, url):
        artist_id = self._match_id(url)
        # The first page is reused for both the playlist title and the first batch of entries
        first_page = self._fetch_page(artist_id, 0)
        artist_name = traverse_obj(first_page, ('name', {str}))

        return self.playlist_result(
            self._entries(artist_id, first_page), artist_id, artist_name)

0 commit comments

Comments
 (0)