Skip to content

Commit 85c8a40

Browse files
bashonly and doe1080 authored
[ie] Improve JSON LD thumbnails extraction (#13368)
Authored by: bashonly, doe1080
Co-authored-by: doe1080 <[email protected]>
1 parent 943083e commit 85c8a40

File tree

2 files changed

+17
-3
lines changed

2 files changed

+17
-3
lines changed

test/test_InfoExtractor.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,20 @@ def test_search_json_ld_realworld(self):
314314
},
315315
{},
316316
),
317+
(
318+
# test thumbnail_url key without URL scheme
319+
r'''
320+
<script type="application/ld+json">
321+
{
322+
"@context": "https://schema.org",
323+
"@type": "VideoObject",
324+
"thumbnail_url": "//www.nobelprize.org/images/12693-landscape-medium-gallery.jpg"
325+
}</script>''',
326+
{
327+
'thumbnails': [{'url': 'https://www.nobelprize.org/images/12693-landscape-medium-gallery.jpg'}],
328+
},
329+
{},
330+
),
317331
]
318332
for html, expected_dict, search_json_ld_kwargs in _TESTS:
319333
expect_dict(

yt_dlp/extractor/common.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1675,9 +1675,9 @@ def extract_video_object(e):
16751675
'ext': mimetype2ext(e.get('encodingFormat')),
16761676
'title': unescapeHTML(e.get('name')),
16771677
'description': unescapeHTML(e.get('description')),
1678-
'thumbnails': [{'url': unescapeHTML(url)}
1679-
for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))
1680-
if url_or_none(url)],
1678+
'thumbnails': traverse_obj(e, (('thumbnailUrl', 'thumbnailURL', 'thumbnail_url'), (None, ...), {
1679+
'url': ({str}, {unescapeHTML}, {self._proto_relative_url}, {url_or_none}),
1680+
})),
16811681
'duration': parse_duration(e.get('duration')),
16821682
'timestamp': unified_timestamp(e.get('uploadDate')),
16831683
# author can be an instance of 'Organization' or 'Person' types.

0 commit comments

Comments
 (0)