diff --git a/yt_dlp/extractor/telegram.py b/yt_dlp/extractor/telegram.py index 2dfa261e9..bb9ca8c45 100644 --- a/yt_dlp/extractor/telegram.py +++ b/yt_dlp/extractor/telegram.py @@ -1,4 +1,5 @@ from .common import InfoExtractor +from ..utils import clean_html, get_element_by_class class TelegramEmbedIE(InfoExtractor): @@ -17,8 +18,8 @@ class TelegramEmbedIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - webpage_embed = self._download_webpage(f'{url}?embed=1', video_id) + webpage = self._download_webpage(url, video_id, query={'embed': 0}) + webpage_embed = self._download_webpage(url, video_id, query={'embed': 1}, note='Downloading embed page') formats = [{ 'url': self._proto_relative_url(self._search_regex( @@ -29,9 +30,12 @@ def _real_extract(self, url): return { 'id': video_id, - 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, fatal=True), - 'description': self._html_search_meta(['og:description', 'twitter:description'], webpage, fatal=True), - 'thumbnail': self._search_regex(r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)', - webpage_embed, 'thumbnail'), + 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None), + 'description': self._html_search_meta( + ['og:description', 'twitter:description'], webpage, + default=clean_html(get_element_by_class('tgme_widget_message_text', webpage_embed))), + 'thumbnail': self._search_regex( + r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)', + webpage_embed, 'thumbnail'), 'formats': formats, }