mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-10-31 23:02:40 +00:00
[ie/rai] Fix m3u8 formats extraction (#9291)
Closes #887. Authored by: nixxo
This commit is contained in:
parent
804f236611
commit
8f423cf805
1 changed file with 42 additions and 18 deletions
|
@ -28,6 +28,29 @@ class RaiBaseIE(InfoExtractor):
|
||||||
_GEO_COUNTRIES = ['IT']
|
_GEO_COUNTRIES = ['IT']
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
|
|
||||||
|
def _fix_m3u8_formats(self, media_url, video_id):
    """Extract the HLS formats for ``media_url`` and repair their codec fields.

    The formats are obtained from ``self._extract_m3u8_formats`` (called with
    ``'mp4'``, ``m3u8_id='hls'`` and ``fatal=False``). Each returned format
    dict is then updated in place:

    * a missing/empty ``acodec`` becomes ``'mp4a'`` and a missing/empty
      ``vcodec`` becomes ``'avc1'``;
    * if the format URL looks like an audio-only chunklist (``..._ao_`` or
      ``..._ao.``), ``vcodec`` is forced to ``'none'``;
    * otherwise, if it looks like a video-only chunklist (``..._vo_`` or
      ``..._vo.``), ``acodec`` is forced to ``'none'``;
    * otherwise the format is treated as video+audio and any codec reported
      as ``'none'`` is replaced by the corresponding default.

    Returns the same iterable of format dicts that
    ``self._extract_m3u8_formats`` returned (presumably a list; it is
    iterated once and handed back unchanged apart from the in-place fixes).
    """
    fmts = self._extract_m3u8_formats(
        media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)

    # Loop-invariant patterns, built once instead of once per format.
    # They are kept as two separate searches so the audio-only check keeps
    # precedence over the video-only check, wherever each marker appears
    # in the URL.
    audio_only_re = re.compile(r'chunklist(?:_b\d+)*_ao[_.]')
    video_only_re = re.compile(r'chunklist(?:_b\d+)*_vo[_.]')

    # Fix malformed m3u8 manifests by setting audio-only/video-only formats
    for f in fmts:
        # Fill in codecs the manifest did not declare at all.
        if not f.get('acodec'):
            f['acodec'] = 'mp4a'
        if not f.get('vcodec'):
            f['vcodec'] = 'avc1'

        man_url = f['url']
        if audio_only_re.search(man_url):  # audio only
            f['vcodec'] = 'none'
        elif video_only_re.search(man_url):  # video only
            f['acodec'] = 'none'
        else:  # video+audio
            # The manifest claimed a stream is absent, but the URL does not
            # mark this variant as audio-only/video-only: use the defaults.
            if f['acodec'] == 'none':
                f['acodec'] = 'mp4a'
            if f['vcodec'] == 'none':
                f['vcodec'] = 'avc1'

    return fmts
|
||||||
|
|
||||||
def _extract_relinker_info(self, relinker_url, video_id, audio_only=False):
|
def _extract_relinker_info(self, relinker_url, video_id, audio_only=False):
|
||||||
def fix_cdata(s):
|
def fix_cdata(s):
|
||||||
# remove \r\n\t before and after <![CDATA[ ]]> to avoid
|
# remove \r\n\t before and after <![CDATA[ ]]> to avoid
|
||||||
|
@ -69,8 +92,7 @@ def fix_cdata(s):
|
||||||
'format_id': 'https-mp3',
|
'format_id': 'https-mp3',
|
||||||
})
|
})
|
||||||
elif ext == 'm3u8' or 'format=m3u8' in media_url:
|
elif ext == 'm3u8' or 'format=m3u8' in media_url:
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._fix_m3u8_formats(media_url, video_id))
|
||||||
media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
|
||||||
elif ext == 'f4m':
|
elif ext == 'f4m':
|
||||||
# very likely no longer needed. Cannot find any url that uses it.
|
# very likely no longer needed. Cannot find any url that uses it.
|
||||||
manifest_url = update_url_query(
|
manifest_url = update_url_query(
|
||||||
|
@ -153,10 +175,10 @@ def get_format_info(tbr):
|
||||||
'format_id': f'https-{tbr}',
|
'format_id': f'https-{tbr}',
|
||||||
'width': format_copy.get('width'),
|
'width': format_copy.get('width'),
|
||||||
'height': format_copy.get('height'),
|
'height': format_copy.get('height'),
|
||||||
'tbr': format_copy.get('tbr'),
|
'tbr': format_copy.get('tbr') or tbr,
|
||||||
'vcodec': format_copy.get('vcodec'),
|
'vcodec': format_copy.get('vcodec') or 'avc1',
|
||||||
'acodec': format_copy.get('acodec'),
|
'acodec': format_copy.get('acodec') or 'mp4a',
|
||||||
'fps': format_copy.get('fps'),
|
'fps': format_copy.get('fps') or 25,
|
||||||
} if format_copy else {
|
} if format_copy else {
|
||||||
'format_id': f'https-{tbr}',
|
'format_id': f'https-{tbr}',
|
||||||
'width': _QUALITY[tbr][0],
|
'width': _QUALITY[tbr][0],
|
||||||
|
@ -245,7 +267,7 @@ class RaiPlayIE(RaiBaseIE):
|
||||||
'series': 'Report',
|
'series': 'Report',
|
||||||
'season': '2013/14',
|
'season': '2013/14',
|
||||||
'subtitles': {'it': 'count:4'},
|
'subtitles': {'it': 'count:4'},
|
||||||
'release_year': 2022,
|
'release_year': 2024,
|
||||||
'episode': 'Espresso nel caffè - 07/04/2014',
|
'episode': 'Espresso nel caffè - 07/04/2014',
|
||||||
'timestamp': 1396919880,
|
'timestamp': 1396919880,
|
||||||
'upload_date': '20140408',
|
'upload_date': '20140408',
|
||||||
|
@ -253,7 +275,7 @@ class RaiPlayIE(RaiBaseIE):
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True},
|
'params': {'skip_download': True},
|
||||||
}, {
|
}, {
|
||||||
# 1080p direct mp4 url
|
# 1080p
|
||||||
'url': 'https://www.raiplay.it/video/2021/11/Blanca-S1E1-Senza-occhi-b1255a4a-8e72-4a2f-b9f3-fc1308e00736.html',
|
'url': 'https://www.raiplay.it/video/2021/11/Blanca-S1E1-Senza-occhi-b1255a4a-8e72-4a2f-b9f3-fc1308e00736.html',
|
||||||
'md5': 'aeda7243115380b2dd5e881fd42d949a',
|
'md5': 'aeda7243115380b2dd5e881fd42d949a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -274,7 +296,7 @@ class RaiPlayIE(RaiBaseIE):
|
||||||
'episode': 'Senza occhi',
|
'episode': 'Senza occhi',
|
||||||
'timestamp': 1637318940,
|
'timestamp': 1637318940,
|
||||||
'upload_date': '20211119',
|
'upload_date': '20211119',
|
||||||
'formats': 'count:12',
|
'formats': 'count:7',
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True},
|
'params': {'skip_download': True},
|
||||||
'expected_warnings': ['Video not available. Likely due to geo-restriction.']
|
'expected_warnings': ['Video not available. Likely due to geo-restriction.']
|
||||||
|
@ -527,7 +549,7 @@ class RaiPlaySoundPlaylistIE(InfoExtractor):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'ilruggitodelconiglio',
|
'id': 'ilruggitodelconiglio',
|
||||||
'title': 'Il Ruggito del Coniglio',
|
'title': 'Il Ruggito del Coniglio',
|
||||||
'description': 'md5:48cff6972435964284614d70474132e6',
|
'description': 'md5:62a627b3a2d0635d08fa8b6e0a04f27e',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 65,
|
'playlist_mincount': 65,
|
||||||
}, {
|
}, {
|
||||||
|
@ -634,19 +656,20 @@ def _real_extract(self, url):
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class RaiNewsIE(RaiIE): # XXX: Do not subclass from concrete IE
|
class RaiNewsIE(RaiBaseIE):
|
||||||
_VALID_URL = rf'https?://(www\.)?rainews\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html'
|
_VALID_URL = rf'https?://(www\.)?rainews\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html'
|
||||||
_EMBED_REGEX = [rf'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)']
|
_EMBED_REGEX = [rf'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# new rainews player (#3911)
|
# new rainews player (#3911)
|
||||||
'url': 'https://www.rainews.it/rubriche/24mm/video/2022/05/24mm-del-29052022-12cf645d-1ffd-4220-b27c-07c226dbdecf.html',
|
'url': 'https://www.rainews.it/video/2024/02/membri-della-croce-rossa-evacuano-gli-abitanti-di-un-villaggio-nella-regione-ucraina-di-kharkiv-il-filmato-dallucraina--31e8017c-845c-43f5-9c48-245b43c3a079.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '12cf645d-1ffd-4220-b27c-07c226dbdecf',
|
'id': '31e8017c-845c-43f5-9c48-245b43c3a079',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Puntata del 29/05/2022',
|
'title': 'md5:1e81364b09de4a149042bac3c7d36f0b',
|
||||||
'duration': 1589,
|
'duration': 196,
|
||||||
'upload_date': '20220529',
|
'upload_date': '20240225',
|
||||||
'uploader': 'rainews',
|
'uploader': 'rainews',
|
||||||
|
'formats': 'count:2',
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True},
|
'params': {'skip_download': True},
|
||||||
}, {
|
}, {
|
||||||
|
@ -659,7 +682,8 @@ class RaiNewsIE(RaiIE): # XXX: Do not subclass from concrete IE
|
||||||
'description': 'I film in uscita questa settimana.',
|
'description': 'I film in uscita questa settimana.',
|
||||||
'thumbnail': r're:^https?://.*\.png$',
|
'thumbnail': r're:^https?://.*\.png$',
|
||||||
'duration': 833,
|
'duration': 833,
|
||||||
'upload_date': '20161103'
|
'upload_date': '20161103',
|
||||||
|
'formats': 'count:8',
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True},
|
'params': {'skip_download': True},
|
||||||
'expected_warnings': ['unable to extract player_data'],
|
'expected_warnings': ['unable to extract player_data'],
|
||||||
|
@ -684,7 +708,7 @@ def _real_extract(self, url):
|
||||||
if not relinker_url:
|
if not relinker_url:
|
||||||
# fallback on old implementation for some old content
|
# fallback on old implementation for some old content
|
||||||
try:
|
try:
|
||||||
return self._extract_from_content_id(video_id, url)
|
return RaiIE._real_extract(self, url)
|
||||||
except GeoRestrictedError:
|
except GeoRestrictedError:
|
||||||
raise
|
raise
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
|
|
Loading…
Reference in a new issue