2013-12-16 20:34:41 +00:00
import re
from . common import InfoExtractor
2022-06-10 19:03:54 +00:00
from . . utils import parse_duration , unified_strdate
2013-12-16 20:34:41 +00:00
class RadioFranceIE(InfoExtractor):
    """Extractor for "radiovisions" audio pages on maison.radiofrance.fr.

    Title, description and credit line are scraped from the page HTML; the
    available audio formats are read from the ``data-source`` attribute of
    the element whose class starts with ``jp-jplayer``.
    """

    _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
    IE_NAME = 'radiofrance'

    _TEST = {
        'url': 'http://maison.radiofrance.fr/radiovisions/one-one',
        'md5': 'bdbb28ace95ed0e04faab32ba3160daf',
        'info_dict': {
            'id': 'one-one',
            'ext': 'ogg',
            'title': 'One to one',
            'description': "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
            'uploader': 'Thomas Hercouët',
        },
    }

    def _real_extract(self, url):
        """Download the page at *url* and build its info dict.

        Returns a dict with ``id``, ``title``, ``formats``, ``description``
        and ``uploader``. ``description`` and ``uploader`` are looked up
        non-fatally; the title and the audio source list are required.
        """
        video_id = self._match_valid_url(url).group('id')
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
        description = self._html_search_regex(
            r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
            webpage, 'description', fatal=False)
        # The credit line is prefixed by a copyright sign surrounded by
        # padding. Accept the sign and the padding either literally or as
        # HTML entities.
        # NOTE(review): the exact prefix in the served markup could not be
        # determined from this file - confirm against a live page.
        uploader = self._html_search_regex(
            r'<div class="credit">(?:&nbsp;|\s)*(?:&copy;|©)(?:&nbsp;|\s)*(.*?)</div>',
            webpage, 'uploader', fatal=False)

        formats_str = self._html_search_regex(
            r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
            webpage, 'audio URLs')
        # The attribute holds ``name: 'url'`` pairs; each pair becomes one
        # audio-only format whose quality is its position in the list.
        source_pairs = re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str)
        formats = [
            {
                'format_id': format_id,
                'url': format_url,
                'vcodec': 'none',
                'quality': index,
            }
            for index, (format_id, format_url) in enumerate(source_pairs)
        ]

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
        }
2022-06-10 19:03:54 +00:00
class FranceCultureIE(InfoExtractor):
    """Extractor for podcast episode pages on radiofrance.fr.

    Handles the ``franceculture``, ``fip``, ``francemusique``, ``mouv`` and
    ``franceinter`` sections. The media URL, encoding and duration come from
    an ``AudioObject`` JSON object embedded in the page; the remaining
    metadata is scraped from the HTML.
    """

    _VALID_URL = r'https?://(?:www\.)?radiofrance\.fr/(?:franceculture|fip|francemusique|mouv|franceinter)/podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d+)($|[?#])'
    _TESTS = [
        {
            'url': 'https://www.radiofrance.fr/franceculture/podcasts/science-en-questions/la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau-8440487',
            'info_dict': {
                'id': '8440487',
                'display_id': 'la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau',
                'ext': 'mp3',
                'title': 'La physique d’Einstein aiderait-elle à comprendre le cerveau ?',
                'description': 'Existerait-il un pont conceptuel entre la physique de l’espace-temps et les neurosciences ?',
                'thumbnail': 'https://cdn.radiofrance.fr/s3/cruiser-production/2022/05/d184e7a3-4827-4494-bf94-04ed7b120db4/1200x630_gettyimages-200171095-001.jpg',
                'upload_date': '20220514',
                'duration': 2750,
            },
        },
        {
            'url': 'https://www.radiofrance.fr/franceinter/podcasts/la-rafle-du-vel-d-hiv-une-affaire-d-etat/les-racines-du-crime-episode-1-3715507',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Download the episode page at *url* and build its info dict.

        The numeric suffix of the URL is used as ``id`` and the slug before
        it as ``display_id``. A missing ``contentUrl`` in the embedded JSON
        raises ``KeyError``; description and uploader default to ``None``,
        and the publication date is looked up non-fatally.
        """
        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
        webpage = self._download_webpage(url, display_id)

        # _search_json_ld doesn't correctly handle this. See https://github.com/yt-dlp/yt-dlp/pull/3874#discussion_r891903846
        video_data = self._search_json(
            '', webpage, 'audio data', display_id,
            contains_pattern=r'{\s*"@type"\s*:\s*"AudioObject".+}')
        # Read once: used both as the extension and to decide the vcodec.
        encoding_format = video_data.get('encodingFormat')

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_data['contentUrl'],
            'ext': encoding_format,
            # Only declare the stream audio-only when the page says it is mp3.
            'vcodec': 'none' if encoding_format == 'mp3' else None,
            'duration': parse_duration(video_data.get('duration')),
            # Prefer the <h1 itemprop="...name..."> heading, else og:title.
            'title': self._html_search_regex(
                r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>',
                webpage, 'title', default=self._og_search_title(webpage)),
            'description': self._html_search_regex(
                r'(?s)<meta name="description"\s*content="([^"]+)',
                webpage, 'description', default=None),
            'thumbnail': self._og_search_thumbnail(webpage),
            'uploader': self._html_search_regex(
                r'(?s)<span class="author">(.*?)</span>',
                webpage, 'uploader', default=None),
            'upload_date': unified_strdate(self._search_regex(
                r'"datePublished"\s*:\s*"([^"]+)',
                webpage, 'timestamp', fatal=False)),
        }