mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-07 06:41:01 +00:00
Merge pull request #6392 from dstftw/generalized-fragmented-fd
Generalized fragmented media file downloader
This commit is contained in:
commit
75e8b2ac87
3 changed files with 165 additions and 124 deletions
|
@ -7,8 +7,7 @@
|
||||||
import time
|
import time
|
||||||
import xml.etree.ElementTree as etree
|
import xml.etree.ElementTree as etree
|
||||||
|
|
||||||
from .common import FileDownloader
|
from .fragment import FragmentFD
|
||||||
from .http import HttpFD
|
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
|
@ -16,8 +15,6 @@
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
struct_pack,
|
struct_pack,
|
||||||
struct_unpack,
|
struct_unpack,
|
||||||
encodeFilename,
|
|
||||||
sanitize_open,
|
|
||||||
xpath_text,
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -226,16 +223,13 @@ def _add_ns(prop):
|
||||||
return '{http://ns.adobe.com/f4m/1.0}%s' % prop
|
return '{http://ns.adobe.com/f4m/1.0}%s' % prop
|
||||||
|
|
||||||
|
|
||||||
class HttpQuietDownloader(HttpFD):
|
class F4mFD(FragmentFD):
|
||||||
def to_screen(self, *args, **kargs):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class F4mFD(FileDownloader):
|
|
||||||
"""
|
"""
|
||||||
A downloader for f4m manifests or AdobeHDS.
|
A downloader for f4m manifests or AdobeHDS.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
FD_NAME = 'f4m'
|
||||||
|
|
||||||
def _get_unencrypted_media(self, doc):
|
def _get_unencrypted_media(self, doc):
|
||||||
media = doc.findall(_add_ns('media'))
|
media = doc.findall(_add_ns('media'))
|
||||||
if not media:
|
if not media:
|
||||||
|
@ -288,7 +282,7 @@ def _parse_bootstrap_node(self, node, base_url):
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
man_url = info_dict['url']
|
man_url = info_dict['url']
|
||||||
requested_bitrate = info_dict.get('tbr')
|
requested_bitrate = info_dict.get('tbr')
|
||||||
self.to_screen('[download] Downloading f4m manifest')
|
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
|
||||||
manifest = self.ydl.urlopen(man_url).read()
|
manifest = self.ydl.urlopen(man_url).read()
|
||||||
|
|
||||||
doc = etree.fromstring(manifest)
|
doc = etree.fromstring(manifest)
|
||||||
|
@ -320,67 +314,20 @@ def real_download(self, filename, info_dict):
|
||||||
# For some akamai manifests we'll need to add a query to the fragment url
|
# For some akamai manifests we'll need to add a query to the fragment url
|
||||||
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
|
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
|
||||||
|
|
||||||
self.report_destination(filename)
|
ctx = {
|
||||||
http_dl = HttpQuietDownloader(
|
'filename': filename,
|
||||||
self.ydl,
|
'total_frags': total_frags,
|
||||||
{
|
}
|
||||||
'continuedl': True,
|
|
||||||
'quiet': True,
|
self._prepare_frag_download(ctx)
|
||||||
'noprogress': True,
|
|
||||||
'ratelimit': self.params.get('ratelimit', None),
|
dest_stream = ctx['dest_stream']
|
||||||
'test': self.params.get('test', False),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
tmpfilename = self.temp_name(filename)
|
|
||||||
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
|
||||||
|
|
||||||
write_flv_header(dest_stream)
|
write_flv_header(dest_stream)
|
||||||
if not live:
|
if not live:
|
||||||
write_metadata_tag(dest_stream, metadata)
|
write_metadata_tag(dest_stream, metadata)
|
||||||
|
|
||||||
# This dict stores the download progress, it's updated by the progress
|
self._start_frag_download(ctx)
|
||||||
# hook
|
|
||||||
state = {
|
|
||||||
'status': 'downloading',
|
|
||||||
'downloaded_bytes': 0,
|
|
||||||
'frag_index': 0,
|
|
||||||
'frag_count': total_frags,
|
|
||||||
'filename': filename,
|
|
||||||
'tmpfilename': tmpfilename,
|
|
||||||
}
|
|
||||||
start = time.time()
|
|
||||||
|
|
||||||
def frag_progress_hook(s):
|
|
||||||
if s['status'] not in ('downloading', 'finished'):
|
|
||||||
return
|
|
||||||
|
|
||||||
frag_total_bytes = s.get('total_bytes', 0)
|
|
||||||
if s['status'] == 'finished':
|
|
||||||
state['downloaded_bytes'] += frag_total_bytes
|
|
||||||
state['frag_index'] += 1
|
|
||||||
|
|
||||||
estimated_size = (
|
|
||||||
(state['downloaded_bytes'] + frag_total_bytes) /
|
|
||||||
(state['frag_index'] + 1) * total_frags)
|
|
||||||
time_now = time.time()
|
|
||||||
state['total_bytes_estimate'] = estimated_size
|
|
||||||
state['elapsed'] = time_now - start
|
|
||||||
|
|
||||||
if s['status'] == 'finished':
|
|
||||||
progress = self.calc_percent(state['frag_index'], total_frags)
|
|
||||||
else:
|
|
||||||
frag_downloaded_bytes = s['downloaded_bytes']
|
|
||||||
frag_progress = self.calc_percent(frag_downloaded_bytes,
|
|
||||||
frag_total_bytes)
|
|
||||||
progress = self.calc_percent(state['frag_index'], total_frags)
|
|
||||||
progress += frag_progress / float(total_frags)
|
|
||||||
|
|
||||||
state['eta'] = self.calc_eta(
|
|
||||||
start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
|
|
||||||
state['speed'] = s.get('speed')
|
|
||||||
self._hook_progress(state)
|
|
||||||
|
|
||||||
http_dl.add_progress_hook(frag_progress_hook)
|
|
||||||
|
|
||||||
frags_filenames = []
|
frags_filenames = []
|
||||||
while fragments_list:
|
while fragments_list:
|
||||||
|
@ -391,9 +338,9 @@ def frag_progress_hook(s):
|
||||||
url += '?' + akamai_pv.strip(';')
|
url += '?' + akamai_pv.strip(';')
|
||||||
if info_dict.get('extra_param_to_segment_url'):
|
if info_dict.get('extra_param_to_segment_url'):
|
||||||
url += info_dict.get('extra_param_to_segment_url')
|
url += info_dict.get('extra_param_to_segment_url')
|
||||||
frag_filename = '%s-%s' % (tmpfilename, name)
|
frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
|
||||||
try:
|
try:
|
||||||
success = http_dl.download(frag_filename, {'url': url})
|
success = ctx['dl'].download(frag_filename, {'url': url})
|
||||||
if not success:
|
if not success:
|
||||||
return False
|
return False
|
||||||
with open(frag_filename, 'rb') as down:
|
with open(frag_filename, 'rb') as down:
|
||||||
|
@ -425,20 +372,9 @@ def frag_progress_hook(s):
|
||||||
msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
|
msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
|
||||||
self.report_warning(msg)
|
self.report_warning(msg)
|
||||||
|
|
||||||
dest_stream.close()
|
self._finish_frag_download(ctx)
|
||||||
|
|
||||||
elapsed = time.time() - start
|
|
||||||
self.try_rename(tmpfilename, filename)
|
|
||||||
for frag_file in frags_filenames:
|
for frag_file in frags_filenames:
|
||||||
os.remove(frag_file)
|
os.remove(frag_file)
|
||||||
|
|
||||||
fsize = os.path.getsize(encodeFilename(filename))
|
|
||||||
self._hook_progress({
|
|
||||||
'downloaded_bytes': fsize,
|
|
||||||
'total_bytes': fsize,
|
|
||||||
'filename': filename,
|
|
||||||
'status': 'finished',
|
|
||||||
'elapsed': elapsed,
|
|
||||||
})
|
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
110
youtube_dl/downloader/fragment.py
Normal file
110
youtube_dl/downloader/fragment.py
Normal file
|
@ -0,0 +1,110 @@
|
||||||
|
from __future__ import division, unicode_literals
|
||||||
|
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
|
from .common import FileDownloader
|
||||||
|
from .http import HttpFD
|
||||||
|
from ..utils import (
|
||||||
|
encodeFilename,
|
||||||
|
sanitize_open,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class HttpQuietDownloader(HttpFD):
|
||||||
|
def to_screen(self, *args, **kargs):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class FragmentFD(FileDownloader):
|
||||||
|
"""
|
||||||
|
A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _prepare_and_start_frag_download(self, ctx):
|
||||||
|
self._prepare_frag_download(ctx)
|
||||||
|
self._start_frag_download(ctx)
|
||||||
|
|
||||||
|
def _prepare_frag_download(self, ctx):
|
||||||
|
self.to_screen('[%s] Total fragments: %d' % (self.FD_NAME, ctx['total_frags']))
|
||||||
|
self.report_destination(ctx['filename'])
|
||||||
|
dl = HttpQuietDownloader(
|
||||||
|
self.ydl,
|
||||||
|
{
|
||||||
|
'continuedl': True,
|
||||||
|
'quiet': True,
|
||||||
|
'noprogress': True,
|
||||||
|
'ratelimit': self.params.get('ratelimit', None),
|
||||||
|
'test': self.params.get('test', False),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
tmpfilename = self.temp_name(ctx['filename'])
|
||||||
|
dest_stream, tmpfilename = sanitize_open(tmpfilename, 'wb')
|
||||||
|
ctx.update({
|
||||||
|
'dl': dl,
|
||||||
|
'dest_stream': dest_stream,
|
||||||
|
'tmpfilename': tmpfilename,
|
||||||
|
})
|
||||||
|
|
||||||
|
def _start_frag_download(self, ctx):
|
||||||
|
total_frags = ctx['total_frags']
|
||||||
|
# This dict stores the download progress, it's updated by the progress
|
||||||
|
# hook
|
||||||
|
state = {
|
||||||
|
'status': 'downloading',
|
||||||
|
'downloaded_bytes': 0,
|
||||||
|
'frag_index': 0,
|
||||||
|
'frag_count': total_frags,
|
||||||
|
'filename': ctx['filename'],
|
||||||
|
'tmpfilename': ctx['tmpfilename'],
|
||||||
|
}
|
||||||
|
start = time.time()
|
||||||
|
ctx['started'] = start
|
||||||
|
|
||||||
|
def frag_progress_hook(s):
|
||||||
|
if s['status'] not in ('downloading', 'finished'):
|
||||||
|
return
|
||||||
|
|
||||||
|
frag_total_bytes = s.get('total_bytes', 0)
|
||||||
|
if s['status'] == 'finished':
|
||||||
|
state['downloaded_bytes'] += frag_total_bytes
|
||||||
|
state['frag_index'] += 1
|
||||||
|
|
||||||
|
estimated_size = (
|
||||||
|
(state['downloaded_bytes'] + frag_total_bytes) /
|
||||||
|
(state['frag_index'] + 1) * total_frags)
|
||||||
|
time_now = time.time()
|
||||||
|
state['total_bytes_estimate'] = estimated_size
|
||||||
|
state['elapsed'] = time_now - start
|
||||||
|
|
||||||
|
if s['status'] == 'finished':
|
||||||
|
progress = self.calc_percent(state['frag_index'], total_frags)
|
||||||
|
else:
|
||||||
|
frag_downloaded_bytes = s['downloaded_bytes']
|
||||||
|
frag_progress = self.calc_percent(frag_downloaded_bytes,
|
||||||
|
frag_total_bytes)
|
||||||
|
progress = self.calc_percent(state['frag_index'], total_frags)
|
||||||
|
progress += frag_progress / float(total_frags)
|
||||||
|
|
||||||
|
state['eta'] = self.calc_eta(
|
||||||
|
start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
|
||||||
|
state['speed'] = s.get('speed')
|
||||||
|
self._hook_progress(state)
|
||||||
|
|
||||||
|
ctx['dl'].add_progress_hook(frag_progress_hook)
|
||||||
|
|
||||||
|
return start
|
||||||
|
|
||||||
|
def _finish_frag_download(self, ctx):
|
||||||
|
ctx['dest_stream'].close()
|
||||||
|
elapsed = time.time() - ctx['started']
|
||||||
|
self.try_rename(ctx['tmpfilename'], ctx['filename'])
|
||||||
|
fsize = os.path.getsize(encodeFilename(ctx['filename']))
|
||||||
|
|
||||||
|
self._hook_progress({
|
||||||
|
'downloaded_bytes': fsize,
|
||||||
|
'total_bytes': fsize,
|
||||||
|
'filename': ctx['filename'],
|
||||||
|
'status': 'finished',
|
||||||
|
'elapsed': elapsed,
|
||||||
|
})
|
|
@ -4,12 +4,11 @@
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
from ..postprocessor.ffmpeg import FFmpegPostProcessor
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from ..compat import (
|
from .fragment import FragmentFD
|
||||||
compat_urlparse,
|
|
||||||
compat_urllib_request,
|
from ..compat import compat_urlparse
|
||||||
)
|
from ..postprocessor.ffmpeg import FFmpegPostProcessor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
encodeArgument,
|
encodeArgument,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
|
@ -51,54 +50,50 @@ def real_download(self, filename, info_dict):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
class NativeHlsFD(FileDownloader):
|
class NativeHlsFD(FragmentFD):
|
||||||
""" A more limited implementation that does not require ffmpeg """
|
""" A more limited implementation that does not require ffmpeg """
|
||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
FD_NAME = 'hlsnative'
|
||||||
url = info_dict['url']
|
|
||||||
self.report_destination(filename)
|
|
||||||
tmpfilename = self.temp_name(filename)
|
|
||||||
|
|
||||||
self.to_screen(
|
def real_download(self, filename, info_dict):
|
||||||
'[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id'])
|
man_url = info_dict['url']
|
||||||
data = self.ydl.urlopen(url).read()
|
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
|
||||||
s = data.decode('utf-8', 'ignore')
|
manifest = self.ydl.urlopen(man_url).read()
|
||||||
segment_urls = []
|
|
||||||
|
s = manifest.decode('utf-8', 'ignore')
|
||||||
|
fragment_urls = []
|
||||||
for line in s.splitlines():
|
for line in s.splitlines():
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if line and not line.startswith('#'):
|
if line and not line.startswith('#'):
|
||||||
segment_url = (
|
segment_url = (
|
||||||
line
|
line
|
||||||
if re.match(r'^https?://', line)
|
if re.match(r'^https?://', line)
|
||||||
else compat_urlparse.urljoin(url, line))
|
else compat_urlparse.urljoin(man_url, line))
|
||||||
segment_urls.append(segment_url)
|
fragment_urls.append(segment_url)
|
||||||
|
# We only download the first fragment during the test
|
||||||
is_test = self.params.get('test', False)
|
if self.params.get('test', False):
|
||||||
remaining_bytes = self._TEST_FILE_SIZE if is_test else None
|
|
||||||
byte_counter = 0
|
|
||||||
with open(tmpfilename, 'wb') as outf:
|
|
||||||
for i, segurl in enumerate(segment_urls):
|
|
||||||
self.to_screen(
|
|
||||||
'[hlsnative] %s: Downloading segment %d / %d' %
|
|
||||||
(info_dict['id'], i + 1, len(segment_urls)))
|
|
||||||
seg_req = compat_urllib_request.Request(segurl)
|
|
||||||
if remaining_bytes is not None:
|
|
||||||
seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
|
|
||||||
|
|
||||||
segment = self.ydl.urlopen(seg_req).read()
|
|
||||||
if remaining_bytes is not None:
|
|
||||||
segment = segment[:remaining_bytes]
|
|
||||||
remaining_bytes -= len(segment)
|
|
||||||
outf.write(segment)
|
|
||||||
byte_counter += len(segment)
|
|
||||||
if remaining_bytes is not None and remaining_bytes <= 0:
|
|
||||||
break
|
break
|
||||||
|
|
||||||
self._hook_progress({
|
ctx = {
|
||||||
'downloaded_bytes': byte_counter,
|
|
||||||
'total_bytes': byte_counter,
|
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'status': 'finished',
|
'total_frags': len(fragment_urls),
|
||||||
})
|
}
|
||||||
self.try_rename(tmpfilename, filename)
|
|
||||||
|
self._prepare_and_start_frag_download(ctx)
|
||||||
|
|
||||||
|
frags_filenames = []
|
||||||
|
for i, frag_url in enumerate(fragment_urls):
|
||||||
|
frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
|
||||||
|
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
||||||
|
if not success:
|
||||||
|
return False
|
||||||
|
with open(frag_filename, 'rb') as down:
|
||||||
|
ctx['dest_stream'].write(down.read())
|
||||||
|
frags_filenames.append(frag_filename)
|
||||||
|
|
||||||
|
self._finish_frag_download(ctx)
|
||||||
|
|
||||||
|
for frag_file in frags_filenames:
|
||||||
|
os.remove(frag_file)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
Loading…
Reference in a new issue