mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-25 02:45:12 +00:00
Allow non-ASCII characters in simplified titles (Closes #220)
This commit is contained in:
parent
e092418d8b
commit
af8e8d63f9
2 changed files with 9 additions and 6 deletions
|
@ -16,13 +16,14 @@ def test_simplify_title():
|
||||||
assert u'/' not in youtube_dl._simplify_title(u'abc/de')
|
assert u'/' not in youtube_dl._simplify_title(u'abc/de')
|
||||||
assert u'abc' in youtube_dl._simplify_title(u'abc/de')
|
assert u'abc' in youtube_dl._simplify_title(u'abc/de')
|
||||||
assert u'de' in youtube_dl._simplify_title(u'abc/de')
|
assert u'de' in youtube_dl._simplify_title(u'abc/de')
|
||||||
|
assert u'/' not in youtube_dl._simplify_title(u'abc/de///')
|
||||||
|
|
||||||
assert u'\\' not in youtube_dl._simplify_title(u'abc\\de')
|
assert u'\\' not in youtube_dl._simplify_title(u'abc\\de')
|
||||||
assert u'abc' in youtube_dl._simplify_title(u'abc\\de')
|
assert u'abc' in youtube_dl._simplify_title(u'abc\\de')
|
||||||
assert u'de' in youtube_dl._simplify_title(u'abc\\de')
|
assert u'de' in youtube_dl._simplify_title(u'abc\\de')
|
||||||
|
|
||||||
# TODO: Fix #220
|
assert youtube_dl._simplify_title(u'ä') == u'ä'
|
||||||
#assert youtube_dl._simplify_title(u'ä') == u'ä'
|
assert youtube_dl._simplify_title(u'кириллица') == u'кириллица'
|
||||||
|
|
||||||
# Strip underlines
|
# Strip underlines
|
||||||
assert youtube_dl._simplify_title(u'\'a_') == u'a'
|
assert youtube_dl._simplify_title(u'\'a_') == u'a'
|
||||||
|
|
|
@ -278,7 +278,8 @@ def timeconvert(timestr):
|
||||||
return timestamp
|
return timestamp
|
||||||
|
|
||||||
def _simplify_title(title):
|
def _simplify_title(title):
|
||||||
return re.sub(ur'[^\w\d_\-]+', u'_', title).strip(u'_')
|
expr = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE)
|
||||||
|
return expr.sub(u'_', title).strip(u'_')
|
||||||
|
|
||||||
class DownloadError(Exception):
|
class DownloadError(Exception):
|
||||||
"""Download Error exception.
|
"""Download Error exception.
|
||||||
|
@ -2937,6 +2938,7 @@ def _real_extract(self, url):
|
||||||
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
|
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
|
||||||
basename = url.split('/')[-1]
|
basename = url.split('/')[-1]
|
||||||
title,ext = os.path.splitext(basename)
|
title,ext = os.path.splitext(basename)
|
||||||
|
title = title.decode('UTF-8')
|
||||||
ext = ext.replace('.', '')
|
ext = ext.replace('.', '')
|
||||||
self.report_direct_download(title)
|
self.report_direct_download(title)
|
||||||
info = {
|
info = {
|
||||||
|
@ -3089,9 +3091,9 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
if mobj.group('shortname'):
|
if mobj.group('shortname'):
|
||||||
if mobj.group('shortname') in ('tds', 'thedailyshow'):
|
if mobj.group('shortname') in ('tds', 'thedailyshow'):
|
||||||
url = 'http://www.thedailyshow.com/full-episodes/'
|
url = u'http://www.thedailyshow.com/full-episodes/'
|
||||||
else:
|
else:
|
||||||
url = 'http://www.colbertnation.com/full-episodes/'
|
url = u'http://www.colbertnation.com/full-episodes/'
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
assert mobj is not None
|
assert mobj is not None
|
||||||
|
|
||||||
|
@ -3177,7 +3179,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
self._downloader.increment_downloads()
|
self._downloader.increment_downloads()
|
||||||
|
|
||||||
effTitle = showId + '-' + epTitle
|
effTitle = showId + u'-' + epTitle
|
||||||
info = {
|
info = {
|
||||||
'id': shortMediaId,
|
'id': shortMediaId,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
|
Loading…
Reference in a new issue