mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-03 23:25:06 +00:00
[porncom] Extract categories and tags (Closes #10510)
This commit is contained in:
parent
196c6ba067
commit
7a3e849f6e
1 changed file with 12 additions and 1 deletion
|
@ -26,6 +26,8 @@ class PornComIE(InfoExtractor):
|
||||||
'duration': 551,
|
'duration': 551,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
'categories': list,
|
||||||
|
'tags': list,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067',
|
'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067',
|
||||||
|
@ -75,7 +77,14 @@ def _real_extract(self, url):
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
view_count = str_to_int(self._search_regex(
|
view_count = str_to_int(self._search_regex(
|
||||||
r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage, 'view count'))
|
r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage,
|
||||||
|
'view count', fatal=False))
|
||||||
|
|
||||||
|
def extract_list(kind):
|
||||||
|
s = self._search_regex(
|
||||||
|
r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize(),
|
||||||
|
webpage, kind, fatal=False)
|
||||||
|
return re.findall(r'<a[^>]+>([^<]+)</a>', s or '')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -86,4 +95,6 @@ def _real_extract(self, url):
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
'categories': extract_list('categories'),
|
||||||
|
'tags': extract_list('tags'),
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue