[eroprofile] Add album downloader (#658)

Authored by: jhwgh1968
2024-12-22 06:00:00 +00:00 · 2021-08-10 13:51:12 +00:00 · 2021-08-10 13:51:12 +00:00 · c196640ff1
commit c196640ff1
parent 60c8fc73c6
2 changed files with 41 additions and 1 deletion
--- a/yt_dlp/extractor/eroprofile.py
+++ b/yt_dlp/extractor/eroprofile.py
@ -90,3 +90,40 @@ def _real_extract(self, url):
            'title': title,
            'age_limit': 18,
        })
+
+
+class EroProfileAlbumIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/album/(?P<id>[^/]+)'
+    IE_NAME = 'EroProfile:album'
+
+    _TESTS = [{
+        'url': 'https://www.eroprofile.com/m/videos/album/BBW-2-893',
+        'info_dict': {
+            'id': 'BBW-2-893',
+            'title': 'BBW 2'
+        },
+        'playlist_mincount': 486,
+    },
+    ]
+
+    def _extract_from_page(self, page):
+        for url in re.findall(r'href=".*?(/m/videos/view/[^"]+)"', page):
+            yield self.url_result(f'https://www.eroprofile.com{url}', EroProfileIE.ie_key())
+
+    def _entries(self, playlist_id, first_page):
+        yield from self._extract_from_page(first_page)
+
+        page_urls = re.findall(rf'href=".*?(/m/videos/album/{playlist_id}\?pnum=(\d+))"', first_page)
+
+        for url, n in page_urls[1:]:
+            yield from self._extract_from_page(self._download_webpage(
+                f'https://www.eroprofile.com{url}',
+                playlist_id, note=f'Downloading playlist page {int(n) - 1}'))
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        first_page = self._download_webpage(url, playlist_id, note='Downloading playlist')
+        playlist_title = self._search_regex(
+            r'<title>Album: (.*) - EroProfile</title>', first_page, 'playlist_title')
+
+        return self.playlist_result(self._entries(playlist_id, first_page), playlist_id, playlist_title)
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@ -389,7 +389,10 @@
 from .embedly import EmbedlyIE
 from .engadget import EngadgetIE
 from .eporner import EpornerIE
-from .eroprofile import EroProfileIE
+from .eroprofile import (
+    EroProfileIE,
+    EroProfileAlbumIE,
+)
 from .escapist import EscapistIE
 from .espn import (
    ESPNIE,