From 720b3dc453c342bc2e8df7dbc0acaab4479de46c Mon Sep 17 00:00:00 2001
From: powergold1 <18133986+powergold1@users.noreply.github.com>
Date: Sat, 16 Nov 2024 20:55:40 +0100
Subject: [PATCH] [ie/chaturbate] Extract from API and support impersonation
 (#11555)

Closes #6546, Closes #10359
Authored by: powergold1
---
 yt_dlp/extractor/chaturbate.py | 51 ++++++++++++++++++++++++++++++----
 1 file changed, 45 insertions(+), 6 deletions(-)

diff --git a/yt_dlp/extractor/chaturbate.py b/yt_dlp/extractor/chaturbate.py
index 864d61f9c..aa70f26a1 100644
--- a/yt_dlp/extractor/chaturbate.py
+++ b/yt_dlp/extractor/chaturbate.py
@@ -5,6 +5,7 @@
     ExtractorError,
     lowercase_escape,
     url_or_none,
+    urlencode_postdata,
 )
 
 
@@ -40,14 +41,48 @@ class ChaturbateIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    _ROOM_OFFLINE = 'Room is currently offline'
+    _ERROR_MAP = {  # API 'room_status' value -> message raised as an expected ExtractorError
+        'offline': 'Room is currently offline',  # this text is also searched for in the webpage
+        'private': 'Room is currently in a private show',
+        'away': 'Performer is currently away',
+        'password protected': 'Room is password protected',
+        'hidden': 'Hidden session in progress',
+    }
 
-    def _real_extract(self, url):
-        video_id, tld = self._match_valid_url(url).group('id', 'tld')
+    def _extract_from_api(self, video_id, tld):
+        """Query the HLS AJAX endpoint; return an info dict, or None to fall back to the webpage."""
+        response = self._download_json(
+            f'https://chaturbate.{tld}/get_edge_hls_url_ajax/', video_id,
+            data=urlencode_postdata({'room_slug': video_id}),
+            headers={
+                **self.geo_verification_headers(),
+                'X-Requested-With': 'XMLHttpRequest',
+                'Accept': 'application/json',
+            }, fatal=False, impersonate=True) or {}  # a falsy (failed) result becomes {}
 
+        m3u8_url = response.get('url')  # a returned stream URL takes precedence over room_status
+        if not m3u8_url:
+            status = response.get('room_status')
+            if error := self._ERROR_MAP.get(status):
+                raise ExtractorError(error, expected=True)  # known non-streamable room state
+            if status == 'public':
+                self.raise_geo_restricted()  # public room without a URL is reported as geo-restricted
+            self.report_warning('Falling back to webpage extraction')
+            return None
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'thumbnail': f'https://roomimg.stream.highwebmedia.com/ri/{video_id}.jpg',
+            'is_live': True,
+            'age_limit': 18,
+            'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
+        }
+
+    def _extract_from_webpage(self, video_id, tld):
         webpage = self._download_webpage(
             f'https://chaturbate.{tld}/{video_id}/', video_id,
-            headers=self.geo_verification_headers())
+            headers=self.geo_verification_headers(), impersonate=True)
 
         found_m3u8_urls = []
 
@@ -85,8 +120,8 @@ def _real_extract(self, url):
                 webpage, 'error', group='error', default=None)
             if not error:
                 if any(p in webpage for p in (
-                        self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')):
-                    error = self._ROOM_OFFLINE
+                        self._ERROR_MAP['offline'], 'offline_tipping', 'tip_offline')):
+                    error = self._ERROR_MAP['offline']
             if error:
                 raise ExtractorError(error, expected=True)
             raise ExtractorError('Unable to find stream URL')
@@ -113,3 +148,7 @@ def _real_extract(self, url):
             'is_live': True,
             'formats': formats,
         }
+
+    def _real_extract(self, url):
+        ids = self._match_valid_url(url).group('id', 'tld')  # (room slug, TLD); API first, then webpage
+        return self._extract_from_api(*ids) or self._extract_from_webpage(*ids)