mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-16 01:25:06 +00:00
[udemy] Switch to api 2.0 (Closes #9035)
This commit is contained in:
parent
5299bc3f91
commit
81da8cbc45
1 changed file with 36 additions and 24 deletions
|
@ -17,6 +17,7 @@
|
||||||
int_or_none,
|
int_or_none,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
update_url_query,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -54,6 +55,16 @@ class UdemyIE(InfoExtractor):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _extract_course_info(self, webpage, video_id):
|
||||||
|
course = self._parse_json(
|
||||||
|
unescapeHTML(self._search_regex(
|
||||||
|
r'ng-init=["\'].*\bcourse=({.+?});', webpage, 'course', default='{}')),
|
||||||
|
video_id, fatal=False) or {}
|
||||||
|
course_id = course.get('id') or self._search_regex(
|
||||||
|
(r'"id"\s*:\s*(\d+)', r'data-course-id=["\'](\d+)'),
|
||||||
|
webpage, 'course id')
|
||||||
|
return course_id, course.get('title')
|
||||||
|
|
||||||
def _enroll_course(self, base_url, webpage, course_id):
|
def _enroll_course(self, base_url, webpage, course_id):
|
||||||
def combine_url(base_url, url):
|
def combine_url(base_url, url):
|
||||||
return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url
|
return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url
|
||||||
|
@ -98,7 +109,7 @@ def _handle_error(self, response):
|
||||||
error_str += ' - %s' % error_data.get('formErrors')
|
error_str += ' - %s' % error_data.get('formErrors')
|
||||||
raise ExtractorError(error_str, expected=True)
|
raise ExtractorError(error_str, expected=True)
|
||||||
|
|
||||||
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
|
def _download_json(self, url_or_request, *args, **kwargs):
|
||||||
headers = {
|
headers = {
|
||||||
'X-Udemy-Snail-Case': 'true',
|
'X-Udemy-Snail-Case': 'true',
|
||||||
'X-Requested-With': 'XMLHttpRequest',
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
|
@ -116,7 +127,7 @@ def _download_json(self, url_or_request, video_id, note='Downloading JSON metada
|
||||||
else:
|
else:
|
||||||
url_or_request = sanitized_Request(url_or_request, headers=headers)
|
url_or_request = sanitized_Request(url_or_request, headers=headers)
|
||||||
|
|
||||||
response = super(UdemyIE, self)._download_json(url_or_request, video_id, note)
|
response = super(UdemyIE, self)._download_json(url_or_request, *args, **kwargs)
|
||||||
self._handle_error(response)
|
self._handle_error(response)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
@ -166,9 +177,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
webpage = self._download_webpage(url, lecture_id)
|
webpage = self._download_webpage(url, lecture_id)
|
||||||
|
|
||||||
course_id = self._search_regex(
|
course_id, _ = self._extract_course_info(webpage, lecture_id)
|
||||||
(r'data-course-id=["\'](\d+)', r'"id"\s*:\s*(\d+)'),
|
|
||||||
webpage, 'course id')
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
lecture = self._download_lecture(course_id, lecture_id)
|
lecture = self._download_lecture(course_id, lecture_id)
|
||||||
|
@ -309,29 +318,32 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
webpage = self._download_webpage(url, course_path)
|
webpage = self._download_webpage(url, course_path)
|
||||||
|
|
||||||
response = self._download_json(
|
course_id, title = self._extract_course_info(webpage, course_path)
|
||||||
'https://www.udemy.com/api-1.1/courses/%s' % course_path,
|
|
||||||
course_path, 'Downloading course JSON')
|
|
||||||
|
|
||||||
course_id = response['id']
|
|
||||||
course_title = response.get('title')
|
|
||||||
|
|
||||||
self._enroll_course(url, webpage, course_id)
|
self._enroll_course(url, webpage, course_id)
|
||||||
|
|
||||||
|
course_url = update_url_query(
|
||||||
|
'https://www.udemy.com/api-2.0/courses/%s/cached-subscriber-curriculum-items' % course_id,
|
||||||
|
{
|
||||||
|
'fields[chapter]': 'title,object_index',
|
||||||
|
'fields[lecture]': 'title',
|
||||||
|
'page_size': '1000',
|
||||||
|
})
|
||||||
|
|
||||||
response = self._download_json(
|
response = self._download_json(
|
||||||
'https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
|
course_url, course_id, 'Downloading course curriculum')
|
||||||
course_id, 'Downloading course curriculum')
|
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
chapter, chapter_number = None, None
|
chapter, chapter_number = [None] * 2
|
||||||
for asset in response:
|
for entry in response['results']:
|
||||||
asset_type = asset.get('assetType') or asset.get('asset_type')
|
clazz = entry.get('_class')
|
||||||
if asset_type == 'Video':
|
if clazz == 'lecture':
|
||||||
asset_id = asset.get('id')
|
lecture_id = entry.get('id')
|
||||||
if asset_id:
|
if lecture_id:
|
||||||
entry = {
|
entry = {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']),
|
'url': 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, entry['id']),
|
||||||
|
'title': entry.get('title'),
|
||||||
'ie_key': UdemyIE.ie_key(),
|
'ie_key': UdemyIE.ie_key(),
|
||||||
}
|
}
|
||||||
if chapter_number:
|
if chapter_number:
|
||||||
|
@ -339,8 +351,8 @@ def _real_extract(self, url):
|
||||||
if chapter:
|
if chapter:
|
||||||
entry['chapter'] = chapter
|
entry['chapter'] = chapter
|
||||||
entries.append(entry)
|
entries.append(entry)
|
||||||
elif asset.get('type') == 'chapter':
|
elif clazz == 'chapter':
|
||||||
chapter_number = asset.get('index') or asset.get('object_index')
|
chapter_number = entry.get('object_index')
|
||||||
chapter = asset.get('title')
|
chapter = entry.get('title')
|
||||||
|
|
||||||
return self.playlist_result(entries, course_id, course_title)
|
return self.playlist_result(entries, course_id, title)
|
||||||
|
|
Loading…
Reference in a new issue