mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-26 02:55:17 +00:00
Merge branch 'master' into gedi
This commit is contained in:
commit
b662fc8d20
107 changed files with 5025 additions and 2379 deletions
10
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
10
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
|
@ -21,15 +21,15 @@ ## Checklist
|
|||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.26. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.31. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/blackjack4494/yt-dlc.
|
||||
- Search the bugtracker for similar issues: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running youtube-dlc version **2020.10.26**
|
||||
- [ ] I've verified that I'm running youtube-dlc version **2020.10.31**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
|
@ -44,7 +44,7 @@ ## Verbose log
|
|||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dlc version 2020.10.26
|
||||
[debug] youtube-dlc version 2020.10.31
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
|
|
@ -21,15 +21,15 @@ ## Checklist
|
|||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.26. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.31. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dlc does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://github.com/blackjack4494/yt-dlc. youtube-dlc does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running youtube-dlcc version **2020.10.26**
|
||||
- [ ] I've verified that I'm running youtube-dlcc version **2020.10.31**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
|
|
@ -21,13 +21,13 @@ ## Checklist
|
|||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.26. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.31. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running youtube-dlc version **2020.10.26**
|
||||
- [ ] I've verified that I'm running youtube-dlc version **2020.10.31**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
|
12
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
12
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
|
@ -21,16 +21,16 @@ ## Checklist
|
|||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.26. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.31. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Read bugs section in FAQ: http://yt-dl.org/reporting
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/blackjack4494/yt-dlc.
|
||||
- Search the bugtracker for similar issues: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
|
||||
- Read bugs section in FAQ: https://github.com/blackjack4494/yt-dlc
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running youtube-dlc version **2020.10.26**
|
||||
- [ ] I've verified that I'm running youtube-dlc version **2020.10.31**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
|
@ -46,7 +46,7 @@ ## Verbose log
|
|||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dlc version 2020.10.26
|
||||
[debug] youtube-dlc version 2020.10.31
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
|
6
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
6
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
|
@ -21,13 +21,13 @@ ## Checklist
|
|||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.26. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is 2020.10.31. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running youtube-dlc version **2020.10.26**
|
||||
- [ ] I've verified that I'm running youtube-dlc version **2020.10.31**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
|
6
.github/ISSUE_TEMPLATE/6_question.md
vendored
6
.github/ISSUE_TEMPLATE/6_question.md
vendored
|
@ -21,8 +21,8 @@ ## Checklist
|
|||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- Look through the README (http://yt-dl.org/readme) and FAQ (http://yt-dl.org/faq) for similar questions
|
||||
- Search the bugtracker for similar questions: http://yt-dl.org/search-issues
|
||||
- Look through the README (https://github.com/blackjack4494/yt-dlc) and FAQ (https://github.com/blackjack4494/yt-dlc) for similar questions
|
||||
- Search the bugtracker for similar questions: https://github.com/blackjack4494/yt-dlc
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
|
@ -34,7 +34,7 @@ ## Checklist
|
|||
## Question
|
||||
|
||||
<!--
|
||||
Ask your question in an arbitrary form. Please make sure it's worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient.
|
||||
Ask your question in an arbitrary form. Please make sure it's worded well enough to be understood, see https://github.com/blackjack4494/yt-dlc.
|
||||
-->
|
||||
|
||||
WRITE QUESTION HERE
|
||||
|
|
6
.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md
vendored
|
@ -18,10 +18,10 @@ ## Checklist
|
|||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/blackjack4494/yt-dlc.
|
||||
- Search the bugtracker for similar issues: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
|
|
|
@ -19,10 +19,10 @@ ## Checklist
|
|||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dlc does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://github.com/blackjack4494/yt-dlc. youtube-dlc does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
|
|
|
@ -18,8 +18,8 @@ ## Checklist
|
|||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
|
|
8
.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md
vendored
8
.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md
vendored
|
@ -18,11 +18,11 @@ ## Checklist
|
|||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Read bugs section in FAQ: http://yt-dl.org/reporting
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/blackjack4494/yt-dlc.
|
||||
- Search the bugtracker for similar issues: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
|
||||
- Read bugs section in FAQ: https://github.com/blackjack4494/yt-dlc
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
|
|
|
@ -19,8 +19,8 @@ ## Checklist
|
|||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dlc:
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- First of, make sure you are using the latest version of youtube-dlc. Run `youtube-dlc --version` and ensure your version is %(version)s. If it's not, see https://github.com/blackjack4494/yt-dlc on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: https://github.com/blackjack4494/yt-dlc. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
|
|
42
.github/workflows/build.yml
vendored
42
.github/workflows/build.yml
vendored
|
@ -20,7 +20,7 @@ jobs:
|
|||
- name: Set up Python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.x'
|
||||
python-version: '3.8'
|
||||
- name: Install packages
|
||||
run: sudo apt-get -y install zip pandoc man
|
||||
- name: Bump version
|
||||
|
@ -57,7 +57,7 @@ jobs:
|
|||
id: sha2_file
|
||||
env:
|
||||
SHA2: ${{ hashFiles('youtube-dlc') }}
|
||||
run: echo "::set-output name=sha2_unix::${env:SHA2}"
|
||||
run: echo "::set-output name=sha2_unix::$SHA2"
|
||||
- name: Install dependencies for pypi
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
|
@ -82,7 +82,7 @@ jobs:
|
|||
- name: Set up Python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.x'
|
||||
python-version: '3.8'
|
||||
- name: Install Requirements
|
||||
run: pip install pyinstaller
|
||||
- name: Bump version
|
||||
|
@ -98,25 +98,25 @@ jobs:
|
|||
upload_url: ${{ needs.build_unix.outputs.upload_url }}
|
||||
asset_path: ./dist/youtube-dlc.exe
|
||||
asset_name: youtube-dlc.exe
|
||||
asset_content_type: application/octet-stream
|
||||
asset_content_type: application/vnd.microsoft.portable-executable
|
||||
- name: Get SHA2-256SUMS for youtube-dlc.exe
|
||||
id: sha2_file_win
|
||||
env:
|
||||
SHA2: ${{ hashFiles('dist/youtube-dlc.exe') }}
|
||||
run: echo "::set-output name=sha2_windows::${env:SHA2}"
|
||||
SHA2_win: ${{ hashFiles('dist/youtube-dlc.exe') }}
|
||||
run: echo "::set-output name=sha2_windows::$SHA2_win"
|
||||
|
||||
build_windows32:
|
||||
|
||||
runs-on: windows-latest
|
||||
|
||||
needs: build_unix
|
||||
needs: [build_unix, build_windows]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python 3.5.4 32-Bit
|
||||
- name: Set up Python 3.4.4 32-Bit
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.5.4'
|
||||
python-version: '3.4.4'
|
||||
architecture: 'x86'
|
||||
- name: Install Requirements for 32 Bit
|
||||
run: pip install pyinstaller==3.5
|
||||
|
@ -133,12 +133,12 @@ jobs:
|
|||
upload_url: ${{ needs.build_unix.outputs.upload_url }}
|
||||
asset_path: ./dist/youtube-dlc_x86.exe
|
||||
asset_name: youtube-dlc_x86.exe
|
||||
asset_content_type: application/octet-stream
|
||||
asset_content_type: application/vnd.microsoft.portable-executable
|
||||
- name: Get SHA2-256SUMS for youtube-dlc_x86.exe
|
||||
id: sha2_file_win32
|
||||
env:
|
||||
SHA2: ${{ hashFiles('dist/youtube-dlc_x86.exe') }}
|
||||
run: echo "::set-output name=sha2_windows32::${env:SHA2}"
|
||||
SHA2_win32: ${{ hashFiles('dist/youtube-dlc_x86.exe') }}
|
||||
run: echo "::set-output name=sha2_windows32::$SHA2_win32"
|
||||
- name: Make SHA2-256SUMS file
|
||||
env:
|
||||
SHA2_WINDOWS: ${{ needs.build_windows.outputs.sha2_windows }}
|
||||
|
@ -146,6 +146,18 @@ jobs:
|
|||
SHA2_UNIX: ${{ needs.build_unix.outputs.sha2_unix }}
|
||||
YTDLC_VERSION: ${{ needs.build_unix.outputs.ytdlc_version }}
|
||||
run: |
|
||||
echo "$SHA2_WINDOWS youtube-dlc.exe" > SHA2-256SUMS
|
||||
echo "$SHA2_WINDOWS32 youtube-dlc32.exe" > SHA2-256SUMS
|
||||
echo "$SHA2_UNIX youtube-dlc" >> SHA2-256SUMS
|
||||
echo "version:${env:YTDLC_VERSION}" >> SHA2-256SUMS
|
||||
echo "youtube-dlc.exe:${env:SHA2_WINDOWS}" >> SHA2-256SUMS
|
||||
echo "youtube-dlc_x86.exe:${env:SHA2_WINDOWS32}" >> SHA2-256SUMS
|
||||
echo "youtube-dlc:${env:SHA2_UNIX}" >> SHA2-256SUMS
|
||||
|
||||
- name: Upload 256SUMS file
|
||||
id: upload-sums
|
||||
uses: actions/upload-release-asset@v1
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
with:
|
||||
upload_url: ${{ needs.build_unix.outputs.upload_url }}
|
||||
asset_path: ./SHA2-256SUMS
|
||||
asset_name: SHA2-256SUMS
|
||||
asset_content_type: text/plain
|
||||
|
|
14
README.md
14
README.md
|
@ -1,15 +1,15 @@
|
|||
[![Build Status](https://travis-ci.com/blackjack4494/youtube-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/youtube-dlc)
|
||||
[![Build Status](https://travis-ci.com/blackjack4494/yt-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/yt-dlc)
|
||||
[![PyPi](https://img.shields.io/pypi/v/youtube-dlc.svg)](https://pypi.org/project/youtube-dlc)
|
||||
[![Downloads](https://pepy.tech/badge/youtube-dlc)](https://pepy.tech/project/youtube-dlc)
|
||||
|
||||
[![Gitter chat](https://img.shields.io/gitter/room/youtube-dlc/community)](https://gitter.im/youtube-dlc)
|
||||
[![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/blackjack4494/youtube-dlc/blob/master/LICENSE)
|
||||
[![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/blackjack4494/yt-dlc/blob/master/LICENSE)
|
||||
|
||||
youtube-dlc - download videos from youtube.com or other video platforms.
|
||||
|
||||
youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://github.com/ytdl-org/youtube-dl/issues/26462)
|
||||
youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://web.archive.org/web/20201014194602/https://github.com/ytdl-org/youtube-dl/issues/26462)
|
||||
|
||||
- [INSTALLATION](#installation)
|
||||
- [UPDATE](#update)
|
||||
- [DESCRIPTION](#description)
|
||||
- [OPTIONS](#options)
|
||||
- [Network Options:](#network-options)
|
||||
|
@ -44,6 +44,10 @@ # INSTALLATION
|
|||
|
||||
python -m pip install --upgrade youtube-dlc
|
||||
|
||||
If you want to install the current master branch
|
||||
|
||||
python -m pip install git+https://github.com/blackjack4494/yt-dlc
|
||||
|
||||
**UNIX** (Linux, macOS, etc.)
|
||||
Using wget:
|
||||
|
||||
|
@ -213,6 +217,8 @@ ## Video Selection:
|
|||
--download-archive FILE Download only videos not listed in the
|
||||
archive file. Record the IDs of all
|
||||
downloaded videos in it.
|
||||
--break-on-existing Stop the download process after attempting
|
||||
to download a file that's in the archive.
|
||||
--include-ads Download advertisements as well
|
||||
(experimental)
|
||||
|
||||
|
|
|
@ -61,7 +61,7 @@ def build_lazy_ie(ie, name):
|
|||
return s
|
||||
|
||||
|
||||
# find the correct sorting and add the required base classes so that sublcasses
|
||||
# find the correct sorting and add the required base classes so that subclasses
|
||||
# can be correctly created
|
||||
classes = _ALL_CLASSES[:-1]
|
||||
ordered_cls = []
|
||||
|
|
|
@ -59,9 +59,9 @@ # Supported sites
|
|||
- **ARD:mediathek**
|
||||
- **ARDBetaMediathek**
|
||||
- **Arkena**
|
||||
- **arte.tv:+7**
|
||||
- **arte.tv:embed**
|
||||
- **arte.tv:playlist**
|
||||
- **ArteTV**
|
||||
- **ArteTVEmbed**
|
||||
- **ArteTVPlaylist**
|
||||
- **AsianCrush**
|
||||
- **AsianCrushPlaylist**
|
||||
- **AtresPlayer**
|
||||
|
@ -104,12 +104,14 @@ # Supported sites
|
|||
- **BIQLE**
|
||||
- **BitChute**
|
||||
- **BitChuteChannel**
|
||||
- **bitwave.tv**
|
||||
- **BleacherReport**
|
||||
- **BleacherReportCMS**
|
||||
- **blinkx**
|
||||
- **Bloomberg**
|
||||
- **BokeCC**
|
||||
- **BostonGlobe**
|
||||
- **Box**
|
||||
- **Bpb**: Bundeszentrale für politische Bildung
|
||||
- **BR**: Bayerischer Rundfunk
|
||||
- **BravoTV**
|
||||
|
@ -157,6 +159,7 @@ # Supported sites
|
|||
- **Chilloutzone**
|
||||
- **chirbit**
|
||||
- **chirbit:profile**
|
||||
- **cielotv.it**
|
||||
- **Cinchcast**
|
||||
- **Cinemax**
|
||||
- **CiscoLiveSearch**
|
||||
|
@ -424,6 +427,7 @@ # Supported sites
|
|||
- **la7.it**
|
||||
- **laola1tv**
|
||||
- **laola1tv:embed**
|
||||
- **lbry.tv**
|
||||
- **LCI**
|
||||
- **Lcp**
|
||||
- **LcpPlay**
|
||||
|
@ -474,6 +478,7 @@ # Supported sites
|
|||
- **massengeschmack.tv**
|
||||
- **MatchTV**
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
- **MedalTV**
|
||||
- **media.ccc.de**
|
||||
- **media.ccc.de:lists**
|
||||
- **Medialaan**
|
||||
|
@ -582,6 +587,7 @@ # Supported sites
|
|||
- **niconico**: ニコニコ動画
|
||||
- **NiconicoPlaylist**
|
||||
- **Nintendo**
|
||||
- **Nitter**
|
||||
- **njoy**: N-JOY
|
||||
- **njoy:embed**
|
||||
- **NJPWWorld**: 新日本プロレスワールド
|
||||
|
@ -616,6 +622,7 @@ # Supported sites
|
|||
- **Nuvid**
|
||||
- **NYTimes**
|
||||
- **NYTimesArticle**
|
||||
- **NYTimesCooking**
|
||||
- **NZZ**
|
||||
- **ocw.mit.edu**
|
||||
- **OdaTV**
|
||||
|
@ -668,6 +675,8 @@ # Supported sites
|
|||
- **PicartoVod**
|
||||
- **Piksel**
|
||||
- **Pinkbike**
|
||||
- **Pinterest**
|
||||
- **PinterestCollection**
|
||||
- **Pladform**
|
||||
- **Platzi**
|
||||
- **PlatziCourse**
|
||||
|
@ -764,6 +773,7 @@ # Supported sites
|
|||
- **RTVNH**
|
||||
- **RTVS**
|
||||
- **RUHD**
|
||||
- **RumbleEmbed**
|
||||
- **rutube**: Rutube videos
|
||||
- **rutube:channel**: Rutube channels
|
||||
- **rutube:embed**: Rutube embedded videos
|
||||
|
@ -834,12 +844,14 @@ # Supported sites
|
|||
- **SpankBangPlaylist**
|
||||
- **Spankwire**
|
||||
- **Spiegel**
|
||||
- **Spiegel:Article**: Articles on spiegel.de
|
||||
- **Spiegeltv**
|
||||
- **sport.francetvinfo.fr**
|
||||
- **Sport5**
|
||||
- **SportBox**
|
||||
- **SportDeutschland**
|
||||
- **Spreaker**
|
||||
- **SpreakerPage**
|
||||
- **SpreakerShow**
|
||||
- **SpreakerShowPage**
|
||||
- **SpringboardPlatform**
|
||||
- **Sprout**
|
||||
- **sr:mediathek**: Saarländischer Rundfunk
|
||||
|
@ -943,6 +955,7 @@ # Supported sites
|
|||
- **TV2DKBornholmPlay**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TV5MondePlus**: TV5MONDE+
|
||||
- **tv8.it**
|
||||
- **TVA**
|
||||
- **TVANouvelles**
|
||||
- **TVANouvellesArticle**
|
||||
|
@ -1057,7 +1070,7 @@ # Supported sites
|
|||
- **vk:wallpost**
|
||||
- **vlive**
|
||||
- **vlive:channel**
|
||||
- **vlive:playlist**
|
||||
- **vlive:post**
|
||||
- **Vodlocker**
|
||||
- **VODPl**
|
||||
- **VODPlatform**
|
||||
|
@ -1146,20 +1159,17 @@ # Supported sites
|
|||
- **YourPorn**
|
||||
- **YourUpload**
|
||||
- **youtube**: YouTube.com
|
||||
- **youtube:channel**: YouTube.com channels
|
||||
- **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
|
||||
- **youtube:favorites**: YouTube.com liked videos, ":ytfav" for short (requires authentication)
|
||||
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
|
||||
- **youtube:live**: YouTube.com live streams
|
||||
- **youtube:playlist**: YouTube.com playlists
|
||||
- **youtube:playlists**: YouTube.com user/channel playlists
|
||||
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
|
||||
- **youtube:search**: YouTube.com searches
|
||||
- **youtube:search:date**: YouTube.com searches, newest videos first
|
||||
- **youtube:search**: YouTube.com searches, "ytsearch" keyword
|
||||
- **youtube:search:date**: YouTube.com searches, newest videos first, "ytsearchdate" keyword
|
||||
- **youtube:search_url**: YouTube.com search URLs
|
||||
- **youtube:show**: YouTube.com (multi-season) shows
|
||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)
|
||||
- **youtube:tab**: YouTube.com tab
|
||||
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
||||
- **YoutubeYtUser**: YouTube.com user videos, URL or "ytuser" keyword
|
||||
- **Zapiks**
|
||||
- **Zaq1**
|
||||
- **Zattoo**
|
||||
|
|
|
@ -1 +1 @@
|
|||
py -m PyInstaller youtube_dlc\__main__.py --onefile --name youtube-dlc --version-file win\ver.txt --icon win\icon\cloud.ico
|
||||
py -m PyInstaller youtube_dlc\__main__.py --onefile --name youtube-dlc --version-file win\ver.txt --icon win\icon\cloud.ico --upx-exclude=vcruntime140.dll
|
2
setup.py
2
setup.py
|
@ -66,7 +66,7 @@ def run(self):
|
|||
description=DESCRIPTION,
|
||||
long_description=LONG_DESCRIPTION,
|
||||
# long_description_content_type="text/markdown",
|
||||
url="https://github.com/blackjack4494/youtube-dlc",
|
||||
url="https://github.com/blackjack4494/yt-dlc",
|
||||
packages=find_packages(exclude=("youtube_dl","test",)),
|
||||
#packages=[
|
||||
# 'youtube_dlc',
|
||||
|
|
|
@ -37,7 +37,7 @@
|
|||
"writeinfojson": true,
|
||||
"writesubtitles": false,
|
||||
"allsubtitles": false,
|
||||
"listssubtitles": false,
|
||||
"listsubtitles": false,
|
||||
"socket_timeout": 20,
|
||||
"fixup": "never"
|
||||
}
|
||||
|
|
|
@ -919,6 +919,76 @@ def _real_extract(self, url):
|
|||
self.assertEqual(downloaded['extractor'], 'testex')
|
||||
self.assertEqual(downloaded['extractor_key'], 'TestEx')
|
||||
|
||||
# Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
|
||||
def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
|
||||
|
||||
class _YDL(YDL):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(_YDL, self).__init__(*args, **kwargs)
|
||||
|
||||
def trouble(self, s, tb=None):
|
||||
pass
|
||||
|
||||
ydl = _YDL({
|
||||
'format': 'extra',
|
||||
'ignoreerrors': True,
|
||||
})
|
||||
|
||||
class VideoIE(InfoExtractor):
|
||||
_VALID_URL = r'video:(?P<id>\d+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
formats = [{
|
||||
'format_id': 'default',
|
||||
'url': 'url:',
|
||||
}]
|
||||
if video_id == '0':
|
||||
raise ExtractorError('foo')
|
||||
if video_id == '2':
|
||||
formats.append({
|
||||
'format_id': 'extra',
|
||||
'url': TEST_URL,
|
||||
})
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': 'Video %s' % video_id,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
class PlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'playlist:'
|
||||
|
||||
def _entries(self):
|
||||
for n in range(3):
|
||||
video_id = compat_str(n)
|
||||
yield {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': VideoIE.ie_key(),
|
||||
'id': video_id,
|
||||
'url': 'video:%s' % video_id,
|
||||
'title': 'Video Transparent %s' % video_id,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self.playlist_result(self._entries())
|
||||
|
||||
ydl.add_info_extractor(VideoIE(ydl))
|
||||
ydl.add_info_extractor(PlaylistIE(ydl))
|
||||
info = ydl.extract_info('playlist:')
|
||||
entries = info['entries']
|
||||
self.assertEqual(len(entries), 3)
|
||||
self.assertTrue(entries[0] is None)
|
||||
self.assertTrue(entries[1] is None)
|
||||
self.assertEqual(len(ydl.downloaded_info_dicts), 1)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(entries[2], downloaded)
|
||||
self.assertEqual(downloaded['url'], TEST_URL)
|
||||
self.assertEqual(downloaded['title'], 'Video Transparent 2')
|
||||
self.assertEqual(downloaded['id'], '2')
|
||||
self.assertEqual(downloaded['extractor'], 'Video')
|
||||
self.assertEqual(downloaded['extractor_key'], 'Video')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -31,45 +31,47 @@ def assertMatch(self, url, ie_list):
|
|||
|
||||
def test_youtube_playlist_matching(self):
|
||||
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
|
||||
assertTab = lambda url: self.assertMatch(url, ['youtube:tab'])
|
||||
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
|
||||
assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||
assertPlaylist('PL63F0C78739B09958')
|
||||
assertTab('https://www.youtube.com/AsapSCIENCE')
|
||||
assertTab('https://www.youtube.com/embedded')
|
||||
assertTab('https://www.youtube.com/feed') # Own channel's home page
|
||||
assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
|
||||
assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
|
||||
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
|
||||
# Top tracks
|
||||
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
|
||||
assertTab('https://www.youtube.com/playlist?list=MCUS.20142101')
|
||||
|
||||
def test_youtube_matching(self):
|
||||
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
|
||||
self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) # 668
|
||||
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
|
||||
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
|
||||
# self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) # /v/ is no longer valid
|
||||
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
|
||||
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
|
||||
|
||||
def test_youtube_channel_matching(self):
|
||||
assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
|
||||
assertChannel = lambda url: self.assertMatch(url, ['youtube:tab'])
|
||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
|
||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
|
||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||
|
||||
def test_youtube_user_matching(self):
|
||||
self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
|
||||
# def test_youtube_user_matching(self):
|
||||
# self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
|
||||
|
||||
def test_youtube_feeds(self):
|
||||
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
|
||||
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
|
||||
self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
|
||||
self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
|
||||
self.assertMatch('https://www.youtube.com/feed/library', ['youtube:tab'])
|
||||
self.assertMatch('https://www.youtube.com/feed/history', ['youtube:tab'])
|
||||
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab'])
|
||||
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab'])
|
||||
|
||||
def test_youtube_show_matching(self):
|
||||
self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
|
||||
|
||||
def test_youtube_search_matching(self):
|
||||
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
||||
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
||||
# def test_youtube_search_matching(self):
|
||||
# self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
||||
# self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
||||
|
||||
def test_youtube_extract(self):
|
||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
||||
|
|
|
@ -937,6 +937,28 @@ def test_js_to_json_edgecases(self):
|
|||
self.assertEqual(d['x'], 1)
|
||||
self.assertEqual(d['y'], 'a')
|
||||
|
||||
# Just drop ! prefix for now though this results in a wrong value
|
||||
on = js_to_json('''{
|
||||
a: !0,
|
||||
b: !1,
|
||||
c: !!0,
|
||||
d: !!42.42,
|
||||
e: !!![],
|
||||
f: !"abc",
|
||||
g: !"",
|
||||
!42: 42
|
||||
}''')
|
||||
self.assertEqual(json.loads(on), {
|
||||
'a': 0,
|
||||
'b': 1,
|
||||
'c': 0,
|
||||
'd': 42.42,
|
||||
'e': [],
|
||||
'f': "abc",
|
||||
'g': "",
|
||||
'42': 42
|
||||
})
|
||||
|
||||
on = js_to_json('["abc", "def",]')
|
||||
self.assertEqual(json.loads(on), ['abc', 'def'])
|
||||
|
||||
|
@ -994,6 +1016,12 @@ def test_js_to_json_edgecases(self):
|
|||
on = js_to_json('{42:4.2e1}')
|
||||
self.assertEqual(json.loads(on), {'42': 42.0})
|
||||
|
||||
on = js_to_json('{ "0x40": "0x40" }')
|
||||
self.assertEqual(json.loads(on), {'0x40': '0x40'})
|
||||
|
||||
on = js_to_json('{ "040": "040" }')
|
||||
self.assertEqual(json.loads(on), {'040': '040'})
|
||||
|
||||
def test_js_to_json_malformed(self):
|
||||
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
||||
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
||||
|
|
|
@ -210,6 +210,8 @@ class YoutubeDL(object):
|
|||
download_archive: File name of a file where all downloads are recorded.
|
||||
Videos already present in the file are not downloaded
|
||||
again.
|
||||
break_on_existing: Stop the download process after attempting to download a file that's
|
||||
in the archive.
|
||||
cookiefile: File name where cookies should be read from and dumped to.
|
||||
nocheckcertificate:Do not verify SSL certificates
|
||||
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
|
||||
|
@ -801,7 +803,7 @@ def add_extra_info(info_dict, extra_info):
|
|||
for key, value in extra_info.items():
|
||||
info_dict.setdefault(key, value)
|
||||
|
||||
def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
||||
def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
|
||||
process=True, force_generic_extractor=False):
|
||||
'''
|
||||
Returns a list with a dictionary for each video we find.
|
||||
|
@ -821,26 +823,30 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
|||
if not ie.suitable(url):
|
||||
continue
|
||||
|
||||
ie = self.get_info_extractor(ie.ie_key())
|
||||
ie_key = ie.ie_key()
|
||||
ie = self.get_info_extractor(ie_key)
|
||||
if not ie.working():
|
||||
self.report_warning('The program functionality for this site has been marked as broken, '
|
||||
'and will probably not work.')
|
||||
|
||||
try:
|
||||
ie_result = ie.extract(url)
|
||||
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
|
||||
break
|
||||
if isinstance(ie_result, list):
|
||||
# Backwards compatibility: old IE result format
|
||||
ie_result = {
|
||||
'_type': 'compat_list',
|
||||
'entries': ie_result,
|
||||
}
|
||||
self.add_default_extra_info(ie_result, ie, url)
|
||||
if process:
|
||||
return self.process_ie_result(ie_result, download, extra_info)
|
||||
else:
|
||||
return ie_result
|
||||
temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
|
||||
except (AssertionError, IndexError, AttributeError):
|
||||
temp_id = None
|
||||
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
|
||||
self.to_screen("[%s] %s: has already been recorded in archive" % (
|
||||
ie_key, temp_id))
|
||||
break
|
||||
|
||||
return self.__extract_info(url, ie, download, extra_info, process, info_dict)
|
||||
|
||||
else:
|
||||
self.report_error('no suitable InfoExtractor for URL %s' % url)
|
||||
|
||||
def __handle_extraction_exceptions(func):
|
||||
def wrapper(self, *args, **kwargs):
|
||||
try:
|
||||
return func(self, *args, **kwargs)
|
||||
except GeoRestrictedError as e:
|
||||
msg = e.msg
|
||||
if e.countries:
|
||||
|
@ -848,20 +854,38 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
|||
map(ISO3166Utils.short2full, e.countries))
|
||||
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
|
||||
self.report_error(msg)
|
||||
break
|
||||
except ExtractorError as e: # An error we somewhat expected
|
||||
self.report_error(compat_str(e), e.format_traceback())
|
||||
break
|
||||
except MaxDownloadsReached:
|
||||
raise
|
||||
except Exception as e:
|
||||
if self.params.get('ignoreerrors', False):
|
||||
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
|
||||
break
|
||||
else:
|
||||
raise
|
||||
return wrapper
|
||||
|
||||
@__handle_extraction_exceptions
|
||||
def __extract_info(self, url, ie, download, extra_info, process, info_dict):
|
||||
ie_result = ie.extract(url)
|
||||
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
|
||||
return
|
||||
if isinstance(ie_result, list):
|
||||
# Backwards compatibility: old IE result format
|
||||
ie_result = {
|
||||
'_type': 'compat_list',
|
||||
'entries': ie_result,
|
||||
}
|
||||
if info_dict:
|
||||
if info_dict.get('id'):
|
||||
ie_result['id'] = info_dict['id']
|
||||
if info_dict.get('title'):
|
||||
ie_result['title'] = info_dict['title']
|
||||
self.add_default_extra_info(ie_result, ie, url)
|
||||
if process:
|
||||
return self.process_ie_result(ie_result, download, extra_info)
|
||||
else:
|
||||
self.report_error('no suitable InfoExtractor for URL %s' % url)
|
||||
return ie_result
|
||||
|
||||
def add_default_extra_info(self, ie_result, ie, url):
|
||||
self.add_extra_info(ie_result, {
|
||||
|
@ -898,7 +922,7 @@ def process_ie_result(self, ie_result, download=True, extra_info={}):
|
|||
# We have to add extra_info to the results because it may be
|
||||
# contained in a playlist
|
||||
return self.extract_info(ie_result['url'],
|
||||
download,
|
||||
download, info_dict=ie_result,
|
||||
ie_key=ie_result.get('ie_key'),
|
||||
extra_info=extra_info)
|
||||
elif result_type == 'url_transparent':
|
||||
|
@ -1033,12 +1057,15 @@ def report_download(num_entries):
|
|||
|
||||
reason = self._match_entry(entry, incomplete=True)
|
||||
if reason is not None:
|
||||
self.to_screen('[download] ' + reason)
|
||||
continue
|
||||
if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
|
||||
print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
|
||||
break
|
||||
else:
|
||||
self.to_screen('[download] ' + reason)
|
||||
continue
|
||||
|
||||
entry_result = self.process_ie_result(entry,
|
||||
download=download,
|
||||
extra_info=extra)
|
||||
entry_result = self.__process_iterable_entry(entry, download, extra)
|
||||
# TODO: skip failed (empty) entries?
|
||||
playlist_results.append(entry_result)
|
||||
ie_result['entries'] = playlist_results
|
||||
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
||||
|
@ -1067,6 +1094,11 @@ def _fixup(r):
|
|||
else:
|
||||
raise Exception('Invalid result type: %s' % result_type)
|
||||
|
||||
@__handle_extraction_exceptions
|
||||
def __process_iterable_entry(self, entry, download, extra_info):
|
||||
return self.process_ie_result(
|
||||
entry, download=download, extra_info=extra_info)
|
||||
|
||||
def _build_format_filter(self, filter_spec):
|
||||
" Returns a function to filter the formats according to the filter_spec "
|
||||
|
||||
|
@ -1852,13 +1884,13 @@ def ensure_dir_exists(path):
|
|||
self.report_error('Cannot write annotations file: ' + annofn)
|
||||
return
|
||||
|
||||
def dl(name, info):
|
||||
def dl(name, info, subtitle=False):
|
||||
fd = get_suitable_downloader(info, self.params)(self, self.params)
|
||||
for ph in self._progress_hooks:
|
||||
fd.add_progress_hook(ph)
|
||||
if self.params.get('verbose'):
|
||||
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
|
||||
return fd.download(name, info)
|
||||
return fd.download(name, info, subtitle)
|
||||
|
||||
subtitles_are_requested = any([self.params.get('writesubtitles', False),
|
||||
self.params.get('writeautomaticsub')])
|
||||
|
@ -1867,7 +1899,7 @@ def dl(name, info):
|
|||
# subtitles download errors are already managed as troubles in relevant IE
|
||||
# that way it will silently go on when used with unsupporting IE
|
||||
subtitles = info_dict['requested_subtitles']
|
||||
ie = self.get_info_extractor(info_dict['extractor_key'])
|
||||
# ie = self.get_info_extractor(info_dict['extractor_key'])
|
||||
for sub_lang, sub_info in subtitles.items():
|
||||
sub_format = sub_info['ext']
|
||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
|
||||
|
@ -1886,6 +1918,8 @@ def dl(name, info):
|
|||
return
|
||||
else:
|
||||
try:
|
||||
dl(sub_filename, sub_info, subtitle=True)
|
||||
'''
|
||||
if self.params.get('sleep_interval_subtitles', False):
|
||||
dl(sub_filename, sub_info)
|
||||
else:
|
||||
|
@ -1893,6 +1927,7 @@ def dl(name, info):
|
|||
sub_info['url'], info_dict['id'], note=False).read()
|
||||
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
|
||||
subfile.write(sub_data)
|
||||
'''
|
||||
except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self.report_warning('Unable to download subtitle for "%s": %s' %
|
||||
(sub_lang, error_to_compat_str(err)))
|
||||
|
|
|
@ -405,6 +405,7 @@ def parse_retries(retries):
|
|||
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
||||
'age_limit': opts.age_limit,
|
||||
'download_archive': download_archive_fn,
|
||||
'break_on_existing': opts.break_on_existing,
|
||||
'cookiefile': opts.cookiefile,
|
||||
'nocheckcertificate': opts.no_check_certificate,
|
||||
'prefer_insecure': opts.prefer_insecure,
|
||||
|
|
|
@ -2345,7 +2345,7 @@ def __init__(self, version, name, value, *args, **kwargs):
|
|||
|
||||
# HTMLParseError has been deprecated in Python 3.3 and removed in
|
||||
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
|
||||
# and uniform cross-version exceptiong handling
|
||||
# and uniform cross-version exception handling
|
||||
class compat_HTMLParseError(Exception):
|
||||
pass
|
||||
|
||||
|
|
|
@ -326,7 +326,7 @@ def report_unable_to_resume(self):
|
|||
"""Report it was impossible to resume download."""
|
||||
self.to_screen('[download] Unable to resume')
|
||||
|
||||
def download(self, filename, info_dict):
|
||||
def download(self, filename, info_dict, subtitle=False):
|
||||
"""Download to a filename using the info from info_dict
|
||||
Return True on success and False otherwise
|
||||
"""
|
||||
|
@ -353,16 +353,25 @@ def download(self, filename, info_dict):
|
|||
})
|
||||
return True
|
||||
|
||||
min_sleep_interval = self.params.get('sleep_interval')
|
||||
if min_sleep_interval:
|
||||
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
|
||||
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
|
||||
self.to_screen(
|
||||
'[download] Sleeping %s seconds...' % (
|
||||
int(sleep_interval) if sleep_interval.is_integer()
|
||||
else '%.2f' % sleep_interval))
|
||||
time.sleep(sleep_interval)
|
||||
|
||||
if subtitle is False:
|
||||
min_sleep_interval = self.params.get('sleep_interval')
|
||||
if min_sleep_interval:
|
||||
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
|
||||
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
|
||||
self.to_screen(
|
||||
'[download] Sleeping %s seconds...' % (
|
||||
int(sleep_interval) if sleep_interval.is_integer()
|
||||
else '%.2f' % sleep_interval))
|
||||
time.sleep(sleep_interval)
|
||||
else:
|
||||
sleep_interval_sub = 0
|
||||
if type(self.params.get('sleep_interval_subtitles')) is int:
|
||||
sleep_interval_sub = self.params.get('sleep_interval_subtitles')
|
||||
if sleep_interval_sub > 0:
|
||||
self.to_screen(
|
||||
'[download] Sleeping %s seconds...' % (
|
||||
sleep_interval_sub))
|
||||
time.sleep(sleep_interval_sub)
|
||||
return self.real_download(filename, info_dict)
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
|
|
|
@ -115,8 +115,10 @@ class CurlFD(ExternalFD):
|
|||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '--location', '-o', tmpfilename]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
if info_dict.get('http_headers') is not None:
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
|
||||
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
|
||||
cmd += self._valueless_option('--silent', 'noprogress')
|
||||
cmd += self._valueless_option('--verbose', 'verbose')
|
||||
|
@ -150,8 +152,9 @@ class AxelFD(ExternalFD):
|
|||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '-o', tmpfilename]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['-H', '%s: %s' % (key, val)]
|
||||
if info_dict.get('http_headers') is not None:
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['-H', '%s: %s' % (key, val)]
|
||||
cmd += self._configuration_args()
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
@ -162,8 +165,9 @@ class WgetFD(ExternalFD):
|
|||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
if info_dict.get('http_headers') is not None:
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._option('--limit-rate', 'ratelimit')
|
||||
retry = self._option('--tries', 'retries')
|
||||
if len(retry) == 2:
|
||||
|
@ -189,8 +193,9 @@ def _make_cmd(self, tmpfilename, info_dict):
|
|||
if dn:
|
||||
cmd += ['--dir', dn]
|
||||
cmd += ['--out', os.path.basename(tmpfilename)]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
if info_dict.get('http_headers') is not None:
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._option('--interface', 'source_address')
|
||||
cmd += self._option('--all-proxy', 'proxy')
|
||||
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
|
||||
|
@ -206,8 +211,10 @@ def available(cls):
|
|||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['%s:%s' % (key, val)]
|
||||
|
||||
if info_dict.get('http_headers') is not None:
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['%s:%s' % (key, val)]
|
||||
return cmd
|
||||
|
||||
|
||||
|
@ -253,7 +260,7 @@ def _call_downloader(self, tmpfilename, info_dict):
|
|||
# if end_time:
|
||||
# args += ['-t', compat_str(end_time - start_time)]
|
||||
|
||||
if info_dict['http_headers'] and re.match(r'^https?://', url):
|
||||
if info_dict.get('http_headers') is not None and re.match(r'^https?://', url):
|
||||
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
|
||||
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
|
||||
headers = handle_youtubedl_headers(info_dict['http_headers'])
|
||||
|
|
|
@ -97,12 +97,15 @@ def _write_ytdl_file(self, ctx):
|
|||
|
||||
def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
|
||||
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
|
||||
success = ctx['dl'].download(fragment_filename, {
|
||||
fragment_info_dict = {
|
||||
'url': frag_url,
|
||||
'http_headers': headers or info_dict.get('http_headers'),
|
||||
})
|
||||
}
|
||||
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
|
||||
if not success:
|
||||
return False, None
|
||||
if fragment_info_dict.get('filetime'):
|
||||
ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
|
||||
down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
|
||||
ctx['fragment_filename_sanitized'] = frag_sanitized
|
||||
frag_content = down.read()
|
||||
|
@ -258,6 +261,13 @@ def _finish_frag_download(self, ctx):
|
|||
downloaded_bytes = ctx['complete_frags_downloaded_bytes']
|
||||
else:
|
||||
self.try_rename(ctx['tmpfilename'], ctx['filename'])
|
||||
if self.params.get('updatetime', True):
|
||||
filetime = ctx.get('fragment_filetime')
|
||||
if filetime:
|
||||
try:
|
||||
os.utime(ctx['filename'], (time.time(), filetime))
|
||||
except Exception:
|
||||
pass
|
||||
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
|
||||
|
||||
self._hook_progress({
|
||||
|
|
|
@ -109,7 +109,9 @@ def establish_connection():
|
|||
try:
|
||||
ctx.data = self.ydl.urlopen(request)
|
||||
except (compat_urllib_error.URLError, ) as err:
|
||||
if isinstance(err.reason, socket.timeout):
|
||||
# reason may not be available, e.g. for urllib2.HTTPError on python 2.6
|
||||
reason = getattr(err, 'reason', None)
|
||||
if isinstance(reason, socket.timeout):
|
||||
raise RetryDownload(err)
|
||||
raise err
|
||||
# When trying to resume, Content-Range HTTP header of response has to be checked
|
||||
|
|
|
@ -82,7 +82,10 @@ def parse_yt_initial_data(data):
|
|||
offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
|
||||
processed_fragment.extend(
|
||||
json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
|
||||
continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']
|
||||
try:
|
||||
continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']
|
||||
except KeyError:
|
||||
continuation_id = None
|
||||
|
||||
self._append_fragment(ctx, processed_fragment)
|
||||
|
||||
|
|
|
@ -1438,6 +1438,13 @@ def extract_redirect_url(html, url=None, fatal=False):
|
|||
provider_redirect_page, 'oauth redirect')
|
||||
self._download_webpage(
|
||||
oauth_redirect_url, video_id, 'Confirming auto login')
|
||||
elif 'automatically signed in with' in provider_redirect_page:
|
||||
# Seems like comcast is rolling up new way of automatically signing customers
|
||||
oauth_redirect_url = self._html_search_regex(
|
||||
r'continue:\s*"(https://oauth.xfinity.com/oauth/authorize\?.+)"', provider_redirect_page,
|
||||
'oauth redirect (signed)')
|
||||
# Just need to process the request. No useful data comes back
|
||||
self._download_webpage(oauth_redirect_url, video_id, 'Confirming auto login')
|
||||
else:
|
||||
if '<form name="signin"' in provider_redirect_page:
|
||||
provider_login_page_res = provider_redirect_page_res
|
||||
|
|
|
@ -275,7 +275,7 @@ def _real_extract(self, url):
|
|||
video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
|
||||
if video_element is None or video_element.text is None:
|
||||
raise ExtractorError(
|
||||
'Video %s video does not exist' % video_id, expected=True)
|
||||
'Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
video_url = video_element.text.strip()
|
||||
|
||||
|
|
103
youtube_dlc/extractor/amara.py
Normal file
103
youtube_dlc/extractor/amara.py
Normal file
|
@ -0,0 +1,103 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class AmaraIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
# Youtube
|
||||
'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
|
||||
'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
|
||||
'info_dict': {
|
||||
'id': 'h6ZuVdvYnfE',
|
||||
'ext': 'mp4',
|
||||
'title': 'Why jury trials are becoming less common',
|
||||
'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'subtitles': dict,
|
||||
'upload_date': '20160813',
|
||||
'uploader': 'PBS NewsHour',
|
||||
'uploader_id': 'PBSNewsHour',
|
||||
'timestamp': 1549639570,
|
||||
}
|
||||
}, {
|
||||
# Vimeo
|
||||
'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
|
||||
'md5': '99392c75fa05d432a8f11df03612195e',
|
||||
'info_dict': {
|
||||
'id': '18622084',
|
||||
'ext': 'mov',
|
||||
'title': 'Vimeo at CES 2011!',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'subtitles': dict,
|
||||
'timestamp': 1294763658,
|
||||
'upload_date': '20110111',
|
||||
'uploader': 'Sam Morrill',
|
||||
'uploader_id': 'sammorrill'
|
||||
}
|
||||
}, {
|
||||
# Direct Link
|
||||
'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
|
||||
'md5': 'd3970f08512738ee60c5807311ff5d3f',
|
||||
'info_dict': {
|
||||
'id': 's8KL7I3jLmh6',
|
||||
'ext': 'mp4',
|
||||
'title': 'The danger of a single story',
|
||||
'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'subtitles': dict,
|
||||
'upload_date': '20091007',
|
||||
'timestamp': 1254942511,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
meta = self._download_json(
|
||||
'https://amara.org/api/videos/%s/' % video_id,
|
||||
video_id, query={'format': 'json'})
|
||||
title = meta['title']
|
||||
video_url = meta['all_urls'][0]
|
||||
|
||||
subtitles = {}
|
||||
for language in (meta.get('languages') or []):
|
||||
subtitles_uri = language.get('subtitles_uri')
|
||||
if not (subtitles_uri and language.get('published')):
|
||||
continue
|
||||
subtitle = subtitles.setdefault(language.get('code') or 'en', [])
|
||||
for f in ('json', 'srt', 'vtt'):
|
||||
subtitle.append({
|
||||
'ext': f,
|
||||
'url': update_url_query(subtitles_uri, {'format': f}),
|
||||
})
|
||||
|
||||
info = {
|
||||
'url': video_url,
|
||||
'id': video_id,
|
||||
'subtitles': subtitles,
|
||||
'title': title,
|
||||
'description': meta.get('description'),
|
||||
'thumbnail': meta.get('thumbnail'),
|
||||
'duration': int_or_none(meta.get('duration')),
|
||||
'timestamp': parse_iso8601(meta.get('created')),
|
||||
}
|
||||
|
||||
for ie in (YoutubeIE, VimeoIE):
|
||||
if ie.suitable(video_url):
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': ie.ie_key(),
|
||||
})
|
||||
break
|
||||
|
||||
return info
|
|
@ -4,23 +4,57 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
# There are different sources of video in arte.tv, the extraction process
|
||||
# is different for each one. The videos usually expire in 7 days, so we can't
|
||||
# add tests.
|
||||
|
||||
|
||||
class ArteTVBaseIE(InfoExtractor):
|
||||
def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
||||
info = self._download_json(json_url, video_id)
|
||||
_ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
|
||||
_API_BASE = 'https://api.arte.tv/api/player/v1'
|
||||
|
||||
|
||||
class ArteTVIE(ArteTVBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
|
||||
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
|
||||
)
|
||||
/(?P<id>\d{6}-\d{3}-[AF])
|
||||
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
||||
'info_dict': {
|
||||
'id': '088501-000-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mexico: Stealing Petrol to Survive',
|
||||
'upload_date': '20190628',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
lang = mobj.group('lang') or mobj.group('lang_2')
|
||||
|
||||
info = self._download_json(
|
||||
'%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
|
||||
player_info = info['videoJsonPlayer']
|
||||
|
||||
vsr = try_get(player_info, lambda x: x['VSR'], dict)
|
||||
|
@ -37,18 +71,11 @@ def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
|||
if not upload_date_str:
|
||||
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
|
||||
|
||||
title = (player_info.get('VTI') or title or player_info['VID']).strip()
|
||||
title = (player_info.get('VTI') or player_info['VID']).strip()
|
||||
subtitle = player_info.get('VSU', '').strip()
|
||||
if subtitle:
|
||||
title += ' - %s' % subtitle
|
||||
|
||||
info_dict = {
|
||||
'id': player_info['VID'],
|
||||
'title': title,
|
||||
'description': player_info.get('VDE'),
|
||||
'upload_date': unified_strdate(upload_date_str),
|
||||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
||||
}
|
||||
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
|
||||
|
||||
LANGS = {
|
||||
|
@ -65,6 +92,10 @@ def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
|||
formats = []
|
||||
for format_id, format_dict in vsr.items():
|
||||
f = dict(format_dict)
|
||||
format_url = url_or_none(f.get('url'))
|
||||
streamer = f.get('streamer')
|
||||
if not format_url and not streamer:
|
||||
continue
|
||||
versionCode = f.get('versionCode')
|
||||
l = re.escape(langcode)
|
||||
|
||||
|
@ -107,6 +138,16 @@ def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
|||
else:
|
||||
lang_pref = -1
|
||||
|
||||
media_type = f.get('mediaType')
|
||||
if media_type == 'hls':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
for m3u8_format in m3u8_formats:
|
||||
m3u8_format['language_preference'] = lang_pref
|
||||
formats.extend(m3u8_formats)
|
||||
continue
|
||||
|
||||
format = {
|
||||
'format_id': format_id,
|
||||
'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
|
||||
|
@ -118,7 +159,7 @@ def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
|||
'quality': qfunc(f.get('quality')),
|
||||
}
|
||||
|
||||
if f.get('mediaType') == 'rtmp':
|
||||
if media_type == 'rtmp':
|
||||
format['url'] = f['streamer']
|
||||
format['play_path'] = 'mp4:' + f['url']
|
||||
format['ext'] = 'flv'
|
||||
|
@ -127,56 +168,50 @@ def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
|||
|
||||
formats.append(format)
|
||||
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
info_dict['formats'] = formats
|
||||
return info_dict
|
||||
return {
|
||||
'id': player_info.get('VID') or video_id,
|
||||
'title': title,
|
||||
'description': player_info.get('VDE'),
|
||||
'upload_date': unified_strdate(upload_date_str),
|
||||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class ArteTVPlus7IE(ArteTVBaseIE):
|
||||
IE_NAME = 'arte.tv:+7'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>\d{6}-\d{3}-[AF])'
|
||||
|
||||
class ArteTVEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
||||
'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
|
||||
'info_dict': {
|
||||
'id': '088501-000-A',
|
||||
'id': '100605-013-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mexico: Stealing Petrol to Survive',
|
||||
'upload_date': '20190628',
|
||||
'title': 'United we Stream November Lockdown Edition #13',
|
||||
'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
|
||||
'upload_date': '20201116',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, video_id = re.match(self._VALID_URL, url).groups()
|
||||
return self._extract_from_json_url(
|
||||
'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id),
|
||||
video_id, lang)
|
||||
|
||||
|
||||
class ArteTVEmbedIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:embed'
|
||||
_VALID_URL = r'''(?x)
|
||||
https://www\.arte\.tv
|
||||
/player/v3/index\.php\?json_url=
|
||||
(?P<json_url>
|
||||
https?://api\.arte\.tv/api/player/v1/config/
|
||||
(?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF])
|
||||
)
|
||||
'''
|
||||
|
||||
_TESTS = []
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [url for _, url in re.findall(
|
||||
r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
json_url, lang, video_id = re.match(self._VALID_URL, url).groups()
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
json_url = qs['json_url'][0]
|
||||
video_id = ArteTVIE._match_id(json_url)
|
||||
return self.url_result(
|
||||
json_url, ie=ArteTVIE.ie_key(), video_id=video_id)
|
||||
|
||||
|
||||
class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||
IE_NAME = 'arte.tv:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
|
||||
'info_dict': {
|
||||
|
@ -185,17 +220,35 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
|
|||
'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||
collection = self._download_json(
|
||||
'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
|
||||
% (lang, playlist_id), playlist_id)
|
||||
'%s/collectionData/%s/%s?source=videos'
|
||||
% (self._API_BASE, lang, playlist_id), playlist_id)
|
||||
entries = []
|
||||
for video in collection['videos']:
|
||||
if not isinstance(video, dict):
|
||||
continue
|
||||
video_url = url_or_none(video.get('url')) or url_or_none(video.get('jsonUrl'))
|
||||
if not video_url:
|
||||
continue
|
||||
video_id = video.get('programId')
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': video_url,
|
||||
'id': video_id,
|
||||
'title': video.get('title'),
|
||||
'alt_title': video.get('subtitle'),
|
||||
'thumbnail': url_or_none(try_get(video, lambda x: x['mainImage']['url'], compat_str)),
|
||||
'duration': int_or_none(video.get('durationSeconds')),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'ie_key': ArteTVIE.ie_key(),
|
||||
})
|
||||
title = collection.get('title')
|
||||
description = collection.get('shortDescription') or collection.get('teaserText')
|
||||
entries = [
|
||||
self._extract_from_json_url(
|
||||
video['jsonUrl'], video.get('programId') or playlist_id, lang)
|
||||
for video in collection['videos'] if video.get('jsonUrl')]
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
|
@ -5,10 +6,7 @@
|
|||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
|
@ -17,33 +15,32 @@
|
|||
parse_filesize,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://youtube-dlc.bandcamp.com/track/youtube-dlc-test-song',
|
||||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||
'md5': 'c557841d5e50261777a6585648adf439',
|
||||
'info_dict': {
|
||||
'id': '1812978515',
|
||||
'ext': 'mp3',
|
||||
'title': "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
|
||||
'title': "youtube-dl \"'/\\ä↭ - youtube-dl \"'/\\ä↭ - youtube-dl test song \"'/\\ä↭",
|
||||
'duration': 9.8485,
|
||||
'uploader': "youtube-dl \"'/\\\u00e4\u21ad",
|
||||
'timestamp': 1354224127,
|
||||
'uploader': 'youtube-dl "\'/\\ä↭',
|
||||
'upload_date': '20121129',
|
||||
'timestamp': 1354224127,
|
||||
},
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||
}, {
|
||||
# free download
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
'md5': '5d92af55811e47f38962a54c30b07ef0',
|
||||
'info_dict': {
|
||||
'id': '2650410135',
|
||||
'ext': 'aiff',
|
||||
|
@ -82,11 +79,16 @@ class BandcampIE(InfoExtractor):
|
|||
},
|
||||
}]
|
||||
|
||||
def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True):
    """Parse the JSON object held in a ``data-<attr>`` HTML attribute.

    webpage  -- page source to search
    video_id -- identifier passed on to the JSON parser
    attr     -- suffix of the attribute name (``data-tralbum`` by default)
    fatal    -- forwarded to ``_parse_json`` only

    Returns whatever ``_parse_json`` returns for the attribute value.
    """
    # The value must be a quoted ``{...}`` object; group 2 is the JSON text.
    attr_re = r'data-%s=(["\'])({.+?})\1' % attr
    # NOTE(review): the regex search is not given `fatal`, so a missing
    # attribute presumably still raises even with fatal=False - confirm.
    json_text = self._html_search_regex(
        attr_re, webpage, attr + ' data', group=2)
    return self._parse_json(json_text, video_id, fatal=fatal)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = mobj.group('title')
|
||||
title = self._match_id(url)
|
||||
webpage = self._download_webpage(url, title)
|
||||
thumbnail = self._html_search_meta('og:image', webpage, default=None)
|
||||
tralbum = self._extract_data_attr(webpage, title)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
track_id = None
|
||||
track = None
|
||||
|
@ -94,11 +96,7 @@ def _real_extract(self, url):
|
|||
duration = None
|
||||
|
||||
formats = []
|
||||
trackinfo_block = self._html_search_regex(
|
||||
r'trackinfo(?:["\']|"):\[\s*({.+?})\s*\],(?:["\']|")',
|
||||
webpage, 'track info', default='{}')
|
||||
|
||||
track_info = self._parse_json(trackinfo_block, title)
|
||||
track_info = try_get(tralbum, lambda x: x['trackinfo'][0], dict)
|
||||
if track_info:
|
||||
file_ = track_info.get('file')
|
||||
if isinstance(file_, dict):
|
||||
|
@ -114,40 +112,26 @@ def _real_extract(self, url):
|
|||
'acodec': ext,
|
||||
'abr': int_or_none(abr_str),
|
||||
})
|
||||
|
||||
track_id = str_or_none(track_info.get('track_id') or track_info.get('id'))
|
||||
track = track_info.get('title')
|
||||
track_id = str_or_none(
|
||||
track_info.get('track_id') or track_info.get('id'))
|
||||
track_number = int_or_none(track_info.get('track_num'))
|
||||
duration = float_or_none(track_info.get('duration'))
|
||||
|
||||
def extract(key):
|
||||
data = self._html_search_regex(
|
||||
r',(["\']|")%s\1:\1(?P<value>(?:\\\1|((?!\1).))+)\1' % key,
|
||||
webpage, key, default=None, group='value')
|
||||
return data.replace(r'\"', '"').replace('\\\\', '\\') if data else data
|
||||
|
||||
track = extract('title')
|
||||
artist = extract('artist')
|
||||
album = extract('album_title')
|
||||
embed = self._extract_data_attr(webpage, title, 'embed', False)
|
||||
current = tralbum.get('current') or {}
|
||||
artist = embed.get('artist') or current.get('artist') or tralbum.get('artist')
|
||||
timestamp = unified_timestamp(
|
||||
extract('publish_date') or extract('album_publish_date'))
|
||||
release_date = unified_strdate(extract('album_release_date'))
|
||||
current.get('publish_date') or tralbum.get('album_publish_date'))
|
||||
|
||||
download_link = self._search_regex(
|
||||
r'freeDownloadPage(?:["\']|"):\s*(["\']|")(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'download link', default=None, group='url')
|
||||
download_link = tralbum.get('freeDownloadPage')
|
||||
if download_link:
|
||||
track_id = self._search_regex(
|
||||
r'\?id=(?P<id>\d+)&',
|
||||
download_link, 'track id')
|
||||
track_id = compat_str(tralbum['id'])
|
||||
|
||||
download_webpage = self._download_webpage(
|
||||
download_link, track_id, 'Downloading free downloads page')
|
||||
|
||||
blob = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
|
||||
'blob', group='blob'),
|
||||
track_id, transform_source=unescapeHTML)
|
||||
blob = self._extract_data_attr(download_webpage, track_id, 'blob')
|
||||
|
||||
info = try_get(
|
||||
blob, (lambda x: x['digital_items'][0],
|
||||
|
@ -213,20 +197,20 @@ def extract(key):
|
|||
'thumbnail': thumbnail,
|
||||
'uploader': artist,
|
||||
'timestamp': timestamp,
|
||||
'release_date': release_date,
|
||||
'release_date': unified_strdate(tralbum.get('album_release_date')),
|
||||
'duration': duration,
|
||||
'track': track,
|
||||
'track_number': track_number,
|
||||
'track_id': track_id,
|
||||
'artist': artist,
|
||||
'album': album,
|
||||
'album': embed.get('album_title'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class BandcampAlbumIE(InfoExtractor):
|
||||
class BandcampAlbumIE(BandcampIE):
|
||||
IE_NAME = 'Bandcamp:album'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<id>[^/?#&]+))?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||
|
@ -236,7 +220,10 @@ class BandcampAlbumIE(InfoExtractor):
|
|||
'info_dict': {
|
||||
'id': '1353101989',
|
||||
'ext': 'mp3',
|
||||
'title': 'Intro',
|
||||
'title': 'Blazo - Intro',
|
||||
'timestamp': 1311756226,
|
||||
'upload_date': '20110727',
|
||||
'uploader': 'Blazo',
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@ -244,7 +231,10 @@ class BandcampAlbumIE(InfoExtractor):
|
|||
'info_dict': {
|
||||
'id': '38097443',
|
||||
'ext': 'mp3',
|
||||
'title': 'Kero One - Keep It Alive (Blazo remix)',
|
||||
'title': 'Blazo - Kero One - Keep It Alive (Blazo remix)',
|
||||
'timestamp': 1311757238,
|
||||
'upload_date': '20110727',
|
||||
'uploader': 'Blazo',
|
||||
}
|
||||
},
|
||||
],
|
||||
|
@ -280,6 +270,7 @@ class BandcampAlbumIE(InfoExtractor):
|
|||
'title': '"Entropy" EP',
|
||||
'uploader_id': 'jstrecords',
|
||||
'id': 'entropy-ep',
|
||||
'description': 'md5:0ff22959c943622972596062f2f366a5',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
|
@ -289,6 +280,7 @@ class BandcampAlbumIE(InfoExtractor):
|
|||
'id': 'we-are-the-plague',
|
||||
'title': 'WE ARE THE PLAGUE',
|
||||
'uploader_id': 'insulters',
|
||||
'description': 'md5:b3cf845ee41b2b1141dc7bde9237255f',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}]
|
||||
|
@ -300,43 +292,34 @@ def suitable(cls, url):
|
|||
else super(BandcampAlbumIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
uploader_id = mobj.group('subdomain')
|
||||
album_id = mobj.group('album_id')
|
||||
uploader_id, album_id = re.match(self._VALID_URL, url).groups()
|
||||
playlist_id = album_id or uploader_id
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
track_elements = re.findall(
|
||||
r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
|
||||
if not track_elements:
|
||||
tralbum = self._extract_data_attr(webpage, playlist_id)
|
||||
track_info = tralbum.get('trackinfo')
|
||||
if not track_info:
|
||||
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||
# Only tracks with duration info have songs
|
||||
entries = [
|
||||
self.url_result(
|
||||
compat_urlparse.urljoin(url, t_path),
|
||||
ie=BandcampIE.ie_key(),
|
||||
video_title=self._search_regex(
|
||||
r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
|
||||
elem_content, 'track title', fatal=False))
|
||||
for elem_content, t_path in track_elements
|
||||
if self._html_search_meta('duration', elem_content, default=None)]
|
||||
urljoin(url, t['title_link']), BandcampIE.ie_key(),
|
||||
str_or_none(t.get('track_id') or t.get('id')), t.get('title'))
|
||||
for t in track_info
|
||||
if t.get('duration')]
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'album_title\s*(?:"|["\']):\s*("|["\'])(?P<album>(?:\\\1|((?!\1).))+)\1',
|
||||
webpage, 'title', fatal=False, group='album')
|
||||
|
||||
if title:
|
||||
title = title.replace(r'\"', '"')
|
||||
current = tralbum.get('current') or {}
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'uploader_id': uploader_id,
|
||||
'id': playlist_id,
|
||||
'title': title,
|
||||
'title': current.get('title'),
|
||||
'description': current.get('about'),
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
||||
class BandcampWeeklyIE(InfoExtractor):
|
||||
class BandcampWeeklyIE(BandcampIE):
|
||||
IE_NAME = 'Bandcamp:weekly'
|
||||
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
|
@ -351,29 +334,23 @@ class BandcampWeeklyIE(InfoExtractor):
|
|||
'release_date': '20170404',
|
||||
'series': 'Bandcamp Weekly',
|
||||
'episode': 'Magic Moments',
|
||||
'episode_number': 208,
|
||||
'episode_id': '224',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
'format': 'opus-lo',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
show_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, show_id)
|
||||
|
||||
blob = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
|
||||
'blob', group='blob'),
|
||||
video_id, transform_source=unescapeHTML)
|
||||
blob = self._extract_data_attr(webpage, show_id, 'blob')
|
||||
|
||||
show = blob['bcw_show']
|
||||
|
||||
# This is desired because any invalid show id redirects to `bandcamp.com`
|
||||
# which happens to expose the latest Bandcamp Weekly episode.
|
||||
show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
|
||||
show = blob['bcw_data'][show_id]
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in show['audio_stream'].items():
|
||||
|
@ -398,20 +375,8 @@ def _real_extract(self, url):
|
|||
if subtitle:
|
||||
title += ' - %s' % subtitle
|
||||
|
||||
episode_number = None
|
||||
seq = blob.get('bcw_seq')
|
||||
|
||||
if seq and isinstance(seq, list):
|
||||
try:
|
||||
episode_number = next(
|
||||
int_or_none(e.get('episode_number'))
|
||||
for e in seq
|
||||
if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
|
||||
except StopIteration:
|
||||
pass
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': show_id,
|
||||
'title': title,
|
||||
'description': show.get('desc') or show.get('short_desc'),
|
||||
'duration': float_or_none(show.get('audio_duration')),
|
||||
|
@ -419,7 +384,6 @@ def _real_extract(self, url):
|
|||
'release_date': unified_strdate(show.get('published_date')),
|
||||
'series': 'Bandcamp Weekly',
|
||||
'episode': show.get('subtitle'),
|
||||
'episode_number': episode_number,
|
||||
'episode_id': compat_str(video_id),
|
||||
'episode_id': show_id,
|
||||
'formats': formats
|
||||
}
|
||||
|
|
|
@ -981,7 +981,7 @@ def _real_extract(self, url):
|
|||
group_id = self._search_regex(
|
||||
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
|
||||
webpage, 'group id', default=None)
|
||||
if playlist_id:
|
||||
if group_id:
|
||||
return self.url_result(
|
||||
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
||||
ie=BBCCoUkIE.ie_key())
|
||||
|
@ -1092,10 +1092,26 @@ def _real_extract(self, url):
|
|||
self._search_regex(
|
||||
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
|
||||
'bbcthree config', default='{}'),
|
||||
playlist_id, transform_source=js_to_json, fatal=False)
|
||||
if bbc3_config:
|
||||
playlist_id, transform_source=js_to_json, fatal=False) or {}
|
||||
payload = bbc3_config.get('payload') or {}
|
||||
if payload:
|
||||
clip = payload.get('currentClip') or {}
|
||||
clip_vpid = clip.get('vpid')
|
||||
clip_title = clip.get('title')
|
||||
if clip_vpid and clip_title:
|
||||
formats, subtitles = self._download_media_selector(clip_vpid)
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': clip_vpid,
|
||||
'title': clip_title,
|
||||
'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
|
||||
'description': clip.get('description'),
|
||||
'duration': parse_duration(clip.get('duration')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
bbc3_playlist = try_get(
|
||||
bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'],
|
||||
payload, lambda x: x['content']['bbcMedia']['playlist'],
|
||||
dict)
|
||||
if bbc3_playlist:
|
||||
playlist_title = bbc3_playlist.get('title') or playlist_title
|
||||
|
@ -1118,6 +1134,39 @@ def _real_extract(self, url):
|
|||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
initial_data = self._parse_json(self._search_regex(
|
||||
r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
|
||||
'preload state', default='{}'), playlist_id, fatal=False)
|
||||
if initial_data:
|
||||
def parse_media(media):
|
||||
if not media:
|
||||
return
|
||||
for item in (try_get(media, lambda x: x['media']['items'], list) or []):
|
||||
item_id = item.get('id')
|
||||
item_title = item.get('title')
|
||||
if not (item_id and item_title):
|
||||
continue
|
||||
formats, subtitles = self._download_media_selector(item_id)
|
||||
self._sort_formats(formats)
|
||||
entries.append({
|
||||
'id': item_id,
|
||||
'title': item_title,
|
||||
'thumbnail': item.get('holdingImageUrl'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
for resp in (initial_data.get('data') or {}).values():
|
||||
name = resp.get('name')
|
||||
if name == 'media-experience':
|
||||
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
||||
elif name == 'article':
|
||||
for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
|
||||
if block.get('type') != 'media':
|
||||
continue
|
||||
parse_media(block.get('model'))
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def extract_all(pattern):
|
||||
return list(filter(None, map(
|
||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||
|
|
|
@ -36,6 +36,14 @@ class BitChuteIE(InfoExtractor):
|
|||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
def _extract_urls(webpage):
    """Return BitChute URLs embedded in *webpage*.

    Looks at the ``src`` attribute of ``<script>`` and ``<iframe>`` tags and
    keeps every value that matches ``BitChuteIE._VALID_URL``.
    """
    embed_re = r'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>%s)' % BitChuteIE._VALID_URL
    found = []
    for match in re.finditer(embed_re, webpage):
        found.append(match.group('url'))
    return found
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
|
|
61
youtube_dlc/extractor/bitwave.py
Normal file
61
youtube_dlc/extractor/bitwave.py
Normal file
|
@ -0,0 +1,61 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BitwaveReplayIE(InfoExtractor):
    """Extractor for archived bitwave.tv broadcasts (``/<user>/replay/<id>``)."""

    IE_NAME = 'bitwave:replay'
    _VALID_URL = r'https?://(?:www\.)?bitwave\.tv/(?P<user>\w+)/replay/(?P<id>\w+)/?$'
    _TEST = {
        'url': 'https://bitwave.tv/RhythmicCarnage/replay/z4P6eq5L7WDrM85UCrVr',
        'only_matching': True
    }

    def _real_extract(self, url):
        """Fetch the replay record from the bitwave API and build the info dict.

        ``title`` and ``url`` are required and raise ``KeyError`` when the API
        omits them. Uploader name and thumbnails are optional metadata: a
        replay without them is still returned, with those fields empty.
        """
        replay_id = self._match_id(url)
        replay = self._download_json(
            'https://api.bitwave.tv/v1/replays/' + replay_id,
            replay_id
        )
        data = replay['data']

        # Optional fields: read defensively so missing metadata cannot
        # abort an otherwise playable replay.
        uploader = data.get('name')
        thumbnails = [
            {'url': x} for x in (data.get('thumbnails') or []) if x
        ]

        return {
            'id': replay_id,
            'title': data['title'],
            'uploader': uploader,
            'uploader_id': uploader,
            'url': data['url'],
            'thumbnails': thumbnails,
        }
|
||||
|
||||
|
||||
class BitwaveStreamIE(InfoExtractor):
    """Extractor for live bitwave.tv channels (``bitwave.tv/<username>``)."""

    IE_NAME = 'bitwave:stream'
    _VALID_URL = r'https?://(?:www\.)?bitwave\.tv/(?P<id>\w+)/?$'
    _TEST = {
        'url': 'https://bitwave.tv/doomtube',
        'only_matching': True
    }

    def _real_extract(self, url):
        """Fetch the channel record from the bitwave API and build the info dict.

        The stream ``url`` and ``title`` are required and raise ``KeyError``
        when absent. Thumbnail and viewer count are optional and come back as
        ``None`` when the API omits them.
        """
        username = self._match_id(url)
        channel = self._download_json(
            'https://api.bitwave.tv/v1/channels/' + username,
            username)
        data = channel['data']

        formats = self._extract_m3u8_formats(
            data['url'], username,
            'mp4')
        self._sort_formats(formats)

        return {
            'id': username,
            'title': self._live_title(data['title']),
            'uploader': username,
            'uploader_id': username,
            'formats': formats,
            # Optional metadata: missing keys must not fail the extraction.
            'thumbnail': data.get('thumbnail'),
            'is_live': True,
            'view_count': data.get('viewCount')
        }
|
98
youtube_dlc/extractor/box.py
Normal file
98
youtube_dlc/extractor/box.py
Normal file
|
@ -0,0 +1,98 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
parse_iso8601,
|
||||
# try_get,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class BoxIE(InfoExtractor):
    """Extractor for files shared through ``app.box.com/s/<shared_name>/file/<id>``."""

    _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/]+)/file/(?P<id>\d+)'
    _TEST = {
        'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
        'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
        'info_dict': {
            'id': '510727257538',
            'ext': 'mp4',
            'title': 'Garber St. Louis will be 28th MLS team +scarving.mp4',
            'uploader': 'MLS Video',
            'timestamp': 1566320259,
            'upload_date': '20190820',
            'uploader_id': '235196876',
        }
    }

    def _real_extract(self, url):
        """Resolve a shared Box file into an info dict.

        Three requests are made, in this order: the share page (for the
        request token in ``Box.config``), the token endpoint (for a read
        access token), and the files API (for the file metadata). The only
        format produced is the authenticated direct download, and only when
        the API marks the download as available.
        """
        url_match = re.match(self._VALID_URL, url)
        shared_name = url_match.group('shared_name')
        file_id = url_match.group('id')

        webpage = self._download_webpage(url, file_id)
        box_config = self._parse_json(self._search_regex(
            r'Box\.config\s*=\s*({.+?});', webpage,
            'Box config'), file_id)
        request_token = box_config['requestToken']

        token_headers = {
            'Content-Type': 'application/json',
            'X-Request-Token': request_token,
            'X-Box-EndUser-API': 'sharedName=' + shared_name,
        }
        tokens = self._download_json(
            'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
            'Downloading token JSON metadata',
            data=json.dumps({'fileIDs': [file_id]}).encode(),
            headers=token_headers)
        access_token = tokens[file_id]['read']

        shared_link = 'https://app.box.com/s/' + shared_name
        api_headers = {
            'Authorization': 'Bearer ' + access_token,
            'BoxApi': 'shared_link=' + shared_link,
            'X-Rep-Hints': '[dash]',  # TODO: extract `hls` formats
        }
        api_query = {
            'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size'
        }
        file_info = self._download_json(
            'https://api.box.com/2.0/files/' + file_id, file_id,
            'Downloading file JSON metadata',
            headers=api_headers, query=api_query)
        title = file_info['name']

        # Credentials appended to the download URL below.
        query = {
            'access_token': access_token,
            'shared_link': shared_link
        }

        formats = []

        # DASH representations are not extracted yet; kept for reference:
        # for entry in (try_get(f, lambda x: x['representations']['entries'], list) or []):
        #     entry_url_template = try_get(
        #         entry, lambda x: x['content']['url_template'])
        #     if not entry_url_template:
        #         continue
        #     representation = entry.get('representation')
        #     if representation == 'dash':
        #         TODO: append query to every fragment URL
        #         formats.extend(self._extract_mpd_formats(
        #             entry_url_template.replace('{+asset_path}', 'manifest.mpd'),
        #             file_id, query=query))

        download_url = file_info.get('authenticated_download_url')
        if download_url and file_info.get('is_download_available'):
            formats.append({
                'ext': file_info.get('extension') or determine_ext(title),
                'filesize': file_info.get('size'),
                'format_id': 'download',
                'url': update_url_query(download_url, query),
            })

        self._sort_formats(formats)

        creator = file_info.get('created_by') or {}

        return {
            'id': file_id,
            'title': title,
            'formats': formats,
            'description': file_info.get('description') or None,
            'uploader': creator.get('name'),
            'timestamp': parse_iso8601(file_info.get('created_at')),
            'uploader_id': creator.get('id'),
        }
|
|
@ -147,7 +147,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||
]
|
||||
|
||||
@classmethod
|
||||
def _build_brighcove_url(cls, object_str):
|
||||
def _build_brightcove_url(cls, object_str):
|
||||
"""
|
||||
Build a Brightcove url from a xml string containing
|
||||
<object class="BrightcoveExperience">{params}</object>
|
||||
|
@ -217,7 +217,7 @@ def find_param(name):
|
|||
return cls._make_brightcove_url(params)
|
||||
|
||||
@classmethod
|
||||
def _build_brighcove_url_from_js(cls, object_js):
|
||||
def _build_brightcove_url_from_js(cls, object_js):
|
||||
# The layout of JS is as follows:
|
||||
# customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
|
||||
# // build Brightcove <object /> XML
|
||||
|
@ -272,12 +272,12 @@ def _extract_brightcove_urls(cls, webpage):
|
|||
).+?>\s*</object>''',
|
||||
webpage)
|
||||
if matches:
|
||||
return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
|
||||
return list(filter(None, [cls._build_brightcove_url(m) for m in matches]))
|
||||
|
||||
matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
|
||||
if matches:
|
||||
return list(filter(None, [
|
||||
cls._build_brighcove_url_from_js(custom_bc)
|
||||
cls._build_brightcove_url_from_js(custom_bc)
|
||||
for custom_bc in matches]))
|
||||
return [src for _, src in re.findall(
|
||||
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
|
||||
|
@ -471,12 +471,17 @@ def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
|
|||
title = json_data['name'].strip()
|
||||
|
||||
formats = []
|
||||
sources_num = len(json_data.get('sources'))
|
||||
key_systems_present = 0
|
||||
for source in json_data.get('sources', []):
|
||||
container = source.get('container')
|
||||
ext = mimetype2ext(source.get('type'))
|
||||
src = source.get('src')
|
||||
# https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
|
||||
if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
|
||||
# https://apis.support.brightcove.com/playback/references/playback-api-video-fields-reference.html
|
||||
if source.get('key_systems'):
|
||||
key_systems_present += 1
|
||||
continue
|
||||
elif ext == 'ism' or container == 'WVM':
|
||||
continue
|
||||
elif ext == 'm3u8' or container == 'M2TS':
|
||||
if not src:
|
||||
|
@ -533,6 +538,10 @@ def build_format_id(kind):
|
|||
'format_id': build_format_id('rtmp'),
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
if sources_num == key_systems_present:
|
||||
raise ExtractorError('This video is DRM protected', expected=True)
|
||||
|
||||
if not formats:
|
||||
# for sonyliv.com DRM protected videos
|
||||
s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
|
||||
|
|
|
@ -5,10 +5,16 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_chr,
|
||||
compat_ord,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
multipart_encode,
|
||||
parse_duration,
|
||||
random_birthday,
|
||||
|
@ -107,8 +113,9 @@ def _real_extract(self, url):
|
|||
r'Odsłony:(?:\s| )*([0-9]+)', webpage,
|
||||
'view_count', default=None)
|
||||
average_rating = self._search_regex(
|
||||
r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
|
||||
webpage, 'rating', fatal=False, group='rating_value')
|
||||
(r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
|
||||
r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
|
||||
group='rating_value')
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
|
@ -123,6 +130,24 @@ def _real_extract(self, url):
|
|||
'age_limit': 18 if need_confirm_age else 0,
|
||||
}
|
||||
|
||||
# Source: https://www.cda.pl/js/player.js?t=1606154898
|
||||
def decrypt_file(a):
    """Turn an obfuscated cda.pl ``file`` value into a playable https URL.

    Steps, in order:
      1. strip the known junk marker substrings;
      2. percent-decode the remainder;
      3. rotate every printable ASCII character (codes 33..126) by 47
         positions within that range, leaving all other characters as-is;
      4. drop the ``.cda.mp4`` suffix marker and normalise the numbered
         ``.2cda.pl`` / ``.3cda.pl`` hosts to ``.cda.pl``;
      5. prepend ``https://`` and make sure ``.mp4`` ends up either before
         ``/upstream`` (when present) or at the very end.

    :param a: obfuscated value taken from the player data.
    :return: the reconstructed URL string.
    """
    def rotate(ch):
        # 33 + (code + 14) % 94 is a rotation by 47 inside 33..126.
        code = compat_ord(ch)
        if code < 33 or code > 126:
            return compat_chr(code)
        return compat_chr(33 + (code + 14) % 94)

    cleaned = a
    for marker in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
        cleaned = cleaned.replace(marker, '')

    decoded = ''.join(
        rotate(ch) for ch in compat_urllib_parse_unquote(cleaned))
    decoded = decoded.replace('.cda.mp4', '')
    for numbered_host in ('.2cda.pl', '.3cda.pl'):
        decoded = decoded.replace(numbered_host, '.cda.pl')

    if '/upstream' not in decoded:
        return 'https://' + decoded + '.mp4'
    # The extension has to precede the /upstream path component.
    return 'https://' + decoded.replace('/upstream', '.mp4/upstream')
|
||||
|
||||
def extract_format(page, version):
|
||||
json_str = self._html_search_regex(
|
||||
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
|
||||
|
@ -141,6 +166,8 @@ def extract_format(page, version):
|
|||
video['file'] = codecs.decode(video['file'], 'rot_13')
|
||||
if video['file'].endswith('adc.mp4'):
|
||||
video['file'] = video['file'].replace('adc.mp4', '.mp4')
|
||||
elif not video['file'].startswith('http'):
|
||||
video['file'] = decrypt_file(video['file'])
|
||||
f = {
|
||||
'url': video['file'],
|
||||
}
|
||||
|
@ -179,4 +206,6 @@ def extract_format(page, version):
|
|||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return info_dict
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
return merge_dicts(info_dict, info)
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
@ -38,7 +39,7 @@ def _real_extract(self, url):
|
|||
|
||||
|
||||
class CNBCVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)'
|
||||
_TEST = {
|
||||
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
|
||||
'info_dict': {
|
||||
|
@ -56,11 +57,15 @@ class CNBCVideoIE(InfoExtractor):
|
|||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id,
|
||||
'video id')
|
||||
path, display_id = re.match(self._VALID_URL, url).groups()
|
||||
video_id = self._download_json(
|
||||
'https://webql-redesign.cnbcfm.com/graphql', display_id, query={
|
||||
'query': '''{
|
||||
page(path: "%s") {
|
||||
vcpsId
|
||||
}
|
||||
}''' % path,
|
||||
})['data']['page']['vcpsId']
|
||||
return self.url_result(
|
||||
'http://video.cnbc.com/gallery/?video=%s' % video_id,
|
||||
'http://video.cnbc.com/gallery/?video=%d' % video_id,
|
||||
CNBCIE.ie_key())
|
||||
|
|
|
@ -1456,9 +1456,10 @@ def _is_valid_url(self, url, video_id, item='video', headers={}):
|
|||
try:
|
||||
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
|
||||
return True
|
||||
except ExtractorError:
|
||||
except ExtractorError as e:
|
||||
self.to_screen(
|
||||
'%s: %s URL is invalid, skipping' % (video_id, item))
|
||||
'%s: %s URL is invalid, skipping: %s'
|
||||
% (video_id, item, error_to_compat_str(e.cause)))
|
||||
return False
|
||||
|
||||
def http_scheme(self):
|
||||
|
@ -1663,7 +1664,7 @@ def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
|
|||
# just the media without qualities renditions.
|
||||
# Fortunately, master playlist can be easily distinguished from media
|
||||
# playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
|
||||
# master playlist tags MUST NOT appear in a media playist and vice versa.
|
||||
# master playlist tags MUST NOT appear in a media playlist and vice versa.
|
||||
# As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
|
||||
# media playlist and MUST NOT appear in master playlist thus we can
|
||||
# clearly detect media playlist with this criterion.
|
||||
|
@ -2596,6 +2597,7 @@ def _media_formats(src, cur_media_type, type_info={}):
|
|||
|
||||
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
|
||||
formats = []
|
||||
|
||||
hdcore_sign = 'hdcore=3.7.0'
|
||||
f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
||||
hds_host = hosts.get('hds')
|
||||
|
@ -2608,6 +2610,7 @@ def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
|
|||
for entry in f4m_formats:
|
||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||
formats.extend(f4m_formats)
|
||||
|
||||
m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
|
||||
hls_host = hosts.get('hls')
|
||||
if hls_host:
|
||||
|
@ -2615,6 +2618,31 @@ def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
|
|||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
http_host = hosts.get('http')
|
||||
if http_host and 'hdnea=' not in manifest_url:
|
||||
REPL_REGEX = r'https://[^/]+/i/([^,]+),([^/]+),([^/]+).csmil/.+'
|
||||
qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
|
||||
qualities_length = len(qualities)
|
||||
if len(formats) in (qualities_length + 1, qualities_length * 2 + 1):
|
||||
i = 0
|
||||
http_formats = []
|
||||
for f in formats:
|
||||
if f['protocol'] == 'm3u8_native' and f['vcodec'] != 'none':
|
||||
for protocol in ('http', 'https'):
|
||||
http_f = f.copy()
|
||||
del http_f['manifest_url']
|
||||
http_url = re.sub(
|
||||
REPL_REGEX, protocol + r'://%s/\1%s\3' % (http_host, qualities[i]), f['url'])
|
||||
http_f.update({
|
||||
'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
|
||||
'url': http_url,
|
||||
'protocol': protocol,
|
||||
})
|
||||
http_formats.append(http_f)
|
||||
i += 1
|
||||
formats.extend(http_formats)
|
||||
|
||||
return formats
|
||||
|
||||
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
|
@ -82,6 +84,7 @@ class CondeNastIE(InfoExtractor):
|
|||
'uploader': 'gq',
|
||||
'upload_date': '20170321',
|
||||
'timestamp': 1490126427,
|
||||
'description': 'How much grimmer would things be if these people were competent?',
|
||||
},
|
||||
}, {
|
||||
# JS embed
|
||||
|
@ -93,7 +96,7 @@ class CondeNastIE(InfoExtractor):
|
|||
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
|
||||
'uploader': 'arstechnica',
|
||||
'upload_date': '20150916',
|
||||
'timestamp': 1442434955,
|
||||
'timestamp': 1442434920,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
|
||||
|
@ -196,6 +199,13 @@ def _extract_video(self, params):
|
|||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for t, caption in video_info.get('captions', {}).items():
|
||||
caption_url = caption.get('src')
|
||||
if not (t in ('vtt', 'srt', 'tml') and caption_url):
|
||||
continue
|
||||
subtitles.setdefault('en', []).append({'url': caption_url})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
|
@ -208,6 +218,7 @@ def _extract_video(self, params):
|
|||
'season': video_info.get('season_title'),
|
||||
'timestamp': parse_iso8601(video_info.get('premiere_date')),
|
||||
'categories': video_info.get('categories'),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -225,8 +236,16 @@ def _real_extract(self, url):
|
|||
if url_type == 'series':
|
||||
return self._extract_series(url, webpage)
|
||||
else:
|
||||
params = self._extract_video_params(webpage, display_id)
|
||||
info = self._search_json_ld(
|
||||
webpage, display_id, fatal=False)
|
||||
video = try_get(self._parse_json(self._search_regex(
|
||||
r'__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
||||
'preload state', '{}'), display_id),
|
||||
lambda x: x['transformed']['video'])
|
||||
if video:
|
||||
params = {'videoId': video['id']}
|
||||
info = {'description': strip_or_none(video.get('description'))}
|
||||
else:
|
||||
params = self._extract_video_params(webpage, display_id)
|
||||
info = self._search_json_ld(
|
||||
webpage, display_id, fatal=False)
|
||||
info.update(self._extract_video(params))
|
||||
return info
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
|
||||
class DiscoveryNetworksDeIE(DPlayIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
|
||||
|
@ -29,6 +29,9 @@ class DiscoveryNetworksDeIE(DPlayIE):
|
|||
}, {
|
||||
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
|
|
@ -60,7 +60,7 @@ def get_item(type_, preference):
|
|||
|
||||
title = get_item('title', preferred_langs) or video_id
|
||||
description = get_item('description', preferred_langs)
|
||||
thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail')
|
||||
thumbnail = xpath_text(playlist, './info/thumburl', 'thumbnail')
|
||||
upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
|
||||
duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
|
||||
view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
|
||||
|
@ -85,7 +85,7 @@ def get_item(type_, preference):
|
|||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnmail,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
from .alphaporno import AlphaPornoIE
|
||||
from .amara import AmaraIE
|
||||
from .alura import (
|
||||
AluraIE,
|
||||
AluraCourseIE
|
||||
|
@ -62,7 +63,7 @@
|
|||
ARDMediathekIE,
|
||||
)
|
||||
from .arte import (
|
||||
ArteTVPlus7IE,
|
||||
ArteTVIE,
|
||||
ArteTVEmbedIE,
|
||||
ArteTVPlaylistIE,
|
||||
)
|
||||
|
@ -116,6 +117,10 @@
|
|||
BitChuteIE,
|
||||
BitChuteChannelIE,
|
||||
)
|
||||
from .bitwave import (
|
||||
BitwaveReplayIE,
|
||||
BitwaveStreamIE,
|
||||
)
|
||||
from .biqle import BIQLEIE
|
||||
from .bleacherreport import (
|
||||
BleacherReportIE,
|
||||
|
@ -125,6 +130,7 @@
|
|||
from .bloomberg import BloombergIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bostonglobe import BostonGlobeIE
|
||||
from .box import BoxIE
|
||||
from .bpb import BpbIE
|
||||
from .br import (
|
||||
BRIE,
|
||||
|
@ -546,6 +552,7 @@
|
|||
EHFTVIE,
|
||||
ITTFIE,
|
||||
)
|
||||
from .lbry import LBRYIE
|
||||
from .lci import LCIIE
|
||||
from .lcp import (
|
||||
LcpPlayIE,
|
||||
|
@ -621,6 +628,7 @@
|
|||
from .massengeschmacktv import MassengeschmackTVIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
from .medaltv import MedalTVIE
|
||||
from .mediaset import MediasetIE
|
||||
from .mediasite import (
|
||||
MediasiteIE,
|
||||
|
@ -755,6 +763,7 @@
|
|||
from .ninegag import NineGagIE
|
||||
from .ninenow import NineNowIE
|
||||
from .nintendo import NintendoIE
|
||||
from .nitter import NitterIE
|
||||
from .njpwworld import NJPWWorldIE
|
||||
from .nobelprize import NobelPrizeIE
|
||||
from .noco import NocoIE
|
||||
|
@ -802,6 +811,7 @@
|
|||
from .nytimes import (
|
||||
NYTimesIE,
|
||||
NYTimesArticleIE,
|
||||
NYTimesCookingIE,
|
||||
)
|
||||
from .nuvid import NuvidIE
|
||||
from .nzz import NZZIE
|
||||
|
@ -864,6 +874,10 @@
|
|||
)
|
||||
from .piksel import PikselIE
|
||||
from .pinkbike import PinkbikeIE
|
||||
from .pinterest import (
|
||||
PinterestIE,
|
||||
PinterestCollectionIE,
|
||||
)
|
||||
from .pladform import PladformIE
|
||||
from .platzi import (
|
||||
PlatziIE,
|
||||
|
@ -940,6 +954,11 @@
|
|||
RayWenderlichCourseIE,
|
||||
)
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .rcs import (
|
||||
RCSIE,
|
||||
RCSEmbedsIE,
|
||||
RCSVariousIE,
|
||||
)
|
||||
from .rds import RDSIE
|
||||
from .redbulltv import (
|
||||
RedBullTVIE,
|
||||
|
@ -982,6 +1001,7 @@
|
|||
from .rtvnh import RTVNHIE
|
||||
from .rtvs import RTVSIE
|
||||
from .ruhd import RUHDIE
|
||||
from .rumble import RumbleEmbedIE
|
||||
from .rutube import (
|
||||
RutubeIE,
|
||||
RutubeChannelIE,
|
||||
|
@ -1041,6 +1061,10 @@
|
|||
SkyNewsIE,
|
||||
SkySportsIE,
|
||||
)
|
||||
from .skyitalia import (
|
||||
SkyArteItaliaIE,
|
||||
SkyItaliaIE,
|
||||
)
|
||||
from .slideshare import SlideshareIE
|
||||
from .slideslive import SlidesLiveIE
|
||||
from .slutload import SlutloadIE
|
||||
|
@ -1078,8 +1102,7 @@
|
|||
SpankBangPlaylistIE,
|
||||
)
|
||||
from .spankwire import SpankwireIE
|
||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
||||
from .spiegeltv import SpiegeltvIE
|
||||
from .spiegel import SpiegelIE
|
||||
from .spike import (
|
||||
BellatorIE,
|
||||
ParamountNetworkIE,
|
||||
|
@ -1093,6 +1116,12 @@
|
|||
from .sport5 import Sport5IE
|
||||
from .sportbox import SportBoxIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .spreaker import (
|
||||
SpreakerIE,
|
||||
SpreakerPageIE,
|
||||
SpreakerShowIE,
|
||||
SpreakerShowPageIE,
|
||||
)
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .sprout import SproutIE
|
||||
from .srgssr import (
|
||||
|
@ -1174,6 +1203,7 @@
|
|||
from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
from .thisoldhouse import ThisOldHouseIE
|
||||
from .thisvid import ThisVidIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .tiktok import TikTokIE
|
||||
from .tinypic import TinyPicIE
|
||||
|
@ -1385,8 +1415,8 @@
|
|||
)
|
||||
from .vlive import (
|
||||
VLiveIE,
|
||||
VLivePostIE,
|
||||
VLiveChannelIE,
|
||||
VLivePlaylistIE
|
||||
)
|
||||
from .vodlocker import VodlockerIE
|
||||
from .vodpl import VODPlIE
|
||||
|
@ -1503,21 +1533,18 @@
|
|||
from .yourupload import YourUploadIE
|
||||
from .youtube import (
|
||||
YoutubeIE,
|
||||
YoutubeChannelIE,
|
||||
YoutubeFavouritesIE,
|
||||
YoutubeHistoryIE,
|
||||
YoutubeLiveIE,
|
||||
YoutubeTabIE,
|
||||
YoutubePlaylistIE,
|
||||
YoutubePlaylistsIE,
|
||||
YoutubeRecommendedIE,
|
||||
YoutubeSearchDateIE,
|
||||
YoutubeSearchIE,
|
||||
YoutubeSearchURLIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeTruncatedIDIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeUserIE,
|
||||
YoutubeYtUserIE,
|
||||
YoutubeWatchLaterIE,
|
||||
)
|
||||
from .zapiks import ZapiksIE
|
||||
|
@ -1543,4 +1570,5 @@
|
|||
)
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zingmp3 import ZingMp3IE
|
||||
from .zoom import ZoomIE
|
||||
from .zype import ZypeIE
|
||||
|
|
|
@ -16,6 +16,7 @@ class FranceInterIE(InfoExtractor):
|
|||
'ext': 'mp3',
|
||||
'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
|
||||
'description': 'md5:401969c5d318c061f86bda1fa359292b',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'upload_date': '20160907',
|
||||
},
|
||||
}
|
||||
|
@ -31,6 +32,7 @@ def _real_extract(self, url):
|
|||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
|
||||
|
||||
upload_date_str = self._search_regex(
|
||||
r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
||||
|
@ -48,6 +50,7 @@ def _real_extract(self, url):
|
|||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'formats': [{
|
||||
'url': video_url,
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
parse_duration,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from .dailymotion import DailymotionIE
|
||||
|
||||
|
@ -128,18 +129,38 @@ def sign(manifest_url, manifest_id):
|
|||
|
||||
is_live = None
|
||||
|
||||
formats = []
|
||||
for video in info['videos']:
|
||||
if video['statut'] != 'ONLINE':
|
||||
videos = []
|
||||
|
||||
for video in (info.get('videos') or []):
|
||||
if video.get('statut') != 'ONLINE':
|
||||
continue
|
||||
video_url = video['url']
|
||||
if not video.get('url'):
|
||||
continue
|
||||
videos.append(video)
|
||||
|
||||
if not videos:
|
||||
for device_type in ['desktop', 'mobile']:
|
||||
fallback_info = self._download_json(
|
||||
'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
|
||||
video_id, 'Downloading fallback %s video JSON' % device_type, query={
|
||||
'device_type': device_type,
|
||||
'browser': 'chrome',
|
||||
}, fatal=False)
|
||||
|
||||
if fallback_info and fallback_info.get('video'):
|
||||
videos.append(fallback_info['video'])
|
||||
|
||||
formats = []
|
||||
for video in videos:
|
||||
video_url = video.get('url')
|
||||
if not video_url:
|
||||
continue
|
||||
if is_live is None:
|
||||
is_live = (try_get(
|
||||
video, lambda x: x['plages_ouverture'][0]['direct'],
|
||||
bool) is True) or '/live.francetv.fr/' in video_url
|
||||
format_id = video['format']
|
||||
video, lambda x: x['plages_ouverture'][0]['direct'], bool) is True
|
||||
or video.get('is_live') is True
|
||||
or '/live.francetv.fr/' in video_url)
|
||||
format_id = video.get('format')
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'f4m':
|
||||
if georestricted:
|
||||
|
@ -154,6 +175,9 @@ def sign(manifest_url, manifest_id):
|
|||
sign(video_url, format_id), video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id=format_id,
|
||||
fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False))
|
||||
elif video_url.startswith('rtmp'):
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
|
@ -166,6 +190,7 @@ def sign(manifest_url, manifest_id):
|
|||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = info['titre']
|
||||
|
@ -185,10 +210,10 @@ def sign(manifest_url, manifest_id):
|
|||
return {
|
||||
'id': video_id,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': clean_html(info['synopsis']),
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
||||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
|
||||
'timestamp': int_or_none(info['diffusion']['timestamp']),
|
||||
'description': clean_html(info.get('synopsis')),
|
||||
'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')),
|
||||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
|
||||
'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
|
||||
'is_live': is_live,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
|
|
|
@ -91,6 +91,7 @@
|
|||
from .videa import VideaIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
from .ustream import UstreamIE
|
||||
from .arte import ArteTVEmbedIE
|
||||
from .videopress import VideoPressIE
|
||||
from .rutube import RutubeIE
|
||||
from .limelight import LimelightBaseIE
|
||||
|
@ -120,6 +121,8 @@
|
|||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .gedi import GediEmbedsIE
|
||||
from .rcs import RCSEmbedsIE
|
||||
from .bitchute import BitChuteIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
|
@ -842,7 +845,7 @@ class GenericIE(InfoExtractor):
|
|||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# MTVSercices embed
|
||||
# MTVServices embed
|
||||
{
|
||||
'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
|
||||
'md5': 'ca1aef97695ef2c1d6973256a57e5252',
|
||||
|
@ -2761,11 +2764,9 @@ def _real_extract(self, url):
|
|||
return self.url_result(ustream_url, UstreamIE.ie_key())
|
||||
|
||||
# Look for embedded arte.tv player
|
||||
mobj = re.search(
|
||||
r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
|
||||
arte_urls = ArteTVEmbedIE._extract_urls(webpage)
|
||||
if arte_urls:
|
||||
return self.playlist_from_matches(arte_urls, video_id, video_title)
|
||||
|
||||
# Look for embedded francetv player
|
||||
mobj = re.search(
|
||||
|
@ -3220,6 +3221,16 @@ def _real_extract(self, url):
|
|||
return self.playlist_from_matches(
|
||||
gedi_urls, video_id, video_title, ie=GediEmbedsIE.ie_key())
|
||||
|
||||
rcs_urls = RCSEmbedsIE._extract_urls(webpage)
|
||||
if rcs_urls:
|
||||
return self.playlist_from_matches(
|
||||
rcs_urls, video_id, video_title, ie=RCSEmbedsIE.ie_key())
|
||||
|
||||
bitchute_urls = BitChuteIE._extract_urls(webpage)
|
||||
if bitchute_urls:
|
||||
return self.playlist_from_matches(
|
||||
bitchute_urls, video_id, video_title, ie=BitChuteIE.ie_key())
|
||||
|
||||
# Look for HTML5 media
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||
if entries:
|
||||
|
|
|
@ -3,11 +3,13 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
try_get,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
@ -38,21 +40,10 @@ class GoogleDriveIE(InfoExtractor):
|
|||
# video can't be watched anonymously due to view count limit reached,
|
||||
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
|
||||
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
|
||||
'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
|
||||
'info_dict': {
|
||||
'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
|
||||
}
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# video id is longer than 28 characters
|
||||
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
|
||||
'info_dict': {
|
||||
'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
|
||||
'duration': 189,
|
||||
},
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
||||
|
@ -171,23 +162,21 @@ def _get_automatic_captions(self, video_id, subtitles_id, hl):
|
|||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
'http://docs.google.com/file/d/%s' % video_id, video_id)
|
||||
video_info = compat_parse_qs(self._download_webpage(
|
||||
'https://drive.google.com/get_video_info',
|
||||
video_id, query={'docid': video_id}))
|
||||
|
||||
title = self._search_regex(
|
||||
r'"title"\s*,\s*"([^"]+)', webpage, 'title',
|
||||
default=None) or self._og_search_title(webpage)
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds',
|
||||
default=None))
|
||||
def get_value(key):
    """Look up the first value stored under ``key`` in ``video_info``.

    ``video_info`` is the enclosing scope's parsed query-string mapping,
    where each key maps to a list of values; only element 0 is used.
    NOTE(review): presumably try_get yields None when the key is missing
    or the list is empty -- confirm against its definition.
    """
    def first_of(info):
        return info[key][0]

    return try_get(video_info, first_of)
|
||||
|
||||
reason = get_value('reason')
|
||||
title = get_value('title')
|
||||
if not title and reason:
|
||||
raise ExtractorError(reason, expected=True)
|
||||
|
||||
formats = []
|
||||
fmt_stream_map = self._search_regex(
|
||||
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
|
||||
'fmt stream map', default='').split(',')
|
||||
fmt_list = self._search_regex(
|
||||
r'"fmt_list"\s*,\s*"([^"]+)', webpage,
|
||||
'fmt_list', default='').split(',')
|
||||
fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
|
||||
fmt_list = (get_value('fmt_list') or '').split(',')
|
||||
if fmt_stream_map and fmt_list:
|
||||
resolutions = {}
|
||||
for fmt in fmt_list:
|
||||
|
@ -257,19 +246,14 @@ def add_source_format(urlh):
|
|||
if urlh and urlh.headers.get('Content-Disposition'):
|
||||
add_source_format(urlh)
|
||||
|
||||
if not formats:
|
||||
reason = self._search_regex(
|
||||
r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
||||
if reason:
|
||||
raise ExtractorError(reason, expected=True)
|
||||
if not formats and reason:
|
||||
raise ExtractorError(reason, expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
hl = self._search_regex(
|
||||
r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None)
|
||||
hl = get_value('hl')
|
||||
subtitles_id = None
|
||||
ttsurl = self._search_regex(
|
||||
r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None)
|
||||
ttsurl = get_value('ttsurl')
|
||||
if ttsurl:
|
||||
# the video Id for subtitles will be the last value in the ttsurl
|
||||
# query string
|
||||
|
@ -281,8 +265,8 @@ def add_source_format(urlh):
|
|||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'duration': duration,
|
||||
'thumbnail': 'https://drive.google.com/thumbnail?id=' + video_id,
|
||||
'duration': int_or_none(get_value('length_seconds')),
|
||||
'formats': formats,
|
||||
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
|
||||
'automatic_captions': self.extract_automatic_captions(
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
|
||||
class InaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|m)\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
||||
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
|
||||
|
@ -31,6 +31,9 @@ class InaIE(InfoExtractor):
|
|||
}, {
|
||||
'url': 'https://www.ina.fr/video/P16173408-video.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://m.ina.fr/video/I12055569',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
|
|
@ -54,7 +54,7 @@ class InfoQIE(BokeCCBaseIE):
|
|||
|
||||
def _extract_rtmp_video(self, webpage):
|
||||
# The server URL is hardcoded
|
||||
video_url = 'rtmpe://video.infoq.com/cfx/st/'
|
||||
video_url = 'rtmpe://videof.infoq.com/cfx/st/'
|
||||
|
||||
# Extract video URL
|
||||
encoded_id = self._search_regex(
|
||||
|
@ -86,17 +86,18 @@ def _extract_http_video(self, webpage):
|
|||
return [{
|
||||
'format_id': 'http_video',
|
||||
'url': http_video_url,
|
||||
'http_headers': {'Referer': 'https://www.infoq.com/'},
|
||||
}]
|
||||
|
||||
def _extract_http_audio(self, webpage, video_id):
|
||||
fields = self._hidden_inputs(webpage)
|
||||
fields = self._form_hidden_inputs('mp3Form', webpage)
|
||||
http_audio_url = fields.get('filename')
|
||||
if not http_audio_url:
|
||||
return []
|
||||
|
||||
# base URL is found in the Location header in the response returned by
|
||||
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
|
||||
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
|
||||
http_audio_url = compat_urlparse.urljoin('http://ress.infoq.com/downloads/mp3downloads/', http_audio_url)
|
||||
http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
|
||||
|
||||
# audio file seem to be missing some times even if there is a download link
|
||||
|
|
|
@ -126,16 +126,23 @@ def _real_extract(self, url):
|
|||
uploader_id, like_count, comment_count, comments, height,
|
||||
width) = [None] * 11
|
||||
|
||||
shared_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\._sharedData\s*=\s*({.+?});',
|
||||
webpage, 'shared data', default='{}'),
|
||||
video_id, fatal=False)
|
||||
shared_data = try_get(webpage,
|
||||
(lambda x: self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.__additionalDataLoaded\(\'/(?:p|tv)/(?:[^/?#&]+)/\',({.+?})\);',
|
||||
x, 'additional data', default='{}'),
|
||||
video_id, fatal=False),
|
||||
lambda x: self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\._sharedData\s*=\s*({.+?});',
|
||||
x, 'shared data', default='{}'),
|
||||
video_id, fatal=False)['entry_data']['PostPage'][0]),
|
||||
None)
|
||||
if shared_data:
|
||||
media = try_get(
|
||||
shared_data,
|
||||
(lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
|
||||
lambda x: x['entry_data']['PostPage'][0]['media']),
|
||||
(lambda x: x['graphql']['shortcode_media'],
|
||||
lambda x: x['media']),
|
||||
dict)
|
||||
if media:
|
||||
video_url = media.get('video_url')
|
||||
|
@ -144,7 +151,7 @@ def _real_extract(self, url):
|
|||
description = try_get(
|
||||
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||
compat_str) or media.get('caption')
|
||||
thumbnail = media.get('display_src')
|
||||
thumbnail = media.get('display_src') or media.get('thumbnail_src')
|
||||
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
|
||||
uploader = media.get('owner', {}).get('full_name')
|
||||
uploader_id = media.get('owner', {}).get('username')
|
||||
|
|
|
@ -150,7 +150,7 @@ def run(self, target, ip, timestamp):
|
|||
elif function in other_functions:
|
||||
other_functions[function]()
|
||||
else:
|
||||
raise ExtractorError('Unknown funcion %s' % function)
|
||||
raise ExtractorError('Unknown function %s' % function)
|
||||
|
||||
return sdk.target
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
merge_dicts,
|
||||
parse_duration,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
url_or_none,
|
||||
xpath_with_ns,
|
||||
xpath_element,
|
||||
|
@ -280,12 +281,12 @@ def extract_subtitle(sub_url):
|
|||
class ITVBTCCIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
|
||||
'url': 'https://www.itv.com/btcc/articles/btcc-2019-brands-hatch-gp-race-action',
|
||||
'info_dict': {
|
||||
'id': 'btcc-2018-all-the-action-from-brands-hatch',
|
||||
'title': 'BTCC 2018: All the action from Brands Hatch',
|
||||
'id': 'btcc-2019-brands-hatch-gp-race-action',
|
||||
'title': 'BTCC 2019: Brands Hatch GP race action',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
'playlist_count': 12,
|
||||
}
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'
|
||||
|
||||
|
@ -294,6 +295,16 @@ def _real_extract(self, url):
|
|||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
json_map = try_get(self._parse_json(self._html_search_regex(
|
||||
'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[^>]*>([^<]+)</script>', webpage, 'json_map'), playlist_id),
|
||||
lambda x: x['props']['pageProps']['article']['body']['content']) or []
|
||||
|
||||
# Discard empty objects
|
||||
video_ids = []
|
||||
for video in json_map:
|
||||
if video['data'].get('id'):
|
||||
video_ids.append(video['data']['id'])
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
|
||||
|
@ -305,7 +316,7 @@ def _real_extract(self, url):
|
|||
'referrer': url,
|
||||
}),
|
||||
ie=BrightcoveNewIE.ie_key(), video_id=video_id)
|
||||
for video_id in re.findall(r'data-video-id=["\'](\d+)', webpage)]
|
||||
for video_id in video_ids]
|
||||
|
||||
title = self._og_search_title(webpage, fatal=False)
|
||||
|
||||
|
|
|
@ -64,7 +64,7 @@ def _real_extract(self, url):
|
|||
duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
|
||||
description = xpath_text(doc, 'ABSTRACT')
|
||||
thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
|
||||
createtion_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
|
||||
creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
|
||||
|
||||
quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
|
||||
formats = []
|
||||
|
@ -84,5 +84,5 @@ def _real_extract(self, url):
|
|||
'duration': duration,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': createtion_time,
|
||||
'timestamp': creation_time,
|
||||
}
|
||||
|
|
|
@ -36,6 +36,9 @@ class LA7IE(InfoExtractor):
|
|||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
if not url.startswith('http'):
|
||||
url = '%s//%s' % (self.http_scheme(), url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player_data = self._search_regex(
|
||||
|
|
91
youtube_dlc/extractor/lbry.py
Normal file
91
youtube_dlc/extractor/lbry.py
Normal file
|
@ -0,0 +1,91 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class LBRYIE(InfoExtractor):
    """Extractor for single video/audio streams on lbry.tv and odysee.com.

    The page URL is turned into an ``lbry://`` URI which is then resolved
    and fetched through the JSON-RPC proxy at api.lbry.tv.
    """

    IE_NAME = 'lbry.tv'
    # Both the channel and the stream part carry a claim-id prefix after the
    # colon.  That prefix can be longer than one character, so the trailing
    # character class needs a `+`; otherwise the id would be cut short and
    # could resolve to a different claim.
    _VALID_URL = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/(?P<id>@[^:]+:[0-9a-z]+/[^:]+:[0-9a-z]+)'
    _TESTS = [{
        # Video
        'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
        'md5': '65bd7ec1f6744ada55da8e4c48a2edf9',
        'info_dict': {
            'id': '17f983b61f53091fb8ea58a9c56804e4ff8cff4d',
            'ext': 'mp4',
            'title': 'First day in LBRY? Start HERE!',
            'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
            'timestamp': 1595694354,
            'upload_date': '20200725',
        }
    }, {
        # Audio
        'url': 'https://lbry.tv/@LBRYFoundation:0/Episode-1:e',
        'md5': 'c94017d3eba9b49ce085a8fad6b98d00',
        'info_dict': {
            'id': 'e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
            'ext': 'mp3',
            'title': 'The LBRY Foundation Community Podcast Episode 1 - Introduction, Streaming on LBRY, Transcoding',
            'description': 'md5:661ac4f1db09f31728931d7b88807a61',
            'timestamp': 1591312601,
            'upload_date': '20200604',
        }
    }, {
        'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
        'only_matching': True,
    }, {
        'url': "https://odysee.com/@ScammerRevolts:b0/I-SYSKEY'D-THE-SAME-SCAMMERS-3-TIMES!:b",
        'only_matching': True,
    }]

    def _call_api_proxy(self, method, display_id, params):
        """POST a JSON-RPC request to the LBRY proxy and return its 'result'.

        method     -- JSON-RPC method name (this extractor uses 'resolve'
                      and 'get')
        display_id -- id used for progress/error reporting of the download
        params     -- dict sent as the JSON-RPC 'params' member

        A response without a 'result' key raises KeyError.
        """
        return self._download_json(
            'https://api.lbry.tv/api/v1/proxy', display_id,
            headers={'Content-Type': 'application/json-rpc'},
            data=json.dumps({
                'method': method,
                'params': params,
            }).encode())['result']

    def _real_extract(self, url):
        """Resolve the claim behind *url* and build its info dict."""
        # lbry:// URIs separate name and claim-id prefix with '#' where the
        # web URL uses ':'
        display_id = self._match_id(url).replace(':', '#')
        uri = 'lbry://' + display_id
        result = self._call_api_proxy(
            'resolve', display_id, {'urls': [uri]})[uri]
        result_value = result['value']
        # Only playable media is supported
        if result_value.get('stream_type') not in ('video', 'audio'):
            raise ExtractorError('Unsupported URL', expected=True)
        streaming_url = self._call_api_proxy(
            'get', display_id, {'uri': uri})['streaming_url']
        # Optional metadata sections; fall back to empty dicts so that the
        # .get() lookups below never fail
        source = result_value.get('source') or {}
        media = result_value.get('video') or result_value.get('audio') or {}
        signing_channel = result_value.get('signing_channel') or {}

        return {
            'id': result['claim_id'],
            'title': result_value['title'],
            'thumbnail': try_get(result_value, lambda x: x['thumbnail']['url'], compat_str),
            'description': result_value.get('description'),
            'license': result_value.get('license'),
            'timestamp': int_or_none(result.get('timestamp')),
            'tags': result_value.get('tags'),
            'width': int_or_none(media.get('width')),
            'height': int_or_none(media.get('height')),
            'duration': int_or_none(media.get('duration')),
            'channel': signing_channel.get('name'),
            'channel_id': signing_channel.get('claim_id'),
            'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
            'filesize': int_or_none(source.get('size')),
            'url': streaming_url,
        }
|
|
@ -5,28 +5,26 @@
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
remove_end,
|
||||
clean_html,
|
||||
merge_dicts,
|
||||
)
|
||||
|
||||
|
||||
class LRTIE(InfoExtractor):
|
||||
IE_NAME = 'lrt.lt'
|
||||
_VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?lrt\.lt(?P<path>/mediateka/irasas/(?P<id>[0-9]+))'
|
||||
_TESTS = [{
|
||||
# m3u8 download
|
||||
'url': 'http://www.lrt.lt/mediateka/irasas/54391/',
|
||||
'md5': 'fe44cf7e4ab3198055f2c598fc175cb0',
|
||||
'url': 'https://www.lrt.lt/mediateka/irasas/2000127261/greita-ir-gardu-sicilijos-ikvepta-klasikiniu-makaronu-su-baklazanais-vakariene',
|
||||
'md5': '85cb2bb530f31d91a9c65b479516ade4',
|
||||
'info_dict': {
|
||||
'id': '54391',
|
||||
'id': '2000127261',
|
||||
'ext': 'mp4',
|
||||
'title': 'Septynios Kauno dienos',
|
||||
'description': 'md5:24d84534c7dc76581e59f5689462411a',
|
||||
'duration': 1783,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'title': 'Greita ir gardu: Sicilijos įkvėpta klasikinių makaronų su baklažanais vakarienė',
|
||||
'description': 'md5:ad7d985f51b0dc1489ba2d76d7ed47fa',
|
||||
'duration': 3035,
|
||||
'timestamp': 1604079000,
|
||||
'upload_date': '20201030',
|
||||
},
|
||||
}, {
|
||||
# direct mp3 download
|
||||
|
@ -43,52 +41,35 @@ class LRTIE(InfoExtractor):
|
|||
},
|
||||
}]
|
||||
|
||||
def _extract_js_var(self, webpage, var_name, default):
    """Find a quoted string assigned to *var_name* in *webpage*.

    Matches ``<var_name> = "..."`` or ``<var_name> = '...'`` and yields the
    text between the quotes (regex group 2).  *default* is handed on to
    ``_search_regex`` unchanged.
    """
    # Group 1 captures the opening quote so the value can run up to the
    # matching closing quote.
    pattern = r'%s\s*=\s*(["\'])((?:(?!\1).)+)\1' % var_name
    # Human-readable label for messages, e.g. 'main_url' -> 'main url'
    label = var_name.replace('_', ' ')
    return self._search_regex(pattern, webpage, label, default, group=2)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
path, video_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' - LRT')
|
||||
media_url = self._extract_js_var(webpage, 'main_url', path)
|
||||
media = self._download_json(self._extract_js_var(
|
||||
webpage, 'media_info_url',
|
||||
'https://www.lrt.lt/servisai/stream_url/vod/media_info/'),
|
||||
video_id, query={'url': media_url})
|
||||
jw_data = self._parse_jwplayer_data(
|
||||
media['playlist_item'], video_id, base_url=url)
|
||||
|
||||
formats = []
|
||||
for _, file_url in re.findall(
|
||||
r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
|
||||
ext = determine_ext(file_url)
|
||||
if ext not in ('m3u8', 'mp3'):
|
||||
json_ld_data = self._search_json_ld(webpage, video_id)
|
||||
|
||||
tags = []
|
||||
for tag in (media.get('tags') or []):
|
||||
tag_name = tag.get('name')
|
||||
if not tag_name:
|
||||
continue
|
||||
# mp3 served as m3u8 produces stuttered media file
|
||||
if ext == 'm3u8' and '.mp3' in file_url:
|
||||
continue
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
fatal=False))
|
||||
elif ext == 'mp3':
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
tags.append(tag_name)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'var\s+record_len\s*=\s*(["\'])(?P<duration>[0-9]+:[0-9]+:[0-9]+)\1',
|
||||
webpage, 'duration', default=None, group='duration'))
|
||||
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'<div[^>]+class=(["\']).*?record-desc-seen.*?\1[^>]*>(?P<count>.+?)</div>',
|
||||
webpage, 'view count', fatal=False, group='count'))
|
||||
like_count = int_or_none(self._search_regex(
|
||||
r'<span[^>]+id=(["\'])flikesCount.*?\1>(?P<count>\d+)<',
|
||||
webpage, 'like count', fatal=False, group='count'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
clean_info = {
|
||||
'description': clean_html(media.get('content')),
|
||||
'tags': tags,
|
||||
}
|
||||
|
||||
return merge_dicts(clean_info, jw_data, json_ld_data)
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
parse_duration,
|
||||
remove_end,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
|
@ -93,6 +94,14 @@ class MailRuIE(InfoExtractor):
|
|||
{
|
||||
'url': 'https://my.mail.ru//list//sinyutin10/video/_myvideo/4.html',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://my.mail.ru/mail/cloud-strife/video/embed/Games/2009',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://videoapi.my.mail.ru/videos/embed/mail/cloud-strife/Games/2009.html',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
|
@ -110,7 +119,7 @@ def _real_extract(self, url):
|
|||
webpage = self._download_webpage(url, video_id)
|
||||
page_config = self._parse_json(self._search_regex([
|
||||
r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
|
||||
r'(?s)"video":\s*(\{.+?\}),'],
|
||||
r'(?s)"video":\s*({.+?}),'],
|
||||
webpage, 'page config', default='{}'), video_id, fatal=False)
|
||||
if page_config:
|
||||
meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl') or page_config.get('metadataUrl')
|
||||
|
@ -121,7 +130,7 @@ def _real_extract(self, url):
|
|||
|
||||
# fix meta_url if missing the host address
|
||||
if re.match(r'^\/\+\/', meta_url):
|
||||
meta_url = 'https://my.mail.ru' + meta_url
|
||||
meta_url = urljoin('https://my.mail.ru', meta_url)
|
||||
|
||||
if meta_url:
|
||||
video_data = self._download_json(
|
||||
|
|
|
@ -1,10 +1,16 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import merge_dicts
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
dict_get,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class MallTVIE(InfoExtractor):
|
||||
|
@ -17,7 +23,7 @@ class MallTVIE(InfoExtractor):
|
|||
'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
||||
'ext': 'mp4',
|
||||
'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
|
||||
'description': 'md5:25fc0ec42a72ba602b602c683fa29deb',
|
||||
'description': 'md5:db7d5744a4bd4043d9d98324aa72ab35',
|
||||
'duration': 216,
|
||||
'timestamp': 1538870400,
|
||||
'upload_date': '20181007',
|
||||
|
@ -37,20 +43,46 @@ def _real_extract(self, url):
|
|||
webpage = self._download_webpage(
|
||||
url, display_id, headers=self.geo_verification_headers())
|
||||
|
||||
SOURCE_RE = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b'
|
||||
video = self._parse_json(self._search_regex(
|
||||
r'videoObject\s*=\s*JSON\.parse\(JSON\.stringify\(({.+?})\)\);',
|
||||
webpage, 'video object'), display_id)
|
||||
video_source = video['VideoSource']
|
||||
video_id = self._search_regex(
|
||||
SOURCE_RE, webpage, 'video id', group='id')
|
||||
r'/([\da-z]+)/index\b', video_source, 'video id')
|
||||
|
||||
media = self._parse_html5_media_entries(
|
||||
url, re.sub(SOURCE_RE, r'\1.m3u8', webpage), video_id,
|
||||
m3u8_id='hls', m3u8_entry_protocol='m3u8_native')[0]
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_source + '.m3u8', video_id, 'mp4', 'm3u8_native')
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for s in (video.get('Subtitles') or {}):
|
||||
s_url = s.get('Url')
|
||||
if not s_url:
|
||||
continue
|
||||
subtitles.setdefault(s.get('Language') or 'cz', []).append({
|
||||
'url': s_url,
|
||||
})
|
||||
|
||||
entity_counts = video.get('EntityCounts') or {}
|
||||
|
||||
def get_count(k):
|
||||
v = entity_counts.get(k + 's') or {}
|
||||
return int_or_none(dict_get(v, ('Count', 'StrCount')))
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
return merge_dicts(media, info, {
|
||||
return merge_dicts({
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': self._og_search_title(webpage, default=None) or display_id,
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
})
|
||||
'title': video.get('Title'),
|
||||
'description': clean_html(video.get('Description')),
|
||||
'thumbnail': video.get('ThumbnailUrl'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'duration': int_or_none(video.get('DurationSeconds')) or parse_duration(video.get('Duration')),
|
||||
'view_count': get_count('View'),
|
||||
'like_count': get_count('Like'),
|
||||
'dislike_count': get_count('Dislike'),
|
||||
'average_rating': float_or_none(try_get(video, lambda x: x['EntityRating']['AvarageRate'])),
|
||||
'comment_count': get_count('Comment'),
|
||||
}, info)
|
||||
|
|
131
youtube_dlc/extractor/medaltv.py
Normal file
131
youtube_dlc/extractor/medaltv.py
Normal file
|
@ -0,0 +1,131 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class MedalTVIE(InfoExtractor):
    """Extractor for clips hosted on medal.tv.

    All metadata is read from the ``hydrationData`` JSON object embedded in
    a <script> tag of the clip page.
    """

    _VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr',
        'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
        'info_dict': {
            'id': '34934644',
            'ext': 'mp4',
            'title': 'Quad Cold',
            'description': 'Medal,https://medal.tv/desktop/',
            'uploader': 'MowgliSB',
            'timestamp': 1603165266,
            'upload_date': '20201020',
            # uploader_id is passed through str_or_none() below
            'uploader_id': '10619174',
        }
    }, {
        'url': 'https://medal.tv/clips/36787208',
        'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
        'info_dict': {
            'id': '36787208',
            'ext': 'mp4',
            'title': 'u tk me i tk u bigger',
            'description': 'Medal,https://medal.tv/desktop/',
            'uploader': 'Mimicc',
            'timestamp': 1605580939,
            'upload_date': '20201117',
            'uploader_id': '5156321',
        }
    }]

    def _real_extract(self, url):
        """Download the clip page and build the info dict from its data.

        Raises ExtractorError when the page carries no data for the clip or
        when the clip reports an error and no formats could be collected.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        hydration_data = self._parse_json(self._search_regex(
            r'<script[^>]*>\s*(?:var\s*)?hydrationData\s*=\s*({.+?})\s*</script>',
            webpage, 'hydration data', default='{}'), video_id)

        clip = try_get(
            hydration_data, lambda x: x['clips'][video_id], dict) or {}
        if not clip:
            raise ExtractorError(
                'Could not find video information.', video_id=video_id)

        title = clip['contentTitle']

        source_width = int_or_none(clip.get('sourceWidth'))
        source_height = int_or_none(clip.get('sourceHeight'))

        # Force float division: this module does not import `division` from
        # __future__, so plain `/` on ints would truncate under Python 2
        # (16 / 9 == 1).
        if source_width and source_height:
            aspect_ratio = float(source_width) / source_height
        else:
            aspect_ratio = 16.0 / 9

        def add_item(container, item_url, height, id_key='format_id', item_id=None):
            """Append a format/thumbnail entry if *item_url* looks right.

            The entry is only kept when its id (explicit *item_id* or
            '<height>p') occurs in the URL.
            """
            if not item_id:
                if not height:
                    # No explicit id and no height to derive one from
                    # (e.g. a bare 'thumbnail' key): nothing to check the
                    # URL against, so skip instead of crashing on '%dp' % None
                    return
                item_id = '%dp' % height
            if item_id not in item_url:
                return
            container.append({
                'url': item_url,
                id_key: item_id,
                # height may be unknown for the 'source' rendition when the
                # clip does not report sourceHeight
                'width': int(round(aspect_ratio * height)) if height else None,
                'height': height
            })

        formats = []
        thumbnails = []
        for k, v in clip.items():
            # Only non-empty string values can be URLs
            if not (v and isinstance(v, compat_str)):
                continue
            # Keys of interest: contentUrl, contentUrl720p, thumbnail1080p, ...
            mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k)
            if not mobj:
                continue
            prefix = mobj.group(1)
            height = int_or_none(mobj.group(2))
            if prefix == 'contentUrl':
                # A key without a height suffix is the source rendition
                add_item(
                    formats, v, height or source_height,
                    item_id=None if height else 'source')
            elif prefix == 'thumbnail':
                add_item(thumbnails, v, height, 'id')

        error = clip.get('error')
        if not formats and error:
            if error == 404:
                raise ExtractorError(
                    'That clip does not exist.',
                    expected=True, video_id=video_id)
            else:
                raise ExtractorError(
                    'An unknown error occurred ({0}).'.format(error),
                    video_id=video_id)

        self._sort_formats(formats)

        # Necessary because the id of the author is not known in advance.
        # Won't raise an issue if no profile can be found as this is optional.
        author = try_get(
            hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {}
        author_id = str_or_none(author.get('id'))
        author_url = 'https://medal.tv/users/{0}'.format(author_id) if author_id else None

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': clip.get('contentDescription'),
            'uploader': author.get('displayName'),
            'timestamp': float_or_none(clip.get('created'), 1000),
            'uploader_id': author_id,
            'uploader_url': author_url,
            'duration': int_or_none(clip.get('videoLengthSeconds')),
            'view_count': int_or_none(clip.get('views')),
            'like_count': int_or_none(clip.get('likes')),
            'comment_count': int_or_none(clip.get('comments')),
        }
|
|
@ -17,9 +17,8 @@
|
|||
|
||||
|
||||
class MGTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
IE_DESC = '芒果TV'
|
||||
_GEO_COUNTRIES = ['CN']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
||||
|
@ -34,14 +33,18 @@ class MGTVIE(InfoExtractor):
|
|||
}, {
|
||||
'url': 'http://www.mgtv.com/b/301817/3826653.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://w.mgtv.com/b/301817/3826653.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
tk2 = base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1]
|
||||
try:
|
||||
api_data = self._download_json(
|
||||
'https://pcweb.api.mgtv.com/player/video', video_id, query={
|
||||
'tk2': base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1],
|
||||
'tk2': tk2,
|
||||
'video_id': video_id,
|
||||
}, headers=self.geo_verification_headers())['data']
|
||||
except ExtractorError as e:
|
||||
|
@ -56,6 +59,7 @@ def _real_extract(self, url):
|
|||
stream_data = self._download_json(
|
||||
'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
|
||||
'pm2': api_data['atc']['pm2'],
|
||||
'tk2': tk2,
|
||||
'video_id': video_id,
|
||||
}, headers=self.geo_verification_headers())['data']
|
||||
stream_domain = stream_data['stream_domain'][0]
|
||||
|
|
|
@ -289,7 +289,7 @@ def _extract_new_triforce_mgid(self, webpage, url='', video_id=None):
|
|||
|
||||
return mgid
|
||||
|
||||
def _extract_mgid(self, webpage, url, data_zone=None):
|
||||
def _extract_mgid(self, webpage, url, title=None, data_zone=None):
|
||||
try:
|
||||
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf
|
||||
# or http://media.mtvnservices.com/{mgid}
|
||||
|
@ -300,7 +300,8 @@ def _extract_mgid(self, webpage, url, data_zone=None):
|
|||
except RegexNotFoundError:
|
||||
mgid = None
|
||||
|
||||
title = self._match_id(url)
|
||||
if not title:
|
||||
title = url_basename(url)
|
||||
|
||||
try:
|
||||
window_data = self._parse_json(self._search_regex(
|
||||
|
@ -336,7 +337,7 @@ def _extract_mgid(self, webpage, url, data_zone=None):
|
|||
def _real_extract(self, url):
|
||||
title = url_basename(url)
|
||||
webpage = self._download_webpage(url, title)
|
||||
mgid = self._extract_mgid(webpage, url)
|
||||
mgid = self._extract_mgid(webpage, url, title=title)
|
||||
videos_info = self._get_videos_info(mgid, url=url)
|
||||
return videos_info
|
||||
|
||||
|
@ -402,6 +403,18 @@ class MTVIE(MTVServicesInfoExtractor):
|
|||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
def extract_child_with_type(parent, t):
    """Return the first entry of parent['children'] whose 'type' equals *t*.

    Raises KeyError if *parent* has no 'children' key and StopIteration if
    no child has the requested type.
    """
    matching = (
        child for child in parent['children']
        if child.get('type') == t)
    return next(matching)
|
||||
|
||||
def _extract_mgid(self, webpage, url=None, title=None, data_zone=None):
    """Return the media URI found in the page's ``__DATA__`` JSON object.

    The URI is read from the 'VideoPlayer' child of the 'MainContainer'
    child of the parsed data.

    *url*, *title* and *data_zone* are unused here.  They are accepted only
    so that this override can be called the same way as the base class
    method, whose ``_real_extract`` passes ``(webpage, url, title=title)``.
    """
    data = self._parse_json(self._search_regex(
        r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
    main_container = self.extract_child_with_type(data, 'MainContainer')
    video_player = self.extract_child_with_type(main_container, 'VideoPlayer')
    return video_player['props']['media']['video']['config']['uri']
|
||||
|
||||
|
||||
class MTVJapanIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'mtvjapan'
|
||||
|
|
|
@ -10,7 +10,6 @@
|
|||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
|
@ -394,8 +393,8 @@ def _real_extract(self, url):
|
|||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'window\.__data\s*=\s*({.+});', webpage,
|
||||
'bootstrap json'), video_id, js_to_json)
|
||||
r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
|
||||
webpage, 'bootstrap json'), video_id)['props']['initialState']
|
||||
video_data = try_get(data, lambda x: x['video']['current'], dict)
|
||||
if not video_data:
|
||||
video_data = data['article']['content'][0]['primaryMedia']['video']
|
||||
|
|
|
@ -82,6 +82,29 @@ class NDRIE(NDRBaseIE):
|
|||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# with subtitles
|
||||
'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
|
||||
'info_dict': {
|
||||
'id': 'extra18674',
|
||||
'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
|
||||
'ext': 'mp4',
|
||||
'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
|
||||
'description': 'md5:42ee53990a715eaaf4dc7f13a3bd56c6',
|
||||
'uploader': 'ndrtv',
|
||||
'upload_date': '20201113',
|
||||
'duration': 1749,
|
||||
'subtitles': {
|
||||
'de': [{
|
||||
'ext': 'ttml',
|
||||
'url': r're:^https://www\.ndr\.de.+',
|
||||
}],
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
|
||||
'only_matching': True,
|
||||
|
@ -242,6 +265,20 @@ def _real_extract(self, url):
|
|||
'preference': quality_key(thumbnail.get('quality')),
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
tracks = config.get('tracks')
|
||||
if tracks and isinstance(tracks, list):
|
||||
for track in tracks:
|
||||
if not isinstance(track, dict):
|
||||
continue
|
||||
track_url = urljoin(url, track.get('src'))
|
||||
if not track_url:
|
||||
continue
|
||||
subtitles.setdefault(track.get('srclang') or 'de', []).append({
|
||||
'url': track_url,
|
||||
'ext': 'ttml',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
|
@ -251,6 +288,7 @@ def _real_extract(self, url):
|
|||
'duration': duration,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -13,17 +13,16 @@
|
|||
|
||||
|
||||
class NetzkinoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/(?P<category>[^/]+)/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/[^/]+/(?P<id>[^/]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.netzkino.de/#!/scifikino/rakete-zum-mond',
|
||||
_TESTS = [{
|
||||
'url': 'https://www.netzkino.de/#!/scifikino/rakete-zum-mond',
|
||||
'md5': '92a3f8b76f8d7220acce5377ea5d4873',
|
||||
'info_dict': {
|
||||
'id': 'rakete-zum-mond',
|
||||
'ext': 'mp4',
|
||||
'title': 'Rakete zum Mond (Endstation Mond, Destination Moon)',
|
||||
'comments': 'mincount:3',
|
||||
'description': 'md5:1eddeacc7e62d5a25a2d1a7290c64a28',
|
||||
'title': 'Rakete zum Mond \u2013 Jules Verne',
|
||||
'description': 'md5:f0a8024479618ddbfa450ff48ffa6c60',
|
||||
'upload_date': '20120813',
|
||||
'thumbnail': r're:https?://.*\.jpg$',
|
||||
'timestamp': 1344858571,
|
||||
|
@ -32,17 +31,30 @@ class NetzkinoIE(InfoExtractor):
|
|||
'params': {
|
||||
'skip_download': 'Download only works from Germany',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.netzkino.de/#!/filme/dr-jekyll-mrs-hyde-2',
|
||||
'md5': 'c7728b2dadd04ff6727814847a51ef03',
|
||||
'info_dict': {
|
||||
'id': 'dr-jekyll-mrs-hyde-2',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dr. Jekyll & Mrs. Hyde 2',
|
||||
'description': 'md5:c2e9626ebd02de0a794b95407045d186',
|
||||
'upload_date': '20190130',
|
||||
'thumbnail': r're:https?://.*\.jpg$',
|
||||
'timestamp': 1548849437,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'Download only works from Germany',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
category_id = mobj.group('category')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
api_url = 'http://api.netzkino.de.simplecache.net/capi-2.0a/categories/%s.json?d=www' % category_id
|
||||
api_info = self._download_json(api_url, video_id)
|
||||
info = next(
|
||||
p for p in api_info['posts'] if p['slug'] == video_id)
|
||||
api_url = 'https://api.netzkino.de.simplecache.net/capi-2.0a/movies/%s.json?d=www' % video_id
|
||||
info = self._download_json(api_url, video_id)
|
||||
custom_fields = info['custom_fields']
|
||||
|
||||
production_js = self._download_webpage(
|
||||
|
@ -67,23 +79,12 @@ def _real_extract(self, url):
|
|||
} for key, tpl in templates.items()]
|
||||
self._sort_formats(formats)
|
||||
|
||||
comments = [{
|
||||
'timestamp': parse_iso8601(c.get('date'), delimiter=' '),
|
||||
'id': c['id'],
|
||||
'author': c['name'],
|
||||
'html': c['content'],
|
||||
'parent': 'root' if c.get('parent', 0) == 0 else c['parent'],
|
||||
} for c in info.get('comments', [])]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'comments': comments,
|
||||
'title': info['title'],
|
||||
'age_limit': int_or_none(custom_fields.get('FSK')[0]),
|
||||
'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
|
||||
'description': clean_html(info.get('content')),
|
||||
'thumbnail': info.get('thumbnail'),
|
||||
'playlist_title': api_info.get('title'),
|
||||
'playlist_id': category_id,
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
|
@ -20,22 +21,22 @@ class NewgroundsIE(InfoExtractor):
|
|||
'info_dict': {
|
||||
'id': '549479',
|
||||
'ext': 'mp3',
|
||||
'title': 'B7 - BusMode',
|
||||
'title': 'Burn7 - B7 - BusMode',
|
||||
'uploader': 'Burn7',
|
||||
'timestamp': 1378878540,
|
||||
'upload_date': '20130911',
|
||||
'duration': 143,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.newgrounds.com/portal/view/673111',
|
||||
'md5': '3394735822aab2478c31b1004fe5e5bc',
|
||||
'url': 'https://www.newgrounds.com/portal/view/1',
|
||||
'md5': 'fbfb40e2dc765a7e830cb251d370d981',
|
||||
'info_dict': {
|
||||
'id': '673111',
|
||||
'id': '1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dancin',
|
||||
'uploader': 'Squirrelman82',
|
||||
'timestamp': 1460256780,
|
||||
'upload_date': '20160410',
|
||||
'title': 'Brian-Beaton - Scrotum 1',
|
||||
'uploader': 'Brian-Beaton',
|
||||
'timestamp': 955064100,
|
||||
'upload_date': '20000406',
|
||||
},
|
||||
}, {
|
||||
# source format unavailable, additional mp4 formats
|
||||
|
@ -43,7 +44,7 @@ class NewgroundsIE(InfoExtractor):
|
|||
'info_dict': {
|
||||
'id': '689400',
|
||||
'ext': 'mp4',
|
||||
'title': 'ZTV News Episode 8',
|
||||
'title': 'Bennettthesage - ZTV News Episode 8',
|
||||
'uploader': 'BennettTheSage',
|
||||
'timestamp': 1487965140,
|
||||
'upload_date': '20170224',
|
||||
|
@ -55,42 +56,73 @@ class NewgroundsIE(InfoExtractor):
|
|||
|
||||
def _real_extract(self, url):
|
||||
media_id = self._match_id(url)
|
||||
|
||||
formats = []
|
||||
uploader = None
|
||||
webpage = self._download_webpage(url, media_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^>]+)</title>', webpage, 'title')
|
||||
|
||||
media_url = self._parse_json(self._search_regex(
|
||||
r'"url"\s*:\s*("[^"]+"),', webpage, ''), media_id)
|
||||
media_url_string = self._search_regex(
|
||||
r'"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None, fatal=False)
|
||||
|
||||
formats = [{
|
||||
'url': media_url,
|
||||
'format_id': 'source',
|
||||
'quality': 1,
|
||||
}]
|
||||
if media_url_string:
|
||||
media_url = self._parse_json(media_url_string, media_id)
|
||||
formats = [{
|
||||
'url': media_url,
|
||||
'format_id': 'source',
|
||||
'quality': 1,
|
||||
}]
|
||||
|
||||
max_resolution = int_or_none(self._search_regex(
|
||||
r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
|
||||
default=None))
|
||||
if max_resolution:
|
||||
url_base = media_url.rpartition('.')[0]
|
||||
for resolution in (360, 720, 1080):
|
||||
if resolution > max_resolution:
|
||||
break
|
||||
formats.append({
|
||||
'url': '%s.%dp.mp4' % (url_base, resolution),
|
||||
'format_id': '%dp' % resolution,
|
||||
'height': resolution,
|
||||
})
|
||||
max_resolution = int_or_none(self._search_regex(
|
||||
r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
|
||||
default=None))
|
||||
if max_resolution:
|
||||
url_base = media_url.rpartition('.')[0]
|
||||
for resolution in (360, 720, 1080):
|
||||
if resolution > max_resolution:
|
||||
break
|
||||
formats.append({
|
||||
'url': '%s.%dp.mp4' % (url_base, resolution),
|
||||
'format_id': '%dp' % resolution,
|
||||
'height': resolution,
|
||||
})
|
||||
else:
|
||||
video_id = int_or_none(self._search_regex(
|
||||
r'data-movie-id=\\"([0-9]+)\\"', webpage, ''))
|
||||
if not video_id:
|
||||
raise ExtractorError('Could not extract media data')
|
||||
|
||||
url_video_data = 'https://www.newgrounds.com/portal/video/%s' % video_id
|
||||
headers = {
|
||||
'Accept': 'application/json',
|
||||
'Referer': url,
|
||||
'X-Requested-With': 'XMLHttpRequest'
|
||||
}
|
||||
json_video = self._download_json(url_video_data, video_id, headers=headers, fatal=False)
|
||||
if not json_video:
|
||||
raise ExtractorError('Could not fetch media data')
|
||||
|
||||
uploader = json_video.get('author')
|
||||
title = json_video.get('title')
|
||||
media_formats = json_video.get('sources', [])
|
||||
for media_format in media_formats:
|
||||
media_sources = media_formats[media_format]
|
||||
for source in media_sources:
|
||||
formats.append({
|
||||
'format_id': media_format,
|
||||
'quality': int_or_none(media_format[:-1]),
|
||||
'url': source.get('src')
|
||||
})
|
||||
|
||||
self._check_formats(formats, media_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
(r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*Author\s*</em>',
|
||||
r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
|
||||
fatal=False)
|
||||
if not uploader:
|
||||
uploader = self._html_search_regex(
|
||||
(r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*(?:Author|Artist)\s*</em>',
|
||||
r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
|
||||
fatal=False)
|
||||
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
(r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
|
||||
|
@ -109,6 +141,9 @@ def _real_extract(self, url):
|
|||
if '<dd>Song' in webpage:
|
||||
formats[0]['vcodec'] = 'none'
|
||||
|
||||
if uploader:
|
||||
title = "%s - %s" % (uploader, title)
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
'title': title,
|
||||
|
|
167
youtube_dlc/extractor/nitter.py
Normal file
167
youtube_dlc/extractor/nitter.py
Normal file
|
@ -0,0 +1,167 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
parse_count,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
remove_end,
|
||||
determine_ext,
|
||||
)
|
||||
import re
|
||||
|
||||
|
||||
class NitterIE(InfoExtractor):
    """Extractor for videos and GIFs attached to tweets mirrored on Nitter instances."""

    # Taken from https://github.com/zedeus/nitter/wiki/Instances
    INSTANCES = ('nitter.net',
                 'nitter.snopyta.org',
                 'nitter.42l.fr',
                 'nitter.nixnet.services',
                 'nitter.13ad.de',
                 'nitter.pussthecat.org',
                 'nitter.mastodont.cat',
                 'nitter.dark.fail',
                 'nitter.tedomum.net',
                 'nitter.cattube.org',
                 'nitter.fdn.fr',
                 'nitter.1d4.us',
                 'nitter.kavin.rocks',
                 'tweet.lambda.dance',
                 'nitter.cc',
                 'nitter.weaponizedhumiliation.com',
                 '3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion',
                 'nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion',
                 'nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion')

    # Alternation of all known instance host names, regex-escaped
    _INSTANCES_RE = '(?:' + '|'.join([re.escape(instance) for instance in INSTANCES]) + ')'
    _VALID_URL = r'https?://%(instance)s/(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)?' % {'instance': _INSTANCES_RE}
    current_instance = INSTANCES[0]  # the test and official instance
    _TESTS = [
        {
            # GIF (wrapped in mp4)
            'url': 'https://' + current_instance + '/firefox/status/1314279897502629888#m',
            'info_dict': {
                'id': '1314279897502629888',
                'ext': 'mp4',
                'title': 'Firefox 🔥 - You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg #UnfckTheInternet',
                'description': 'You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg #UnfckTheInternet',
                'thumbnail': r're:^https?://.*\.jpg$',
                'uploader': 'Firefox 🔥',
                'uploader_id': 'firefox',
                'uploader_url': 'https://' + current_instance + '/firefox',
                'upload_date': '20201008',
                'timestamp': 1602183720,
            },
        }, {  # normal video
            'url': 'https://' + current_instance + '/Le___Doc/status/1299715685392756737#m',
            'info_dict': {
                'id': '1299715685392756737',
                'ext': 'mp4',
                'title': 'Le Doc - "Je ne prédis jamais rien" D Raoult, Août 2020...',
                'description': '"Je ne prédis jamais rien" D Raoult, Août 2020...',
                'thumbnail': r're:^https?://.*\.jpg$',
                'uploader': 'Le Doc',
                'uploader_id': 'Le___Doc',
                'uploader_url': 'https://' + current_instance + '/Le___Doc',
                'upload_date': '20200829',
                'timestamp': 1598711341,
                'view_count': int,
                'like_count': int,
                'repost_count': int,
                'comment_count': int,
            },
        }, {  # video embed in a "Streaming Political Ads" box
            'url': 'https://' + current_instance + '/mozilla/status/1321147074491092994#m',
            'info_dict': {
                'id': '1321147074491092994',
                'ext': 'mp4',
                'title': "Mozilla - Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. Learn more ➡️ https://mzl.la/StreamingAds",
                'description': "Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. Learn more ➡️ https://mzl.la/StreamingAds",
                'thumbnail': r're:^https?://.*\.jpg$',
                'uploader': 'Mozilla',
                'uploader_id': 'mozilla',
                'uploader_url': 'https://' + current_instance + '/mozilla',
                'upload_date': '20201027',
                'timestamp': 1603820982
            },
        },
    ]

    def _real_extract(self, url):
        """Download the tweet page at ``url`` and return its info dict.

        Only the media URL and the title are mandatory; every other field
        is looked up non-fatally and is None when the page lacks it.
        """
        video_id = self._match_id(url)
        parsed_url = compat_urlparse.urlparse(url)
        base_url = parsed_url.scheme + '://' + parsed_url.netloc

        # Set the instance's HLS playback preference cookie before fetching
        self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on')
        webpage = self._download_webpage(url, video_id)

        # Media paths in the page are host-relative
        video_url = base_url + self._html_search_regex(
            r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url')
        ext = determine_ext(video_url)

        if ext == 'unknown_video':
            # No recognisable file extension: treat the URL as an HLS playlist
            formats = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
        else:
            formats = [{
                'url': video_url,
                'ext': ext
            }]

        # The og:description lookup is non-fatal and may yield None; guard it
        # so that the tweet-content regex fallback is actually reachable.
        og_description = self._og_search_description(webpage)
        title = (
            (og_description.replace('\n', ' ') if og_description else None)
            or self._html_search_regex(
                r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title'))
        description = title

        mobj = re.match(self._VALID_URL, url)
        uploader_id = (
            mobj.group('uploader_id')
            or self._html_search_regex(
                r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False))

        # Always bound, so the returned dict never hits an undefined name
        uploader_url = base_url + '/' + uploader_id if uploader_id else None

        uploader = self._html_search_regex(
            r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)

        if uploader:
            title = uploader + ' - ' + title

        view_count = parse_count(self._html_search_regex(
            r'<span[^>]+class="icon-play[^>]*></span>\s([^<]+)</div>', webpage, 'view count', fatal=False))
        like_count = parse_count(self._html_search_regex(
            r'<span[^>]+class="icon-heart[^>]*></span>\s([^<]+)</div>', webpage, 'like count', fatal=False))
        repost_count = parse_count(self._html_search_regex(
            r'<span[^>]+class="icon-retweet[^>]*></span>\s([^<]+)</div>', webpage, 'repost count', fatal=False))
        comment_count = parse_count(self._html_search_regex(
            r'<span[^>]+class="icon-comment[^>]*></span>\s([^<]+)</div>', webpage, 'comment count', fatal=False))

        # Both thumbnail lookups are non-fatal; only build URLs when one matched
        thumbnail_path = (
            self._html_search_meta('og:image', webpage, 'thumbnail url')
            or self._html_search_regex(
                r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False))

        thumbnail = None
        thumbnails = []
        if thumbnail_path:
            # if parsed with regex, it should contain this
            thumbnail = remove_end(base_url + thumbnail_path, '%3Asmall')
            # One entry per size suffix appended to the base image URL
            thumbnails = [{
                'id': thumbnail_id,
                'url': thumbnail + '%3A' + thumbnail_id,
            } for thumbnail_id in ('thumb', 'small', 'large', 'medium', 'orig')]

        date = self._html_search_regex(
            r'<span[^>]+class="tweet-date"[^>]*><a[^>]+title="([^"]+)"', webpage, 'upload date', fatal=False)
        upload_date = unified_strdate(date)
        timestamp = unified_timestamp(date)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'uploader': uploader,
            'timestamp': timestamp,
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
            'view_count': view_count,
            'like_count': like_count,
            'repost_count': repost_count,
            'comment_count': comment_count,
            'formats': formats,
            'thumbnails': thumbnails,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }
|
|
@ -33,7 +33,7 @@ class NprIE(InfoExtractor):
|
|||
},
|
||||
}],
|
||||
}, {
|
||||
# mutlimedia, not media title
|
||||
# multimedia, not media title
|
||||
'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
|
||||
'info_dict': {
|
||||
'id': '533198237',
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
|
@ -16,17 +17,269 @@
|
|||
parse_age_limit,
|
||||
parse_duration,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NRKBaseIE(InfoExtractor):
|
||||
_GEO_COUNTRIES = ['NO']
|
||||
|
||||
_api_host = None
|
||||
|
||||
class NRKIE(NRKBaseIE):
    """Extractor for nrk.no video pages, ``nrk:`` ids and psapi mediaelement URLs."""

    _VALID_URL = r'''(?x)
                        (?:
                            nrk:|
                            https?://
                                (?:
                                    (?:www\.)?nrk\.no/video/(?:PS\*|[^_]+_)|
                                    v8[-.]psapi\.nrk\.no/mediaelement/
                                )
                            )
                            (?P<id>[^?\#&]+)
                        '''

    _TESTS = [{
        # video
        'url': 'http://www.nrk.no/video/PS*150533',
        'md5': '706f34cdf1322577589e369e522b50ef',
        'info_dict': {
            'id': '150533',
            'ext': 'mp4',
            'title': 'Dompap og andre fugler i Piip-Show',
            'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
            'duration': 262,
        }
    }, {
        # audio
        'url': 'http://www.nrk.no/video/PS*154915',
        # MD5 is unstable
        'info_dict': {
            'id': '154915',
            'ext': 'flv',
            'title': 'Slik høres internett ut når du er blind',
            'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
            'duration': 20,
        }
    }, {
        'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
        'only_matching': True,
    }, {
        'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70',
        'only_matching': True,
    }, {
        'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
        'only_matching': True,
    }, {
        'url': 'https://www.nrk.no/video/dompap-og-andre-fugler-i-piip-show_150533',
        'only_matching': True,
    }, {
        'url': 'https://www.nrk.no/video/humor/kommentatorboksen-reiser-til-sjos_d1fda11f-a4ad-437a-a374-0398bc84e999',
        'only_matching': True,
    }]

    def _extract_from_playback(self, video_id):
        """Build the info dict from the playback manifest and metadata endpoints."""
        manifest = self._download_json(
            'http://psapi.nrk.no/playback/manifest/%s' % video_id,
            video_id, 'Downloading manifest JSON')
        playable = manifest['playable']

        formats = []
        for asset in playable['assets']:
            # Ignore malformed entries and encrypted assets
            if not isinstance(asset, dict) or asset.get('encrypted'):
                continue
            asset_url = url_or_none(asset.get('url'))
            if not asset_url:
                continue
            # Only HLS assets are turned into formats
            is_hls = asset.get('format') == 'HLS' or determine_ext(asset_url) == 'm3u8'
            if is_hls:
                hls_formats = self._extract_m3u8_formats(
                    asset_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False)
                formats.extend(hls_formats)
        self._sort_formats(formats)

        metadata = self._download_json(
            'http://psapi.nrk.no/playback/metadata/%s' % video_id,
            video_id, 'Downloading metadata JSON')

        preplay = metadata['preplay']
        titles = preplay['titles']
        title = titles['title']

        # Prefer the manifest's duration, fall back to the metadata's
        duration = (
            parse_duration(playable.get('duration'))
            or parse_duration(metadata.get('duration')))

        poster_images = try_get(
            preplay, lambda x: x['poster']['images'], list) or []
        thumbnails = []
        for image in poster_images:
            image_url = url_or_none(image.get('url')) if isinstance(image, dict) else None
            if image_url:
                thumbnails.append({
                    'url': image_url,
                    'width': int_or_none(image.get('pixelWidth')),
                    'height': int_or_none(image.get('pixelHeight')),
                })

        return {
            'id': video_id,
            'title': title,
            'alt_title': titles.get('subtitle'),
            'description': preplay.get('description'),
            'duration': duration,
            'thumbnails': thumbnails,
            'formats': formats,
        }

    def _real_extract(self, url):
        """Resolve the id from ``url`` and extract it through the playback API."""
        return self._extract_from_playback(self._match_id(url))
|
||||
|
||||
|
||||
class NRKTVIE(NRKBaseIE):
|
||||
IE_DESC = 'NRK TV and NRK Radio'
|
||||
_EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:tv|radio)\.nrk(?:super)?\.no/
|
||||
(?:serie(?:/[^/]+){1,2}|program)/
|
||||
(?![Ee]pisodes)%s
|
||||
(?:/\d{2}-\d{2}-\d{4})?
|
||||
(?:\#del=(?P<part_id>\d+))?
|
||||
''' % _EPISODE_RE
|
||||
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/program/MDDP12000117',
|
||||
'md5': '8270824df46ec629b66aeaa5796b36fb',
|
||||
'info_dict': {
|
||||
'id': 'MDDP12000117AA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Alarm Trolltunga',
|
||||
'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
|
||||
'duration': 2223,
|
||||
'age_limit': 6,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||
'md5': '9a167e54d04671eb6317a37b7bc8a280',
|
||||
'info_dict': {
|
||||
'id': 'MUHH48000314AA',
|
||||
'ext': 'mp4',
|
||||
'title': '20 spørsmål 23.05.2014',
|
||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||
'duration': 1741,
|
||||
'series': '20 spørsmål',
|
||||
'episode': '23.05.2014',
|
||||
},
|
||||
'skip': 'NoProgramRights',
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||
'info_dict': {
|
||||
'id': 'MDFP15000514CA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
|
||||
'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
|
||||
'duration': 4605,
|
||||
'series': 'Kunnskapskanalen',
|
||||
'episode': '24.05.2014',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# single playlist video
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part2',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Video is geo restricted'],
|
||||
'skip': 'particular part is not supported currently',
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515AH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
|
||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||
'duration': 772,
|
||||
'series': 'Tour de Ski',
|
||||
'episode': '06.01.2015',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515BH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
|
||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||
'duration': 6175,
|
||||
'series': 'Tour de Ski',
|
||||
'episode': '06.01.2015',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}],
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||
},
|
||||
'expected_warnings': ['Video is geo restricted'],
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
|
||||
'info_dict': {
|
||||
'id': 'KMTE50001317AA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Anno 13:30',
|
||||
'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
|
||||
'duration': 2340,
|
||||
'series': 'Anno',
|
||||
'episode': '13:30',
|
||||
'season_number': 3,
|
||||
'episode_number': 13,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
|
||||
'info_dict': {
|
||||
'id': 'MUHH46000317AA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nytt på Nytt 27.01.2017',
|
||||
'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
|
||||
'duration': 1796,
|
||||
'series': 'Nytt på nytt',
|
||||
'episode': '27.01.2017',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_api_host = None
|
||||
|
||||
def _extract_from_mediaelement(self, video_id):
|
||||
api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
|
||||
|
||||
for api_host in api_hosts:
|
||||
|
@ -195,190 +448,9 @@ def video_id_and_title(idx):
|
|||
|
||||
return self.playlist_result(entries, video_id, title, description)
|
||||
|
||||
|
||||
class NRKIE(NRKBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
nrk:|
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?nrk\.no/video/PS\*|
|
||||
v8[-.]psapi\.nrk\.no/mediaelement/
|
||||
)
|
||||
)
|
||||
(?P<id>[^?#&]+)
|
||||
'''
|
||||
_API_HOSTS = ('psapi.nrk.no', 'v8-psapi.nrk.no')
|
||||
_TESTS = [{
|
||||
# video
|
||||
'url': 'http://www.nrk.no/video/PS*150533',
|
||||
'md5': '706f34cdf1322577589e369e522b50ef',
|
||||
'info_dict': {
|
||||
'id': '150533',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dompap og andre fugler i Piip-Show',
|
||||
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
||||
'duration': 262,
|
||||
}
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.nrk.no/video/PS*154915',
|
||||
# MD5 is unstable
|
||||
'info_dict': {
|
||||
'id': '154915',
|
||||
'ext': 'flv',
|
||||
'title': 'Slik høres internett ut når du er blind',
|
||||
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
||||
'duration': 20,
|
||||
}
|
||||
}, {
|
||||
'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class NRKTVIE(NRKBaseIE):
|
||||
IE_DESC = 'NRK TV and NRK Radio'
|
||||
_EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:tv|radio)\.nrk(?:super)?\.no/
|
||||
(?:serie(?:/[^/]+){1,2}|program)/
|
||||
(?![Ee]pisodes)%s
|
||||
(?:/\d{2}-\d{2}-\d{4})?
|
||||
(?:\#del=(?P<part_id>\d+))?
|
||||
''' % _EPISODE_RE
|
||||
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/program/MDDP12000117',
|
||||
'md5': '8270824df46ec629b66aeaa5796b36fb',
|
||||
'info_dict': {
|
||||
'id': 'MDDP12000117AA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Alarm Trolltunga',
|
||||
'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
|
||||
'duration': 2223,
|
||||
'age_limit': 6,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||
'md5': '9a167e54d04671eb6317a37b7bc8a280',
|
||||
'info_dict': {
|
||||
'id': 'MUHH48000314AA',
|
||||
'ext': 'mp4',
|
||||
'title': '20 spørsmål 23.05.2014',
|
||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||
'duration': 1741,
|
||||
'series': '20 spørsmål',
|
||||
'episode': '23.05.2014',
|
||||
},
|
||||
'skip': 'NoProgramRights',
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||
'info_dict': {
|
||||
'id': 'MDFP15000514CA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
|
||||
'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
|
||||
'duration': 4605,
|
||||
'series': 'Kunnskapskanalen',
|
||||
'episode': '24.05.2014',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# single playlist video
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part2',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Video is geo restricted'],
|
||||
'skip': 'particular part is not supported currently',
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515AH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
|
||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||
'duration': 772,
|
||||
'series': 'Tour de Ski',
|
||||
'episode': '06.01.2015',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515BH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
|
||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||
'duration': 6175,
|
||||
'series': 'Tour de Ski',
|
||||
'episode': '06.01.2015',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}],
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||
},
|
||||
'expected_warnings': ['Video is geo restricted'],
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
|
||||
'info_dict': {
|
||||
'id': 'KMTE50001317AA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Anno 13:30',
|
||||
'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
|
||||
'duration': 2340,
|
||||
'series': 'Anno',
|
||||
'episode': '13:30',
|
||||
'season_number': 3,
|
||||
'episode_number': 13,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
|
||||
'info_dict': {
|
||||
'id': 'MUHH46000317AA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nytt på Nytt 27.01.2017',
|
||||
'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
|
||||
'duration': 1796,
|
||||
'series': 'Nytt på nytt',
|
||||
'episode': '27.01.2017',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
|
||||
'only_matching': True,
|
||||
}]
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_from_mediaelement(video_id)
|
||||
|
||||
|
||||
class NRKTVEpisodeIE(InfoExtractor):
|
||||
|
|
|
@ -221,3 +221,41 @@ def _real_extract(self, url):
|
|||
r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'),
|
||||
webpage, 'podcast data')
|
||||
return self._extract_podcast_from_json(podcast_data, page_id, webpage)
|
||||
|
||||
|
||||
class NYTimesCookingIE(NYTimesBaseIE):
    """Extractor for videos embedded in NYT Cooking recipe and guide pages."""

    _VALID_URL = r'https?://cooking\.nytimes\.com/(?:guid|recip)es/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
        'md5': 'dab81fa2eaeb3f9ed47498bdcfcdc1d3',
        'info_dict': {
            'id': '100000004756089',
            'ext': 'mov',
            'timestamp': 1479383008,
            'uploader': 'By SHAW LASH, ADAM SAEWITZ and JAMES HERRON',
            'title': 'Cranberry Tart',
            'upload_date': '20161117',
            'description': 'If you are a fan of lemon curd or the classic French tarte au citron, you will love this cranberry version.',
        },
    }, {
        'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
        'md5': '4b2e8c70530a89b8d905a2b572316eb8',
        'info_dict': {
            'id': '100000003951728',
            'ext': 'mov',
            'timestamp': 1445509539,
            'description': 'Turkey guide',
            'upload_date': '20151022',
            'title': 'Turkey',
        }
    }]

    def _real_extract(self, url):
        """Find the page's ``data-video-id`` and delegate to the shared video extraction."""
        page_id = self._match_id(url)
        page_html = self._download_webpage(url, page_id)

        # The numeric video id sits in a data attribute of the page markup
        embedded_video_id = self._search_regex(
            r'data-video-id=["\'](\d+)', page_html, 'video id')
        return self._extract_video_from_id(embedded_video_id)
|
||||
|
|
|
@ -477,7 +477,7 @@ def _extract_webpage(self, url):
|
|||
if media_id:
|
||||
return media_id, presumptive_id, upload_date, description
|
||||
|
||||
# Fronline video embedded via flp
|
||||
# Frontline video embedded via flp
|
||||
video_id = self._search_regex(
|
||||
r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None)
|
||||
if video_id:
|
||||
|
|
201
youtube_dlc/extractor/pinterest.py
Normal file
201
youtube_dlc/extractor/pinterest.py
Normal file
|
@ -0,0 +1,201 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class PinterestBaseIE(InfoExtractor):
    """Shared URL prefix, API access and pin parsing for the Pinterest extractors."""

    _VALID_URL_BASE = r'https?://(?:[^/]+\.)?pinterest\.(?:com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx|dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu|co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)'

    def _call_api(self, resource, video_id, options):
        """Query a ``<resource>Resource`` endpoint and return its ``resource_response``."""
        api_url = 'https://www.pinterest.com/resource/%sResource/get/' % resource
        note = 'Download %s JSON metadata' % resource
        # The options are passed JSON-encoded in the single ``data`` query parameter
        response = self._download_json(
            api_url, video_id, note,
            query={'data': json.dumps({'options': options})})
        return response['resource_response']

    def _extract_video(self, data, extract_formats=True):
        """Convert a pin's ``data`` dict into an info dict.

        With ``extract_formats`` false, formats are left empty and no
        duration is read.
        """
        video_id = data['id']

        raw_title = data.get('title') or data.get('grid_title') or video_id
        title = raw_title.strip()

        duration = None
        formats = []
        if extract_formats:
            video_list = data['videos']['video_list']
            for format_id, format_dict in video_list.items():
                format_url = (
                    url_or_none(format_dict.get('url'))
                    if isinstance(format_dict, dict) else None)
                if not format_url:
                    continue
                # The duration is taken from the format entry, scaled by 1000
                duration = float_or_none(format_dict.get('duration'), scale=1000)
                if 'hls' in format_id.lower() or determine_ext(format_url) == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                        m3u8_id=format_id, fatal=False))
                    continue
                formats.append({
                    'url': format_url,
                    'format_id': format_id,
                    'width': int_or_none(format_dict.get('width')),
                    'height': int_or_none(format_dict.get('height')),
                    'duration': duration,
                })
            self._sort_formats(
                formats, field_preference=('height', 'width', 'tbr', 'format_id'))

        description = (
            data.get('description')
            or data.get('description_html')
            or data.get('seo_description'))

        def _attribution(field):
            # String-valued field of the pin's closeup attribution, or None
            return try_get(data, lambda x: x['closeup_attribution'][field], compat_str)

        thumbnails = []
        images = data.get('images')
        if isinstance(images, dict):
            for thumbnail in images.values():
                thumbnail_url = (
                    url_or_none(thumbnail.get('url'))
                    if isinstance(thumbnail, dict) else None)
                if thumbnail_url:
                    thumbnails.append({
                        'url': thumbnail_url,
                        'width': int_or_none(thumbnail.get('width')),
                        'height': int_or_none(thumbnail.get('height')),
                    })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': unified_timestamp(data.get('created_at')),
            'thumbnails': thumbnails,
            'uploader': _attribution('full_name'),
            'uploader_id': _attribution('id'),
            'repost_count': int_or_none(data.get('repin_count')),
            'comment_count': int_or_none(data.get('comment_count')),
            'categories': try_get(data, lambda x: x['pin_join']['visual_annotation'], list),
            'tags': data.get('hashtags'),
            'formats': formats,
            'extractor_key': PinterestIE.ie_key(),
        }
|
||||
|
||||
|
||||
class PinterestIE(PinterestBaseIE):
    # A single pin page: <base>/pin/<numeric id>
    _VALID_URL = r'%s/pin/(?P<id>\d+)' % PinterestBaseIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.pinterest.com/pin/664281013778109217/',
        'md5': '6550c2af85d6d9f3fe3b88954d1577fc',
        'info_dict': {
            'id': '664281013778109217',
            'ext': 'mp4',
            'title': 'Origami',
            'description': 'md5:b9d90ddf7848e897882de9e73344f7dd',
            'duration': 57.7,
            'timestamp': 1593073622,
            'upload_date': '20200625',
            'uploader': 'Love origami -I am Dafei',
            'uploader_id': '586523688879454212',
            'repost_count': 50,
            'comment_count': 0,
            'categories': list,
            'tags': list,
        },
    }, {
        'url': 'https://co.pinterest.com/pin/824721750502199491/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The pin id taken from the URL is both the display id and the API key.
        pin_id = self._match_id(url)
        query = {
            'field_set_key': 'unauth_react_main_pin',
            'id': pin_id,
        }
        response = self._call_api('Pin', pin_id, query)
        # A missing 'data' member is treated as fatal (KeyError), as before.
        return self._extract_video(response['data'])
|
||||
|
||||
|
||||
class PinterestCollectionIE(PinterestBaseIE):
    # A board page: <base>/<username>/<board slug>
    _VALID_URL = r'%s/(?P<username>[^/]+)/(?P<id>[^/?#&]+)' % PinterestBaseIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.pinterest.ca/mashal0407/cool-diys/',
        'info_dict': {
            'id': '585890301462791043',
            'title': 'cool diys',
        },
        'playlist_count': 8,
    }, {
        'url': 'https://www.pinterest.ca/fudohub/videos/',
        'info_dict': {
            'id': '682858430939307450',
            'title': 'VIDEOS',
        },
        'playlist_mincount': 365,
        'skip': 'Test with extract_formats=False',
    }]

    @classmethod
    def suitable(cls, url):
        # A pin URL ("/pin/<id>") also fits the two-segment board pattern,
        # so anything PinterestIE accepts is rejected here.
        if PinterestIE.suitable(url):
            return False
        return super(PinterestCollectionIE, cls).suitable(url)

    def _real_extract(self, url):
        username, slug = re.match(self._VALID_URL, url).groups()

        board = self._call_api(
            'Board', slug, {
                'slug': slug,
                'username': username
            })['data']
        board_id = board['id']

        feed_query = {
            'board_id': board_id,
            'page_size': 250,
        }

        entries = []
        bookmark = None
        # Page through the board feed; each response carries the bookmark
        # that must be sent to obtain the following page.
        while True:
            if bookmark:
                feed_query['bookmarks'] = [bookmark]
            page = self._call_api('BoardFeed', board_id, feed_query)

            for pin in (page.get('data') or []):
                if not isinstance(pin, dict) or pin.get('type') != 'pin':
                    continue
                if not pin.get('id'):
                    continue
                # Some pins may not be available anonymously via pin URL,
                # so the feed item itself is extracted rather than
                # delegating to the pin page.
                entries.append(self._extract_video(pin))

            bookmark = page.get('bookmark')
            if not bookmark:
                break

        return self.playlist_result(
            entries, playlist_id=board_id, playlist_title=board.get('name'))
|
|
@ -16,6 +16,7 @@
|
|||
GeoRestrictedError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
remove_start,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
|
@ -30,7 +31,6 @@ class RaiBaseIE(InfoExtractor):
|
|||
_UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
|
||||
_GEO_COUNTRIES = ['IT']
|
||||
_GEO_BYPASS = False
|
||||
_BASE_URL = 'https://www.raiplay.it'
|
||||
|
||||
def _extract_relinker_info(self, relinker_url, video_id):
|
||||
if not re.match(r'https?://', relinker_url):
|
||||
|
@ -68,7 +68,7 @@ def _extract_relinker_info(self, relinker_url, video_id):
|
|||
|
||||
# This does not imply geo restriction (e.g.
|
||||
# http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
|
||||
if media_url == 'http://download.rai.it/video_no_available.mp4':
|
||||
if '/video_no_available.mp4' in media_url:
|
||||
continue
|
||||
|
||||
ext = determine_ext(media_url)
|
||||
|
@ -123,7 +123,7 @@ def _extract_subtitles(url, subtitle_url):
|
|||
|
||||
|
||||
class RaiPlayIE(RaiBaseIE):
|
||||
_VALID_URL = r'(?P<url>(?P<base>https?://(?:www\.)?raiplay\.it/.+?-)(?P<id>%s)(?P<ext>\.(?:html|json)))' % RaiBaseIE._UUID_RE
|
||||
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s))\.(?:html|json)' % RaiBaseIE._UUID_RE
|
||||
_TESTS = [{
|
||||
'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
|
||||
'md5': '8970abf8caf8aef4696e7b1f2adfc696',
|
||||
|
@ -131,11 +131,13 @@ class RaiPlayIE(RaiBaseIE):
|
|||
'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
|
||||
'ext': 'mp4',
|
||||
'title': 'Report del 07/04/2014',
|
||||
'alt_title': 'St 2013/14 - Espresso nel caffè - 07/04/2014 ',
|
||||
'alt_title': 'St 2013/14 - Espresso nel caffè - 07/04/2014',
|
||||
'description': 'md5:d730c168a58f4bb35600fc2f881ec04e',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Rai Gulp',
|
||||
'duration': 6160,
|
||||
'series': 'Report',
|
||||
'season': '2013/14',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
@ -146,11 +148,10 @@ class RaiPlayIE(RaiBaseIE):
|
|||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
url, base, video_id, ext = mobj.group('url', 'base', 'id', 'ext')
|
||||
base, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
media = self._download_json(
|
||||
'%s%s.json' % (base, video_id), video_id, 'Downloading video JSON')
|
||||
base + '.json', video_id, 'Downloading video JSON')
|
||||
|
||||
title = media['name']
|
||||
video = media['video']
|
||||
|
@ -159,34 +160,39 @@ def _real_extract(self, url):
|
|||
self._sort_formats(relinker_info['formats'])
|
||||
|
||||
thumbnails = []
|
||||
if 'images' in media:
|
||||
for _, value in media.get('images').items():
|
||||
if value:
|
||||
thumbnails.append({
|
||||
'url': urljoin(RaiBaseIE._BASE_URL, value.replace('[RESOLUTION]', '600x400'))
|
||||
})
|
||||
for _, value in media.get('images', {}).items():
|
||||
if value:
|
||||
thumbnails.append({
|
||||
'url': urljoin(url, value),
|
||||
})
|
||||
|
||||
timestamp = unified_timestamp(try_get(
|
||||
media, lambda x: x['availabilities'][0]['start'], compat_str))
|
||||
date_published = media.get('date_published')
|
||||
time_published = media.get('time_published')
|
||||
if date_published and time_published:
|
||||
date_published += ' ' + time_published
|
||||
|
||||
subtitles = self._extract_subtitles(url, video.get('subtitles'))
|
||||
|
||||
program_info = media.get('program_info') or {}
|
||||
season = media.get('season')
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'id': remove_start(media.get('id'), 'ContentItem-') or video_id,
|
||||
'display_id': video_id,
|
||||
'title': self._live_title(title) if relinker_info.get(
|
||||
'is_live') else title,
|
||||
'alt_title': media.get('subtitle'),
|
||||
'alt_title': strip_or_none(media.get('subtitle')),
|
||||
'description': media.get('description'),
|
||||
'uploader': strip_or_none(media.get('channel')),
|
||||
'creator': strip_or_none(media.get('editor')),
|
||||
'creator': strip_or_none(media.get('editor') or None),
|
||||
'duration': parse_duration(video.get('duration')),
|
||||
'timestamp': timestamp,
|
||||
'timestamp': unified_timestamp(date_published),
|
||||
'thumbnails': thumbnails,
|
||||
'series': try_get(
|
||||
media, lambda x: x['isPartOf']['name'], compat_str),
|
||||
'season_number': int_or_none(try_get(
|
||||
media, lambda x: x['isPartOf']['numeroStagioni'])),
|
||||
'season': media.get('stagione') or None,
|
||||
'series': program_info.get('name'),
|
||||
'season_number': int_or_none(season),
|
||||
'season': season if (season and not season.isdigit()) else None,
|
||||
'episode': media.get('episode_title'),
|
||||
'episode_number': int_or_none(media.get('episode')),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
@ -194,9 +200,9 @@ def _real_extract(self, url):
|
|||
return info
|
||||
|
||||
|
||||
class RaiPlayLiveIE(RaiBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
class RaiPlayLiveIE(RaiPlayIE):
|
||||
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.raiplay.it/dirette/rainews24',
|
||||
'info_dict': {
|
||||
'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
|
||||
|
@ -211,40 +217,11 @@ class RaiPlayLiveIE(RaiBaseIE):
|
|||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
media = self._download_json(
|
||||
'%s.json' % urljoin(RaiBaseIE._BASE_URL, 'dirette/' + display_id),
|
||||
display_id, 'Downloading channel JSON')
|
||||
|
||||
title = media['name']
|
||||
video = media['video']
|
||||
video_id = media['id'].replace('ContentItem-', '')
|
||||
|
||||
relinker_info = self._extract_relinker_info(video['content_url'], video_id)
|
||||
self._sort_formats(relinker_info['formats'])
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': self._live_title(title) if relinker_info.get(
|
||||
'is_live') else title,
|
||||
'alt_title': media.get('subtitle'),
|
||||
'description': media.get('description'),
|
||||
'uploader': strip_or_none(media.get('channel')),
|
||||
'creator': strip_or_none(media.get('editor')),
|
||||
'duration': parse_duration(video.get('duration')),
|
||||
}
|
||||
|
||||
info.update(relinker_info)
|
||||
return info
|
||||
}]
|
||||
|
||||
|
||||
class RaiPlayPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
|
||||
'info_dict': {
|
||||
|
@ -256,29 +233,34 @@ class RaiPlayPlaylistIE(InfoExtractor):
|
|||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
base, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
media = self._download_json(
|
||||
'%s.json' % urljoin(RaiBaseIE._BASE_URL, 'programmi/' + playlist_id),
|
||||
playlist_id, 'Downloading program JSON')
|
||||
|
||||
title = media['name']
|
||||
description = media['program_info']['description']
|
||||
|
||||
content_sets = [s['id'] for b in media['blocks'] for s in b['sets']]
|
||||
program = self._download_json(
|
||||
base + '.json', playlist_id, 'Downloading program JSON')
|
||||
|
||||
entries = []
|
||||
for cs in content_sets:
|
||||
medias = self._download_json(
|
||||
'%s/%s.json' % (urljoin(RaiBaseIE._BASE_URL, 'programmi/' + playlist_id), cs),
|
||||
cs, 'Downloading content set JSON')
|
||||
for m in medias['items']:
|
||||
video_url = urljoin(url, m['path_id'])
|
||||
entries.append(self.url_result(
|
||||
video_url, ie=RaiPlayIE.ie_key(),
|
||||
video_id=RaiPlayIE._match_id(video_url)))
|
||||
for b in (program.get('blocks') or []):
|
||||
for s in (b.get('sets') or []):
|
||||
s_id = s.get('id')
|
||||
if not s_id:
|
||||
continue
|
||||
medias = self._download_json(
|
||||
'%s/%s.json' % (base, s_id), s_id,
|
||||
'Downloading content set JSON', fatal=False)
|
||||
if not medias:
|
||||
continue
|
||||
for m in (medias.get('items') or []):
|
||||
path_id = m.get('path_id')
|
||||
if not path_id:
|
||||
continue
|
||||
video_url = urljoin(url, path_id)
|
||||
entries.append(self.url_result(
|
||||
video_url, ie=RaiPlayIE.ie_key(),
|
||||
video_id=RaiPlayIE._match_id(video_url)))
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, program.get('name'),
|
||||
try_get(program, lambda x: x['program_info']['description']))
|
||||
|
||||
|
||||
class RaiIE(RaiBaseIE):
|
||||
|
@ -294,7 +276,8 @@ class RaiIE(RaiBaseIE):
|
|||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1758,
|
||||
'upload_date': '20140612',
|
||||
}
|
||||
},
|
||||
'skip': 'This content is available only in Italy',
|
||||
}, {
|
||||
# with ContentItem in many metas
|
||||
'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
|
||||
|
@ -440,7 +423,7 @@ def _real_extract(self, url):
|
|||
except ExtractorError:
|
||||
pass
|
||||
|
||||
relinker_url = self._search_regex(
|
||||
relinker_url = self._proto_relative_url(self._search_regex(
|
||||
r'''(?x)
|
||||
(?:
|
||||
var\s+videoURL|
|
||||
|
@ -452,7 +435,7 @@ def _real_extract(self, url):
|
|||
//mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
|
||||
(?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
|
||||
''',
|
||||
webpage, 'relinker URL', group='url')
|
||||
webpage, 'relinker URL', group='url'))
|
||||
|
||||
relinker_info = self._extract_relinker_info(
|
||||
urljoin(url, relinker_url), video_id)
|
||||
|
|
413
youtube_dlc/extractor/rcs.py
Normal file
413
youtube_dlc/extractor/rcs.py
Normal file
|
@ -0,0 +1,413 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
base_url,
|
||||
url_basename,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class RCSBaseIE(InfoExtractor):
    # Shared extraction logic for the RCS extractors below. Subclasses
    # provide _VALID_URL with the named groups 'id' and 'cdn' (and,
    # optionally, 'vid').

    # Substring replacements applied to every media URL.
    # NOTE: the fourth entry only matches after the first one has been
    # applied, so this relies on the dict being iterated in definition order.
    _ALL_REPLACE = {
        'media2vam.corriere.it.edgesuite.net':
            'media2vam-corriere-it.akamaized.net',
        'media.youreporter.it.edgesuite.net':
            'media-youreporter-it.akamaized.net',
        'corrierepmd.corriere.it.edgesuite.net':
            'corrierepmd-corriere-it.akamaized.net',
        'media2vam-corriere-it.akamaized.net/fcs.quotidiani/vr/videos/':
            'video.corriere.it/vr360/videos/',
        '.net//': '.net/',
    }
    # Further host replacements (also applied to every media URL).
    _MP4_REPLACE = {
        'media2vam.corbologna.corriere.it.edgesuite.net':
            'media2vam-bologna-corriere-it.akamaized.net',
        'media2vam.corfiorentino.corriere.it.edgesuite.net':
            'media2vam-fiorentino-corriere-it.akamaized.net',
        'media2vam.cormezzogiorno.corriere.it.edgesuite.net':
            'media2vam-mezzogiorno-corriere-it.akamaized.net',
        'media2vam.corveneto.corriere.it.edgesuite.net':
            'media2vam-veneto-corriere-it.akamaized.net',
        'media2.oggi.it.edgesuite.net':
            'media2-oggi-it.akamaized.net',
        'media2.quimamme.it.edgesuite.net':
            'media2-quimamme-it.akamaized.net',
        'media2.amica.it.edgesuite.net':
            'media2-amica-it.akamaized.net',
        'media2.living.corriere.it.edgesuite.net':
            'media2-living-corriere-it.akamaized.net',
        'media2.style.corriere.it.edgesuite.net':
            'media2-style-corriere-it.akamaized.net',
        'media2.iodonna.it.edgesuite.net':
            'media2-iodonna-it.akamaized.net',
        'media2.leitv.it.edgesuite.net':
            'media2-leitv-it.akamaized.net',
    }
    # Host (without the '.net' suffix) -> path prefix on vod.rcsobjects.it.
    _MIGRATION_MAP = {
        'videoamica-vh.akamaihd': 'amica',
        'media2-amica-it.akamaized': 'amica',
        'corrierevam-vh.akamaihd': 'corriere',
        'media2vam-corriere-it.akamaized': 'corriere',
        'cormezzogiorno-vh.akamaihd': 'corrieredelmezzogiorno',
        'media2vam-mezzogiorno-corriere-it.akamaized': 'corrieredelmezzogiorno',
        'corveneto-vh.akamaihd': 'corrieredelveneto',
        'media2vam-veneto-corriere-it.akamaized': 'corrieredelveneto',
        'corbologna-vh.akamaihd': 'corrieredibologna',
        'media2vam-bologna-corriere-it.akamaized': 'corrieredibologna',
        'corfiorentino-vh.akamaihd': 'corrierefiorentino',
        'media2vam-fiorentino-corriere-it.akamaized': 'corrierefiorentino',
        'corinnovazione-vh.akamaihd': 'corriereinnovazione',
        'media2-gazzanet-gazzetta-it.akamaized': 'gazzanet',
        'videogazzanet-vh.akamaihd': 'gazzanet',
        'videogazzaworld-vh.akamaihd': 'gazzaworld',
        'gazzettavam-vh.akamaihd': 'gazzetta',
        'media2vam-gazzetta-it.akamaized': 'gazzetta',
        'videoiodonna-vh.akamaihd': 'iodonna',
        'media2-leitv-it.akamaized': 'leitv',
        'videoleitv-vh.akamaihd': 'leitv',
        'videoliving-vh.akamaihd': 'living',
        'media2-living-corriere-it.akamaized': 'living',
        'media2-oggi-it.akamaized': 'oggi',
        'videooggi-vh.akamaihd': 'oggi',
        'media2-quimamme-it.akamaized': 'quimamme',
        'quimamme-vh.akamaihd': 'quimamme',
        'videorunning-vh.akamaihd': 'running',
        'media2-style-corriere-it.akamaized': 'style',
        'style-vh.akamaihd': 'style',
        'videostyle-vh.akamaihd': 'style',
        'media2-stylepiccoli-it.akamaized': 'stylepiccoli',
        'stylepiccoli-vh.akamaihd': 'stylepiccoli',
        'doveviaggi-vh.akamaihd': 'viaggi',
        'media2-doveviaggi-it.akamaized': 'viaggi',
        'media2-vivimilano-corriere-it.akamaized': 'vivimilano',
        'vivimilano-vh.akamaihd': 'vivimilano',
        'media2-youreporter-it.akamaized': 'youreporter'
    }
    # Hosts whose mp4 files are served from *.corriereobjects.it instead.
    _MIGRATION_MEDIA = {
        'advrcs-vh.akamaihd': '',
        'corriere-f.akamaihd': '',
        'corrierepmd-corriere-it.akamaized': '',
        'corrprotetto-vh.akamaihd': '',
        'gazzetta-f.akamaihd': '',
        'gazzettapmd-gazzetta-it.akamaized': '',
        'gazzprotetto-vh.akamaihd': '',
        'periodici-f.akamaihd': '',
        'periodicisecure-vh.akamaihd': '',
        'videocoracademy-vh.akamaihd': ''
    }

    @staticmethod
    def _collapse_slashes(path):
        """Reduce runs of two or three slashes in *path* to a single slash."""
        return path.replace('///', '/').replace('//', '/')

    def _get_video_src(self, video):
        """Return the media URLs of *video* after host/CDN rewriting.

        *video* is the parsed video-data dict; its
        ``mediaProfile.mediaFile`` entries are read.  The result maps
        'mp3' (audio items) or 'm3u8' / 'mp4' (video items) to a URL.
        Entries without a ``value`` are ignored.
        """
        media_profile = video.get('mediaProfile') or {}
        media_files = media_profile.get('mediaFile') or []
        src = {}
        # audio
        if video.get('mediaType') == 'AUDIO':
            for aud in media_files:
                # todo: check
                value = aud.get('value')
                if value:
                    src['mp3'] = value
        # video
        else:
            for vid in media_files:
                value = vid.get('value')
                if not value:
                    continue
                mime_type = vid.get('mimeType')
                if mime_type == 'application/vnd.apple.mpegurl':
                    src['m3u8'] = value
                elif mime_type == 'video/mp4':
                    src['mp4'] = value

        # replace host
        for kind in src:
            for old, new in self._ALL_REPLACE.items():
                src[kind] = src[kind].replace(old, new)
            for old, new in self._MP4_REPLACE.items():
                src[kind] = src[kind].replace(old, new)

        host_path_re = r'(?:https?:)?\/\/(?P<host>.*)\.net\/i(?P<path>.*)$'

        # switch cdn
        if ('mp4' in src and 'm3u8' in src
                and '-lh.akamaihd' not in src['m3u8']
                and 'akamai' in src['mp4']):
            matches = re.search(host_path_re, src['m3u8'])
            # Leave the URL untouched when it has no recognisable host/path
            # or the host has no known migration target.
            if matches and matches.group('host') in self._MIGRATION_MAP:
                src['m3u8'] = 'https://vod.rcsobjects.it/hls/%s%s' % (
                    self._MIGRATION_MAP[matches.group('host')],
                    self._collapse_slashes(
                        matches.group('path')).replace('.csmil', '.urlset'))

            matches = re.search(host_path_re, src['mp4'])
            if matches:
                host = matches.group('host')
                path = self._collapse_slashes(matches.group('path'))
                if host in self._MIGRATION_MEDIA:
                    vh_stream = 'https://media2.corriereobjects.it'
                    # str.find() returns -1 (truthy) when absent, so a
                    # membership test is required here.
                    if 'fcs.quotidiani_!' in src['mp4']:
                        vh_stream = 'https://media2-it.corriereobjects.it'
                    src['mp4'] = '%s%s' % (
                        vh_stream,
                        path.replace(
                            '/fcs.quotidiani/mediacenter', '').replace(
                            '/fcs.quotidiani_!/mediacenter', '').replace(
                            'corriere/content/mediacenter/', '').replace(
                            'gazzetta/content/mediacenter/', ''))
                elif host in self._MIGRATION_MAP:
                    src['mp4'] = 'https://vod.rcsobjects.it/%s%s' % (
                        self._MIGRATION_MAP[host], path)

        if 'mp3' in src:
            src['mp3'] = src['mp3'].replace(
                'media2vam-corriere-it.akamaized.net',
                'vod.rcsobjects.it/corriere')

        # URLs carrying the 'fcs.quotidiani_!' marker, and every URL of a
        # profile with a 'geoblocking' entry, are moved to the vod-it host.
        geoblocked = 'geoblocking' in media_profile
        for kind in ('mp4', 'm3u8'):
            if kind in src and (geoblocked or 'fcs.quotidiani_!' in src[kind]):
                src[kind] = src[kind].replace(
                    'vod.rcsobjects', 'vod-it.rcsobjects')

        if 'm3u8' in src:
            if 'csmil' in src['m3u8'] and 'vod' in src['m3u8']:
                src['m3u8'] = src['m3u8'].replace('.csmil', '.urlset')

        return src

    def _create_formats(self, urls, video_id):
        """Build the sorted formats list from the dict of _get_video_src().

        HLS is preferred; the plain mp4 URL is only used when no HLS
        format could be extracted.  An 'mp3' URL becomes an audio-only
        format.
        """
        formats = []
        m3u8_url = urls.get('m3u8')
        if m3u8_url:
            formats = self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False) or []

        mp4_url = urls.get('mp4')
        if not formats and mp4_url:
            formats.append({
                'format_id': 'http-mp4',
                'url': mp4_url
            })

        mp3_url = urls.get('mp3')
        if mp3_url:
            formats.append({
                'format_id': 'http-mp3',
                'url': mp3_url,
                'vcodec': 'none',
            })

        self._sort_formats(formats)
        return formats

    def _real_extract(self, url):
        video_id = self._match_id(url)
        groups = re.search(self._VALID_URL, url).groupdict()

        cdn = groups.get('cdn')
        if not cdn:
            raise ExtractorError('CDN not found in url: %s' % url)

        # for leitv/youreporter/viaggi don't use the embed page
        if (cdn not in ('leitv.it', 'youreporter.it')
                and groups.get('vid') == 'video'):
            url = 'https://video.%s/video-embed/%s' % (cdn, video_id)

        page = self._download_webpage(url, video_id)

        video_data = None
        # look for json video data url
        # (in verbose mode literal whitespace is ignored, hence the \s+)
        json_url = self._search_regex(
            r'''(?x)var\s+url\s*=\s*["']((?:https?:)?
                //video\.rcs\.it
                /fragment-includes/video-includes/.+?\.json)["'];''',
            page, 'video JSON URL', default=None)
        if json_url:
            if json_url.startswith('//'):
                json_url = 'https:%s' % json_url
            video_data = self._download_json(json_url, video_id)

        # if json url not found, look for json video data directly in the page
        else:
            json_data = self._search_regex(
                r'[\s;]video\s*=\s*({[\s\S]+?})(?:;|,playlist=)',
                page, 'video data', default=None)
            if json_data:
                video_data = self._parse_json(
                    json_data, video_id, transform_source=js_to_json)
            else:
                # if no video data found try search for iframes
                emb = RCSEmbedsIE._extract_url(page)
                if emb:
                    return {
                        '_type': 'url_transparent',
                        'url': emb,
                        'ie_key': RCSEmbedsIE.ie_key()
                    }

        if not video_data:
            raise ExtractorError('Video data not found in the page')

        formats = self._create_formats(
            self._get_video_src(video_data), video_id)

        description = (video_data.get('description')
                       or clean_html(video_data.get('htmlDescription')))
        uploader = video_data.get('provider') or cdn

        return {
            'id': video_id,
            'title': video_data.get('title'),
            'description': description,
            'uploader': uploader,
            'formats': formats
        }
|
||||
|
||||
|
||||
class RCSEmbedsIE(RCSBaseIE):
    # Embed player pages: https://video.<cdn>/video-embed/<id>
    _VALID_URL = r'''(?x)
                    https?://(?P<vid>video)\.
                    (?P<cdn>
                    (?:
                        rcs|
                        (?:corriere\w+\.)?corriere|
                        (?:gazzanet\.)?gazzetta
                    )\.it)
                    /video-embed/(?P<id>[^/=&\?]+?)(?:$|\?)'''
    _TESTS = [{
        'url': 'https://video.rcs.it/video-embed/iodonna-0001585037',
        'md5': '623ecc8ffe7299b2d0c1046d8331a9df',
        'info_dict': {
            'id': 'iodonna-0001585037',
            'ext': 'mp4',
            'title': 'Sky Arte racconta Madonna nella serie "Artist to icon"',
            'description': 'md5:65b09633df9ffee57f48b39e34c9e067',
            'uploader': 'rcs.it',
        }
    }, {
        'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789',
        'md5': 'a043e3fecbe4d9ed7fc5d888652a5440',
        'info_dict': {
            'id': 'gazzanet-mo05-0000260789',
            'ext': 'mp4',
            'title': 'Valentino Rossi e papà Graziano si divertono col drifting',
            'description': 'md5:a8bf90d6adafd9815f70fc74c0fc370a',
            'uploader': 'rcd',
        }
    }, {
        # 'only_matching' is the key the test harness understands
        'url': 'https://video.corriere.it/video-embed/b727632a-f9d0-11ea-91b0-38d50a849abb?player',
        'only_matching': True
    }, {
        'url': 'https://video.gazzetta.it/video-embed/49612410-00ca-11eb-bcd8-30d4253e0140',
        'only_matching': True
    }]

    @staticmethod
    def _sanitize_urls(urls):
        """Normalise embed URLs in place and return the same list.

        Protocol-relative URLs get an 'https:' scheme, then each URL is
        rebuilt from its base and its last path component.
        """
        for i, e in enumerate(urls):
            # add protocol if missing
            if e.startswith('//'):
                e = 'https:%s' % e
            # clean iframes urls
            urls[i] = urljoin(base_url(e), url_basename(e))
        return urls

    @staticmethod
    def _extract_urls(webpage):
        """Return every RCS embed URL referenced by *webpage*, sanitised."""
        entries = [
            mobj.group('url')
            for mobj in re.finditer(r'''(?x)
            (?:
                data-frame-src=|
                <iframe[^\n]+src=
            )
            (["'])
                (?P<url>(?:https?:)?//video\.
                    (?:
                        rcs|
                        (?:corriere\w+\.)?corriere|
                        (?:gazzanet\.)?gazzetta
                    )
                \.it/video-embed/.+?)
            \1''', webpage)]
        return RCSEmbedsIE._sanitize_urls(entries)

    @staticmethod
    def _extract_url(webpage):
        """Return the first embed URL found in *webpage*, or None."""
        urls = RCSEmbedsIE._extract_urls(webpage)
        return urls[0] if urls else None
|
||||
|
||||
|
||||
class RCSIE(RCSBaseIE):
    # Article/video pages on the 'video.' and 'viaggi.' hosts; embed pages
    # are excluded by the negative lookahead (RCSEmbedsIE matches those).
    _VALID_URL = r'''(?x)https?://(?P<vid>video|viaggi)\.
                    (?P<cdn>
                    (?:
                        corrieredelmezzogiorno\.
                        |corrieredelveneto\.
                        |corrieredibologna\.
                        |corrierefiorentino\.
                    )?corriere\.it
                    |(?:gazzanet\.)?gazzetta\.it)
                    /(?!video-embed/).+?/(?P<id>[^/\?]+)(?=\?|/$|$)'''
    _TESTS = [{
        'url': 'https://video.corriere.it/sport/formula-1/vettel-guida-ferrari-sf90-mugello-suo-fianco-c-elecrerc-bendato-video-esilarante/b727632a-f9d0-11ea-91b0-38d50a849abb',
        'md5': '0f4ededc202b0f00b6e509d831e2dcda',
        'info_dict': {
            'id': 'b727632a-f9d0-11ea-91b0-38d50a849abb',
            'ext': 'mp4',
            'title': 'Vettel guida la Ferrari SF90 al Mugello e al suo fianco c\'è Leclerc (bendato): il video è esilarante',
            'description': 'md5:93b51c9161ac8a64fb2f997b054d0152',
            'uploader': 'Corriere Tv',
        }
    }, {
        'url': 'https://viaggi.corriere.it/video/norvegia-il-nuovo-ponte-spettacolare-sopra-la-cascata-di-voringsfossen/',
        'md5': 'da378e4918d2afbf7d61c35abb948d4c',
        'info_dict': {
            'id': '5b7cd134-e2c1-11ea-89b3-b56dd0df2aa2',
            'ext': 'mp4',
            'title': 'La nuova spettacolare attrazione in Norvegia: il ponte sopra Vøringsfossen',
            'description': 'md5:18b35a291f6746c0c8dacd16e5f5f4f8',
            'uploader': 'DOVE Viaggi',
        }
    }, {
        'url': 'https://video.gazzetta.it/video-motogp-catalogna-cadute-dovizioso-vale-rossi/49612410-00ca-11eb-bcd8-30d4253e0140?vclk=Videobar',
        'md5': 'eedc1b5defd18e67383afef51ff7bdf9',
        'info_dict': {
            'id': '49612410-00ca-11eb-bcd8-30d4253e0140',
            'ext': 'mp4',
            'title': 'Dovizioso, il contatto con Zarco e la caduta. E anche Vale finisce a terra',
            'description': 'md5:8c6e905dc3b9413218beca11ebd69778',
            'uploader': 'AMorici',
        }
    }, {
        # 'only_matching' is the key the test harness understands
        'url': 'https://video.corriere.it/video-360/metro-copenaghen-tutta-italiana/a248a7f0-e2db-11e9-9830-af2de6b1f945',
        'only_matching': True
    }]
|
||||
|
||||
|
||||
class RCSVariousIE(RCSBaseIE):
    # Pages on www.leitv.it and www.youreporter.it; the id is the first
    # path segment after an optional "video/" prefix. This pattern has no
    # 'vid' group.
    _VALID_URL = r'''(?x)https?://www\.
                    (?P<cdn>
                        leitv\.it|
                        youreporter\.it
                    )/(?:video/)?(?P<id>[^/]+?)(?:$|\?|/)'''
    _TESTS = [
        {
            'url': 'https://www.leitv.it/video/marmellata-di-ciliegie-fatta-in-casa/',
            'md5': '618aaabac32152199c1af86784d4d554',
            'info_dict': {
                'id': 'marmellata-di-ciliegie-fatta-in-casa',
                'ext': 'mp4',
                'title': 'Marmellata di ciliegie fatta in casa',
                'description': 'md5:89133864d6aad456dbcf6e7a29f86263',
                'uploader': 'leitv.it',
            },
        },
        {
            'url': 'https://www.youreporter.it/fiume-sesia-3-ottobre-2020/',
            'md5': '8dccd436b47a830bab5b4a88232f391a',
            'info_dict': {
                'id': 'fiume-sesia-3-ottobre-2020',
                'ext': 'mp4',
                'title': 'Fiume Sesia 3 ottobre 2020',
                'description': 'md5:0070eef1cc884d13c970a4125063de55',
                'uploader': 'youreporter.it',
            },
        },
    ]
|
67
youtube_dlc/extractor/rumble.py
Normal file
67
youtube_dlc/extractor/rumble.py
Normal file
|
@ -0,0 +1,67 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class RumbleEmbedIE(InfoExtractor):
    # Embed URLs, optionally with a "<prefix>." before the video id.
    _VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)'
    _TESTS = [{
        'url': 'https://rumble.com/embed/v5pv5f',
        'md5': '36a18a049856720189f30977ccbb2c34',
        'info_dict': {
            'id': 'v5pv5f',
            'ext': 'mp4',
            'title': 'WMAR 2 News Latest Headlines | October 20, 6pm',
            'timestamp': 1571611968,
            'upload_date': '20191020',
        }
    }, {
        'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        video = self._download_json(
            'https://rumble.com/embedJS/', video_id,
            query={'request': 'video', 'v': video_id})
        # The title is mandatory: a missing key aborts extraction.
        title = video['title']

        formats = []
        renditions = video.get('ua') or {}
        for height, ua in renditions.items():
            # Slots 0 and 1 are read as URLs; slot idx + 2 is read as the
            # dict holding the bitrate of the URL in slot idx.
            for idx in (0, 1):
                media_url = try_get(ua, lambda x: x[idx], compat_str)
                if not media_url:
                    continue
                ext = determine_ext(media_url)
                fmt = {
                    'ext': ext,
                    'format_id': '%s-%sp' % (ext, height),
                    'height': int_or_none(height),
                    'url': media_url,
                }
                bitrate = try_get(ua, lambda x: x[idx + 2]['bitrate'])
                if bitrate:
                    fmt['tbr'] = int_or_none(bitrate)
                formats.append(fmt)
        self._sort_formats(formats)

        author = video.get('author') or {}

        info = {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': video.get('i'),
            'timestamp': parse_iso8601(video.get('pubDate')),
            'channel': author.get('name'),
            'channel_url': author.get('url'),
            'duration': int_or_none(video.get('duration')),
        }
        return info
|
|
@ -1,9 +1,15 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ServusIE(InfoExtractor):
|
||||
|
@ -12,20 +18,29 @@ class ServusIE(InfoExtractor):
|
|||
(?:www\.)?
|
||||
(?:
|
||||
servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
|
||||
servustv\.com/videos
|
||||
(?:servustv|pm-wissen)\.com/videos
|
||||
)
|
||||
/(?P<id>[aA]{2}-\w+|\d+-\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
# new URL schema
|
||||
'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
|
||||
'md5': '3e1dd16775aa8d5cbef23628cfffc1f4',
|
||||
'md5': '60474d4c21f3eb148838f215c37f02b9',
|
||||
'info_dict': {
|
||||
'id': 'AA-1T6VBU5PW1W12',
|
||||
'ext': 'mp4',
|
||||
'title': 'Die Grünen aus Sicht des Volkes',
|
||||
'alt_title': 'Talk im Hangar-7 Voxpops Gruene',
|
||||
'description': 'md5:1247204d85783afe3682644398ff2ec4',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 62.442,
|
||||
'timestamp': 1605193976,
|
||||
'upload_date': '20201112',
|
||||
'series': 'Talk im Hangar-7',
|
||||
'season': 'Season 9',
|
||||
'season_number': 9,
|
||||
'episode': 'Episode 31 - September 14',
|
||||
'episode_number': 31,
|
||||
}
|
||||
}, {
|
||||
# old URL schema
|
||||
|
@ -40,30 +55,94 @@ class ServusIE(InfoExtractor):
|
|||
}, {
|
||||
'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pm-wissen.com/videos/aa-24mus4g2w2112/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url).upper()
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._search_regex(
|
||||
(r'videoLabel\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||
r'<h\d+[^>]+\bclass=["\']heading--(?:one|two)["\'][^>]*>(?P<title>[^<]+)'),
|
||||
webpage, 'title', default=None,
|
||||
group='title') or self._og_search_title(webpage)
|
||||
title = re.sub(r'\s*-\s*Servus TV\s*$', '', title)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
token = self._download_json(
|
||||
'https://auth.redbullmediahouse.com/token', video_id,
|
||||
'Downloading token', data=urlencode_postdata({
|
||||
'grant_type': 'client_credentials',
|
||||
}), headers={
|
||||
'Authorization': 'Basic SVgtMjJYNEhBNFdEM1cxMTpEdDRVSkFLd2ZOMG5IMjB1NGFBWTBmUFpDNlpoQ1EzNA==',
|
||||
})
|
||||
access_token = token['access_token']
|
||||
token_type = token.get('token_type', 'Bearer')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
'https://stv.rbmbtnx.net/api/v1/manifests/%s.m3u8' % video_id,
|
||||
video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
video = self._download_json(
|
||||
'https://sparkle-api.liiift.io/api/v1/stv/channels/international/assets/%s' % video_id,
|
||||
video_id, 'Downloading video JSON', headers={
|
||||
'Authorization': '%s %s' % (token_type, access_token),
|
||||
})
|
||||
|
||||
formats = []
|
||||
thumbnail = None
|
||||
for resource in video['resources']:
|
||||
if not isinstance(resource, dict):
|
||||
continue
|
||||
format_url = url_or_none(resource.get('url'))
|
||||
if not format_url:
|
||||
continue
|
||||
extension = resource.get('extension')
|
||||
type_ = resource.get('type')
|
||||
if extension == 'jpg' or type_ == 'reference_keyframe':
|
||||
thumbnail = format_url
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if type_ == 'dash' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False))
|
||||
elif type_ == 'hls' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif extension == 'mp4' or ext == 'mp4':
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': type_,
|
||||
'width': int_or_none(resource.get('width')),
|
||||
'height': int_or_none(resource.get('height')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
attrs = {}
|
||||
for attribute in video['attributes']:
|
||||
if not isinstance(attribute, dict):
|
||||
continue
|
||||
key = attribute.get('fieldKey')
|
||||
value = attribute.get('fieldValue')
|
||||
if not key or not value:
|
||||
continue
|
||||
attrs[key] = value
|
||||
|
||||
title = attrs.get('title_stv') or video_id
|
||||
alt_title = attrs.get('title')
|
||||
description = attrs.get('long_description') or attrs.get('short_description')
|
||||
series = attrs.get('label')
|
||||
season = attrs.get('season')
|
||||
episode = attrs.get('chapter')
|
||||
duration = float_or_none(attrs.get('duration'), scale=1000)
|
||||
season_number = int_or_none(self._search_regex(
|
||||
r'Season (\d+)', season or '', 'season number', default=None))
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
r'Episode (\d+)', episode or '', 'episode number', default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'alt_title': alt_title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': unified_timestamp(video.get('lastPublished')),
|
||||
'series': series,
|
||||
'season': season,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
'formats': formats,
|
||||
}
|
||||
|
|
123
youtube_dlc/extractor/skyitalia.py
Normal file
123
youtube_dlc/extractor/skyitalia.py
Normal file
|
@ -0,0 +1,123 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class SkyItaliaBaseIE(InfoExtractor):
    """Shared extraction logic for the sky.it family of extractors.

    Subclasses are expected to provide ``_VALID_URL`` (with an optional
    ``id`` group) and ``_TOKEN``, the token sent to the ``getVideoData``
    endpoint.
    """
    _GET_VIDEO_DATA = 'https://apid.sky.it/vdp/v1/getVideoData?token={token}&caller=sky&rendition=web&id={id}'
    # Quality label -> [width, height]; each label is looked up in the API
    # response under the key 'web_<label>_url'.
    _RES = {
        'low': [426, 240],
        'med': [640, 360],
        'high': [854, 480],
        'hd': [1280, 720]
    }
    _GEO_BYPASS = False

    def _extract_video_id(self, url):
        """Download the page at ``url`` and return the numeric video ID in it.

        Raises ExtractorError when no ID can be found.
        """
        webpage = self._download_webpage(url, 'skyitalia')
        video_id = self._html_search_regex(
            [r'data-videoid=\"(\d+)\"',
             r'http://player\.sky\.it/social\?id=(\d+)\&'],
            webpage, 'video_id')
        if video_id:
            return video_id
        raise ExtractorError('Video ID not found.')

    def _get_formats(self, video_id, token):
        """Query the video data API and return the info dict for ``video_id``.

        Raises a geo-restriction error when the API exposes no usable
        format and flags the video with ``geob == 1``.
        """
        data_url = self._GET_VIDEO_DATA.replace('{id}', video_id)
        data_url = data_url.replace('{token}', token)
        video_data = self._parse_json(
            self._download_webpage(data_url, video_id),
            video_id)

        formats = []
        for q, r in self._RES.items():
            format_url = video_data.get('web_%s_url' % q)
            # Skip renditions whose key is missing *or* whose value is
            # null/empty: a format without a URL cannot be downloaded and
            # would also hide the geo-restriction check below.
            if not format_url:
                continue
            formats.append({
                'url': format_url,
                'format_id': q,
                'width': r[0],
                'height': r[1]
            })

        if not formats and video_data.get('geob') == 1:
            self.raise_geo_restricted(countries=['IT'])

        self._sort_formats(formats)
        title = video_data.get('title')
        thumb = video_data.get('thumb')

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumb,
            'formats': formats
        }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # NOTE(review): the ``id`` group is optional in the subclasses'
        # _VALID_URL; this comparison presumably relies on _match_id()
        # stringifying a missing group to 'None' -- confirm against
        # InfoExtractor._match_id before changing it.
        if video_id == 'None':
            video_id = self._extract_video_id(url)
        return self._get_formats(video_id, self._TOKEN)
|
||||
|
||||
|
||||
class SkyItaliaIE(SkyItaliaBaseIE):
    """Extractor for the sport, tg24 and video subdomains of sky.it."""
    IE_NAME = 'sky.it'
    # Token passed to the getVideoData endpoint by the base class.
    _TOKEN = 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk'
    # The trailing six-digit ID is optional; when the URL carries none the
    # base class looks the ID up in the downloaded page instead.
    _VALID_URL = r'''(?x)https?://
                    (?P<ie>sport|tg24|video)
                    \.sky\.it/(?:.+?)
                    (?P<id>[0-9]{6})?
                    (?:$|\?)'''

    _TESTS = [
        {
            'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162',
            'md5': '9c03b590b06e5952d8051f0e02b0feca',
            'info_dict': {
                'id': '616162',
                'ext': 'mp4',
                'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere',
                'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg',
            }
        },
        {
            'url': 'https://sport.sky.it/motogp/2020/09/18/motogp-gp-emilia-romagna-misano-2020-prove-libere-diretta',
            'md5': '9c03b590b06e5952d8051f0e02b0feca',
            'info_dict': {
                'id': '616162',
                'ext': 'mp4',
                'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere',
                'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg',
            }
        },
        {
            'url': 'https://tg24.sky.it/salute-e-benessere/2020/09/18/coronavirus-vaccino-ue-sanofi',
            'md5': 'caa25e62dadb529bc5e0b078da99f854',
            'info_dict': {
                'id': '615904',
                'ext': 'mp4',
                'title': 'Covid-19, al Buzzi di Milano tamponi drive-in per studenti',
                'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/17/1600351405841_error-coronavirus-al-buzzi-di-milano-tamponi_thumbnail_1.jpg',
            }
        },
        {
            'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162?itm_source=parsely-api',
            'only_matching': True,
        },
    ]
|
||||
|
||||
|
||||
class SkyArteItaliaIE(SkyItaliaBaseIE):
    """Extractor for video pages on arte.sky.it."""
    IE_NAME = 'arte.sky.it'
    # Token passed to the getVideoData endpoint by the base class.
    _TOKEN = 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd'
    # The six-digit ID at the end of the URL is optional.
    _VALID_URL = r'https?://arte\.sky\.it/video/.+?(?P<id>[0-9]{6})?$'
    _TEST = {
        'url': 'https://arte.sky.it/video/federico-fellini-maestri-cinema/',
        'md5': '2f22513a89f45142f2746f878d690647',
        'info_dict': {
            'id': '612888',
            'ext': 'mp4',
            'title': 'I maestri del cinema Federico Felini',
            'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/03/1599146747305_i-maestri-del-cinema-federico-felini_thumbnail_1.jpg',
        }
    }
|
|
@ -649,7 +649,7 @@ def _real_extract(self, url):
|
|||
|
||||
class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
|
||||
def _extract_playlist(self, base_url, playlist_id, playlist_title):
|
||||
# Per the SoundCloud documentation, the maximum limit for a linked partioning query is 200.
|
||||
# Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
|
||||
# https://developers.soundcloud.com/blog/offset-pagination-deprecated
|
||||
COMMON_QUERY = {
|
||||
'limit': 200,
|
||||
|
|
|
@ -44,7 +44,7 @@ class SouthParkEsIE(SouthParkIE):
|
|||
|
||||
class SouthParkDeIE(SouthParkIE):
|
||||
IE_NAME = 'southpark.de'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:videoclip|collections|folgen)/(?P<id>(?P<unique_id>.+?)/.+?)(?:\?|#|$))'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:(en/(videoclip|collections|episodes))|(videoclip|collections|folgen))/(?P<id>(?P<unique_id>.+?)/.+?)(?:\?|#|$))'
|
||||
# _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
|
||||
|
||||
_TESTS = [{
|
||||
|
|
|
@ -1,159 +1,54 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .nexx import (
|
||||
NexxIE,
|
||||
NexxEmbedIE,
|
||||
)
|
||||
from .spiegeltv import SpiegeltvIE
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
strip_or_none,
|
||||
unified_timestamp,
|
||||
)
|
||||
from .jwplatform import JWPlatformIE
|
||||
|
||||
|
||||
class SpiegelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$'
|
||||
_UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:spiegel|manager-magazin)\.de(?:/[^/]+)+/[^/]*-(?P<id>[0-9]+|%s)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$' % _UUID_RE
|
||||
_TESTS = [{
|
||||
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
||||
'md5': 'b57399839d055fccfeb9a0455c439868',
|
||||
'md5': '50c7948883ec85a3e431a0a44b7ad1d6',
|
||||
'info_dict': {
|
||||
'id': '563747',
|
||||
'id': 'II0BUyxY',
|
||||
'display_id': '1259285',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
|
||||
'title': 'Vulkan Tungurahua in Ecuador ist wieder aktiv - DER SPIEGEL - Wissenschaft',
|
||||
'description': 'md5:8029d8310232196eb235d27575a8b9f4',
|
||||
'duration': 49,
|
||||
'duration': 48.0,
|
||||
'upload_date': '20130311',
|
||||
'timestamp': 1362994320,
|
||||
'timestamp': 1362997920,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
|
||||
'md5': '5b6c2f4add9d62912ed5fc78a1faed80',
|
||||
'info_dict': {
|
||||
'id': '580988',
|
||||
'ext': 'mp4',
|
||||
'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
|
||||
'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
|
||||
'duration': 983,
|
||||
'upload_date': '20131115',
|
||||
'timestamp': 1384546642,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
|
||||
'md5': '97b91083a672d72976faa8433430afb9',
|
||||
'info_dict': {
|
||||
'id': '601883',
|
||||
'ext': 'mp4',
|
||||
'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
|
||||
'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
|
||||
'upload_date': '20140904',
|
||||
'timestamp': 1409834160,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# nexx video
|
||||
'url': 'https://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.spiegel.de/panorama/urteile-im-goldmuenzenprozess-haftstrafen-fuer-clanmitglieder-a-aae8df48-43c1-4c61-867d-23f0a2d254b7',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.spiegel.de/video/spiegel-tv-magazin-ueber-guellekrise-in-schleswig-holstein-video-99012776.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
metadata_url = 'http://www.spiegel.de/video/metadata/video-%s.json' % video_id
|
||||
handle = self._request_webpage(metadata_url, video_id)
|
||||
|
||||
# 302 to spiegel.tv, like http://www.spiegel.de/video/der-film-zum-wochenende-die-wahrheit-ueber-maenner-video-99003272.html
|
||||
if SpiegeltvIE.suitable(handle.geturl()):
|
||||
return self.url_result(handle.geturl(), 'Spiegeltv')
|
||||
|
||||
video_data = self._parse_json(self._webpage_read_content(
|
||||
handle, metadata_url, video_id), video_id)
|
||||
title = video_data['title']
|
||||
nexx_id = video_data['nexxOmniaId']
|
||||
domain_id = video_data.get('nexxOmniaDomain') or '748'
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': 'nexx:%s:%s' % (domain_id, nexx_id),
|
||||
'title': title,
|
||||
'description': strip_or_none(video_data.get('teaser')),
|
||||
'duration': parse_duration(video_data.get('duration')),
|
||||
'timestamp': unified_timestamp(video_data.get('datum')),
|
||||
'ie_key': NexxIE.ie_key(),
|
||||
}
|
||||
|
||||
|
||||
class SpiegelArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
|
||||
IE_NAME = 'Spiegel:Article'
|
||||
IE_DESC = 'Articles on spiegel.de'
|
||||
_TESTS = [{
|
||||
}, {
|
||||
'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
|
||||
'info_dict': {
|
||||
'id': '1516455',
|
||||
'ext': 'mp4',
|
||||
'title': 'Faszination Badminton: Nennt es bloß nicht Federball',
|
||||
'description': 're:^Patrick Kämnitz gehört.{100,}',
|
||||
'upload_date': '20140825',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html',
|
||||
'info_dict': {
|
||||
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}, {
|
||||
# Nexx iFrame embed
|
||||
'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html',
|
||||
'info_dict': {
|
||||
'id': '161464',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nervenkitzel Achterbahn',
|
||||
'alt_title': 'Karussellbauer in Deutschland',
|
||||
'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
|
||||
'release_year': 2005,
|
||||
'creator': 'SPIEGEL TV',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 2761,
|
||||
'timestamp': 1394021479,
|
||||
'upload_date': '20140305',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
},
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# Single video on top of the page
|
||||
video_link = self._search_regex(
|
||||
r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage,
|
||||
'video page URL', default=None)
|
||||
if video_link:
|
||||
video_url = compat_urlparse.urljoin(
|
||||
self.http_scheme() + '//spiegel.de/', video_link)
|
||||
return self.url_result(video_url)
|
||||
|
||||
# Multiple embedded videos
|
||||
embeds = re.findall(
|
||||
r'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"',
|
||||
webpage)
|
||||
entries = [
|
||||
self.url_result(compat_urlparse.urljoin(
|
||||
self.http_scheme() + '//spiegel.de/', embed_path))
|
||||
for embed_path in embeds]
|
||||
if embeds:
|
||||
return self.playlist_result(entries)
|
||||
|
||||
return self.playlist_from_matches(
|
||||
NexxEmbedIE._extract_urls(webpage), ie=NexxEmbedIE.ie_key())
|
||||
media_id = self._html_search_regex(
|
||||
r'("|["\'])mediaId\1\s*:\s*("|["\'])(?P<id>(?:(?!\2).)+)\2',
|
||||
webpage, 'media id', group='id')
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'display_id': video_id,
|
||||
'url': 'jwplatform:%s' % media_id,
|
||||
'title': self._og_search_title(webpage, default=None),
|
||||
'ie_key': JWPlatformIE.ie_key(),
|
||||
}
|
||||
|
|
176
youtube_dlc/extractor/spreaker.py
Normal file
176
youtube_dlc/extractor/spreaker.py
Normal file
|
@ -0,0 +1,176 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
def _extract_episode(data, episode_id=None):
    """Build the info dict for one Spreaker episode from its API object.

    ``data`` must contain ``title`` and ``download_url``. ``episode_id``
    takes precedence over ``data['episode_id']`` when it is truthy.
    """
    title = data['title']
    download_url = data['download_url']

    def _count(kind):
        # A counter is read from '<kind>s_count' first and from
        # stats['<kind>s'] as the fallback.
        getters = (
            lambda x: x['%ss_count' % kind],
            lambda x: x['stats']['%ss' % kind],
        )
        return int_or_none(try_get(data, getters))

    def _scaled(field):
        # Value is passed through float_or_none with scale=1000
        # (presumably milliseconds -> seconds; confirm against the API).
        return float_or_none(data.get(field), scale=1000)

    image_urls = (
        url_or_none(data.get('%s_url' % name))
        for name in ('image_original', 'image_medium', 'image'))
    thumbnails = [{'url': image_url} for image_url in image_urls if image_url]

    author_name = try_get(data, lambda x: x['author']['fullname'], compat_str)
    show_title = try_get(data, lambda x: x['show']['title'], compat_str)

    info = {
        'id': compat_str(episode_id or data['episode_id']),
        'url': download_url,
        'display_id': data.get('permalink'),
        'title': title,
        'description': data.get('description'),
        'timestamp': unified_timestamp(data.get('published_at')),
        'uploader': author_name,
        'uploader_id': str_or_none(data.get('author_id')),
        'creator': author_name,
        'duration': _scaled('duration') or _scaled('length'),
        'view_count': _count('play'),
        'like_count': _count('like'),
        'comment_count': _count('message'),
        'format': 'MPEG Layer 3',
        'format_id': 'mp3',
        'container': 'mp3',
        'ext': 'mp3',
        'thumbnails': thumbnails,
        'series': show_title,
        'extractor_key': SpreakerIE.ie_key(),
    }
    return info
|
||||
|
||||
|
||||
class SpreakerIE(InfoExtractor):
    """Extractor for single episodes addressed through api.spreaker.com."""
    _VALID_URL = r'''(?x)
                    https?://
                        api\.spreaker\.com/
                        (?:
                            (?:download/)?episode|
                            v2/episodes
                        )/
                        (?P<id>\d+)
                    '''
    _TESTS = [
        {
            'url': 'https://api.spreaker.com/episode/12534508',
            'info_dict': {
                'id': '12534508',
                'display_id': 'swm-ep15-how-to-market-your-music-part-2',
                'ext': 'mp3',
                'title': 'EP:15 | Music Marketing (Likes) - Part 2',
                'description': 'md5:0588c43e27be46423e183076fa071177',
                'timestamp': 1502250336,
                'upload_date': '20170809',
                'uploader': 'SWM',
                'uploader_id': '9780658',
                'duration': 1063.42,
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'series': 'Success With Music (SWM)',
            },
        },
        {
            'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
            'only_matching': True,
        },
        {
            'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        episode_id = self._match_id(url)
        # Whatever URL flavour matched, the metadata always comes from
        # the v2 episodes endpoint.
        api_url = 'https://api.spreaker.com/v2/episodes/%s' % episode_id
        payload = self._download_json(api_url, episode_id)
        return _extract_episode(payload['response']['episode'], episode_id)
|
||||
|
||||
|
||||
class SpreakerPageIE(InfoExtractor):
    """Extractor for episode pages on spreaker.com.

    Resolves the numeric episode ID from the page and delegates to
    SpreakerIE.
    """
    _VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # Patterns are tried in order: HTML data attribute, then an
        # 'episode_id: <n>' assignment.
        id_patterns = (
            r'data-episode_id=["\'](?P<id>\d+)',
            r'episode_id\s*:\s*(?P<id>\d+)',
        )
        episode_id = self._search_regex(id_patterns, webpage, 'episode id')
        api_url = 'https://api.spreaker.com/episode/%s' % episode_id
        return self.url_result(
            api_url, ie=SpreakerIE.ie_key(), video_id=episode_id)
|
||||
|
||||
|
||||
class SpreakerShowIE(InfoExtractor):
    """Extractor for whole shows addressed through api.spreaker.com."""
    _VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://api.spreaker.com/show/4652058',
        'info_dict': {
            'id': '4652058',
        },
        'playlist_mincount': 118,
    }]

    def _entries(self, show_id):
        """Yield one info dict per episode, walking the paged episode list."""
        episodes_url = 'https://api.spreaker.com/show/%s/episodes' % show_id
        for page_num in itertools.count(1):
            page = self._download_json(
                episodes_url, show_id,
                note='Downloading JSON page %d' % page_num,
                query={
                    'page': page_num,
                    'max_per_page': 100,
                })
            pager = try_get(page, lambda x: x['response']['pager'], dict)
            if not pager:
                return
            results = pager.get('results')
            # Stop on an empty page or on a non-list 'results' value.
            if not (results and isinstance(results, list)):
                return
            for result in results:
                if isinstance(result, dict):
                    yield _extract_episode(result)
            # The pager reports the number of its final page.
            if pager.get('last_page') == page_num:
                return

    def _real_extract(self, url):
        show_id = self._match_id(url)
        entries = self._entries(show_id)
        return self.playlist_result(entries, playlist_id=show_id)
|
||||
|
||||
|
||||
class SpreakerShowPageIE(InfoExtractor):
    """Extractor for show pages on spreaker.com.

    Resolves the numeric show ID from the page and delegates to
    SpreakerShowIE.
    """
    _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.spreaker.com/show/success-with-music',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        show_id = self._search_regex(
            r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id')
        api_url = 'https://api.spreaker.com/show/%s' % show_id
        return self.url_result(
            api_url, ie=SpreakerShowIE.ie_key(), video_id=show_id)
|
|
@ -9,6 +9,7 @@
|
|||
determine_ext,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
|
@ -44,7 +45,8 @@ def _extract_video(self, video_info, video_id):
|
|||
'format_id': player_type,
|
||||
'url': vurl,
|
||||
})
|
||||
if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
|
||||
rights = try_get(video_info, lambda x: x['rights'], dict) or {}
|
||||
if not formats and rights.get('geoBlockedSweden'):
|
||||
self.raise_geo_restricted(
|
||||
'This video is only available in Sweden',
|
||||
countries=self._GEO_COUNTRIES)
|
||||
|
@ -70,6 +72,7 @@ def _extract_video(self, video_info, video_id):
|
|||
episode = video_info.get('episodeTitle')
|
||||
episode_number = int_or_none(video_info.get('episodeNumber'))
|
||||
|
||||
timestamp = unified_timestamp(rights.get('validFrom'))
|
||||
duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
|
||||
age_limit = None
|
||||
adult = dict_get(
|
||||
|
@ -84,6 +87,7 @@ def _extract_video(self, video_info, video_id):
|
|||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'age_limit': age_limit,
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
|
@ -136,26 +140,39 @@ class SVTPlayIE(SVTPlayBaseIE):
|
|||
IE_DESC = 'SVT Play and Öppet arkiv'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
svt:(?P<svt_id>[^/?#&]+)|
|
||||
(?:
|
||||
svt:|
|
||||
https?://(?:www\.)?svt\.se/barnkanalen/barnplay/[^/]+/
|
||||
)
|
||||
(?P<svt_id>[^/?#&]+)|
|
||||
https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
|
||||
'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
|
||||
'url': 'https://www.svtplay.se/video/26194546/det-har-ar-himlen',
|
||||
'md5': '2382036fd6f8c994856c323fe51c426e',
|
||||
'info_dict': {
|
||||
'id': '5996901',
|
||||
'id': 'jNwpV9P',
|
||||
'ext': 'mp4',
|
||||
'title': 'Flygplan till Haile Selassie',
|
||||
'duration': 3527,
|
||||
'thumbnail': r're:^https?://.*[\.-]jpg$',
|
||||
'title': 'Det här är himlen',
|
||||
'timestamp': 1586044800,
|
||||
'upload_date': '20200405',
|
||||
'duration': 3515,
|
||||
'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
|
||||
'age_limit': 0,
|
||||
'subtitles': {
|
||||
'sv': [{
|
||||
'ext': 'wsrt',
|
||||
'ext': 'vtt',
|
||||
}]
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
# skip for now due to download test asserts that segment is > 10000 bytes and svt uses
|
||||
# init segments that are smaller
|
||||
# AssertionError: Expected test_SVTPlay_jNwpV9P.mp4 to be at least 9.77KiB, but it's only 864.00B
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# geo restricted to Sweden
|
||||
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
|
||||
|
@ -172,6 +189,12 @@ class SVTPlayIE(SVTPlayBaseIE):
|
|||
}, {
|
||||
'url': 'svt:14278044',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.svt.se/barnkanalen/barnplay/kar/eWv5MLX/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'svt:eWv5MLX',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _adjust_title(self, info):
|
||||
|
@ -236,7 +259,10 @@ def _real_extract(self, url):
|
|||
r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
|
||||
webpage, 'video id')
|
||||
|
||||
return self._extract_by_video_id(svt_id, webpage)
|
||||
info_dict = self._extract_by_video_id(svt_id, webpage)
|
||||
info_dict['thumbnail'] = thumbnail
|
||||
|
||||
return info_dict
|
||||
|
||||
|
||||
class SVTSeriesIE(SVTPlayBaseIE):
|
||||
|
@ -360,7 +386,7 @@ class SVTPageIE(InfoExtractor):
|
|||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if SVTIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
|
||||
return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
path, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
|
|
@ -86,7 +86,7 @@ def _real_extract(self, url):
|
|||
# return self._extract_via_api(kind, video_id)
|
||||
|
||||
# JSON api does not provide some audio formats (e.g. ogg) thus
|
||||
# extractiong audio via webpage
|
||||
# extracting audio via webpage
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
|
|
|
@ -208,7 +208,7 @@ def _extract_urls(cls, webpage):
|
|||
if m:
|
||||
return [m.group('url')]
|
||||
|
||||
# Are whitesapces ignored in URLs?
|
||||
# Are whitespaces ignored in URLs?
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/12044
|
||||
matches = re.findall(
|
||||
r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
|
||||
|
|
97
youtube_dlc/extractor/thisvid.py
Normal file
97
youtube_dlc/extractor/thisvid.py
Normal file
|
@ -0,0 +1,97 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ThisVidIE(InfoExtractor):
    """Extractor for thisvid.com video and embed pages.

    The page exposes an obfuscated ``video_url`` together with a
    ``license_code``; the playable URL is derived from both by
    getrealurl().
    """
    _VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+/?)'
    _TESTS = [{
        'url': 'https://thisvid.com/videos/french-boy-pantsed/',
        'md5': '3397979512c682f6b85b3b04989df224',
        'info_dict': {
            'id': '2400174',
            'ext': 'mp4',
            'title': 'French Boy Pantsed',
            'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
            'age_limit': 18,
        }
    }, {
        'url': 'https://thisvid.com/embed/2400174/',
        'md5': '3397979512c682f6b85b3b04989df224',
        'info_dict': {
            'id': '2400174',
            'ext': 'mp4',
            'title': 'French Boy Pantsed',
            'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
            'age_limit': 18,
        }
    }]

    def _real_extract(self, url):
        main_id = self._match_id(url)
        webpage = self._download_webpage(url, main_id)

        # URL decryptor was reversed from version 4.0.4, later verified working with 5.2.0 and may change in the future.
        kvs_version = self._html_search_regex(
            r'<script [^>]+?src="https://thisvid\.com/player/kt_player\.js\?v=(\d+(\.\d+)+)">',
            webpage, 'kvs_version', fatal=False)
        # The lookup is non-fatal, so the version may be None; only compare
        # when a version string was actually found.
        if kvs_version and not kvs_version.startswith("5."):
            self.report_warning("Major version change (" + kvs_version + ") in player engine--Download may fail.")

        title = self._html_search_regex(
            r'<title>(?:Video: )?(.+?)(?: - (?:\w+ porn at )?ThisVid(?:.com| tube))?</title>',
            webpage, 'title')
        # video_id, video_url and license_code from the 'flashvars' JSON object:
        video_id = self._html_search_regex(
            r"video_id: '([0-9]+)',", webpage, 'video_id')
        video_url = self._html_search_regex(
            r"video_url: '(function/0/.+?)',", webpage, 'video_url')
        license_code = self._html_search_regex(
            r"license_code: '([0-9$]{16})',", webpage, 'license_code')
        thumbnail = self._html_search_regex(
            r"preview_url: '((?:https?:)?//media.thisvid.com/.+?.jpg)',",
            webpage, 'thumbnail', fatal=False)
        # The thumbnail is optional (non-fatal lookup); complete
        # protocol-relative URLs only when one was found.
        if thumbnail and thumbnail.startswith("//"):
            thumbnail = "https:" + thumbnail

        if re.match(self._VALID_URL, url).group('type') == "videos":
            display_id = main_id
        else:
            # Embed URLs only carry the numeric ID; take the slug from the
            # canonical link. The 'id' group must be requested explicitly,
            # otherwise the first group ('type') would be returned.
            display_id = self._search_regex(
                r'<link rel="canonical" href="' + self._VALID_URL + r'">',
                webpage, 'display_id', fatal=False, group='id')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'url': getrealurl(video_url, license_code),
            'thumbnail': thumbnail,
            'age_limit': 18,
        }
|
||||
|
||||
|
||||
def getrealurl(video_url, license_code):
    """Return the playable URL for an obfuscated ``video_url``.

    The first two '/'-separated components of ``video_url`` are dropped.
    Of the remaining components, the first 32 characters of the one at
    index 5 are unscrambled by a series of character swaps whose
    positions are derived from the license token.
    """
    segments = video_url.split('/')[2:]
    token = getlicensetoken(license_code)
    scrambled = segments[5]
    chars = list(scrambled[:32])

    # Walk the positions from last to first, swapping each one with a
    # partner position computed from the token digits from that position on.
    for pos in reversed(range(len(chars))):
        partner = (pos + sum(int(digit) for digit in token[pos:])) % 32
        chars[pos], chars[partner] = chars[partner], chars[pos]

    segments[5] = "".join(chars) + scrambled[32:]
    return "/".join(segments)
|
||||
|
||||
|
||||
def getlicensetoken(license):
    """Derive the digit string that drives the URL unscrambling swaps.

    :param license: the license code, e.g. ``'$123456789012345'``.  The
        parameter name shadows the ``license`` builtin but is kept so that
        keyword callers keep working.
    :return: a string of ``4 * (center + 1)`` decimal digits, where
        ``center`` is half the length of the code once ``$`` is removed.
    :raises IndexError: if the intermediate key has fewer digits than
        ``center + 1`` or the code is too short.
    :raises ValueError: if a consumed character of the code is not a digit.
    """
    modlicense = license.replace('$', '').replace('0', '1')
    center = len(modlicense) // 2
    # The two halves overlap by one character at ``center``.
    fronthalf = int(modlicense[:center + 1])
    backhalf = int(modlicense[center:])

    key = str(4 * abs(fronthalf - backhalf))
    # Each key digit is added (mod 10) to the four characters of the original
    # code that follow its position.
    return ''.join(
        str((int(license[o + i]) + int(key[o])) % 10)
        for o in range(center + 1)
        for i in range(1, 5))
|
|
@ -56,9 +56,9 @@ def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}):
|
|||
content_id = xpath_text(video_data, 'contentId') or video_id
|
||||
# rtmp_src = xpath_text(video_data, 'akamai/src')
|
||||
# if rtmp_src:
|
||||
# splited_rtmp_src = rtmp_src.split(',')
|
||||
# if len(splited_rtmp_src) == 2:
|
||||
# rtmp_src = splited_rtmp_src[1]
|
||||
# split_rtmp_src = rtmp_src.split(',')
|
||||
# if len(split_rtmp_src) == 2:
|
||||
# rtmp_src = split_rtmp_src[1]
|
||||
# aifp = xpath_text(video_data, 'akamai/aifp', default='')
|
||||
|
||||
urls = []
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
from .spike import ParamountNetworkIE
|
||||
|
||||
# TODO: Remove this extractor - it is no longer used because the service moved to YouTube
|
||||
|
||||
|
||||
class TVLandIE(ParamountNetworkIE):
|
||||
IE_NAME = 'tvland.com'
|
||||
|
|
|
@ -8,8 +8,8 @@
|
|||
|
||||
class TwentyThreeVideoIE(InfoExtractor):
|
||||
IE_NAME = '23video'
|
||||
_VALID_URL = r'https?://video\.(?P<domain>twentythree\.net|23video\.com|filmweb\.no)/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?P<domain>[^.]+\.(?:twentythree\.net|23video\.com|filmweb\.no))/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)'
|
||||
_TESTS = [{
|
||||
'url': 'https://video.twentythree.net/v.ihtml/player.html?showDescriptions=0&source=site&photo%5fid=20448876&autoPlay=1',
|
||||
'md5': '75fcf216303eb1dae9920d651f85ced4',
|
||||
'info_dict': {
|
||||
|
@ -21,11 +21,14 @@ class TwentyThreeVideoIE(InfoExtractor):
|
|||
'uploader_id': '12258964',
|
||||
'uploader': 'Rasmus Bysted',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://bonnier-publications-danmark.23video.com/v.ihtml/player.html?token=f0dc46476e06e13afd5a1f84a29e31e8&source=embed&photo%5fid=36137620',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, query, photo_id = re.match(self._VALID_URL, url).groups()
|
||||
base_url = 'https://video.%s' % domain
|
||||
base_url = 'https://%s' % domain
|
||||
photo_data = self._download_json(
|
||||
base_url + '/api/photo/list?' + query, photo_id, query={
|
||||
'format': 'json',
|
||||
|
|
|
@ -2,7 +2,11 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_timestamp
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class URPlayIE(InfoExtractor):
|
||||
|
@ -15,8 +19,8 @@ class URPlayIE(InfoExtractor):
|
|||
'ext': 'mp4',
|
||||
'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd',
|
||||
'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a',
|
||||
'timestamp': 1513512768,
|
||||
'upload_date': '20171217',
|
||||
'timestamp': 1513292400,
|
||||
'upload_date': '20171214',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
|
||||
|
@ -25,7 +29,7 @@ class URPlayIE(InfoExtractor):
|
|||
'ext': 'mp4',
|
||||
'title': 'Tripp, Trapp, Träd : Sovkudde',
|
||||
'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
|
||||
'timestamp': 1440093600,
|
||||
'timestamp': 1440086400,
|
||||
'upload_date': '20150820',
|
||||
},
|
||||
}, {
|
||||
|
@ -35,37 +39,65 @@ class URPlayIE(InfoExtractor):
|
|||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
url = url.replace('skola.se/Produkter', 'play.se/program')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
urplayer_data = self._parse_json(self._search_regex(
|
||||
r'urPlayer\.init\(({.+?})\);', webpage, 'urplayer data'), video_id)
|
||||
host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']
|
||||
urplayer_data = self._parse_json(self._html_search_regex(
|
||||
r'data-react-class="components/Player/Player"[^>]+data-react-props="({.+?})"',
|
||||
webpage, 'urplayer data'), video_id)['currentProduct']
|
||||
episode = urplayer_data['title']
|
||||
|
||||
host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']
|
||||
formats = []
|
||||
for quality_attr, quality, preference in (('', 'sd', 0), ('_hd', 'hd', 1)):
|
||||
file_http = urplayer_data.get('file_http' + quality_attr) or urplayer_data.get('file_http_sub' + quality_attr)
|
||||
urplayer_streams = urplayer_data.get('streamingInfo', {})
|
||||
|
||||
for k, v in urplayer_streams.get('raw', {}).items():
|
||||
if not (k in ('sd', 'hd') and isinstance(v, dict)):
|
||||
continue
|
||||
file_http = v.get('location')
|
||||
if file_http:
|
||||
formats.extend(self._extract_wowza_formats(
|
||||
'http://%s/%splaylist.m3u8' % (host, file_http), video_id, skip_protocols=['rtmp', 'rtsp']))
|
||||
'http://%s/%splaylist.m3u8' % (host, file_http),
|
||||
video_id, skip_protocols=['f4m', 'rtmp', 'rtsp']))
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for subtitle in urplayer_data.get('subtitles', []):
|
||||
subtitle_url = subtitle.get('file')
|
||||
kind = subtitle.get('kind')
|
||||
if not subtitle_url or (kind and kind != 'captions'):
|
||||
continue
|
||||
subtitles.setdefault(subtitle.get('label', 'Svenska'), []).append({
|
||||
'url': subtitle_url,
|
||||
subs = urplayer_streams.get("sweComplete", {}).get("tt", {}).get("location")
|
||||
if subs:
|
||||
subtitles.setdefault('Svenska', []).append({
|
||||
'url': subs,
|
||||
})
|
||||
|
||||
image = urplayer_data.get('image') or {}
|
||||
thumbnails = []
|
||||
for k, v in image.items():
|
||||
t = {
|
||||
'id': k,
|
||||
'url': v,
|
||||
}
|
||||
wh = k.split('x')
|
||||
if len(wh) == 2:
|
||||
t.update({
|
||||
'width': int_or_none(wh[0]),
|
||||
'height': int_or_none(wh[1]),
|
||||
})
|
||||
thumbnails.append(t)
|
||||
|
||||
series = urplayer_data.get('series') or {}
|
||||
series_title = dict_get(series, ('seriesTitle', 'title')) or dict_get(urplayer_data, ('seriesTitle', 'mainTitle'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': urplayer_data['title'],
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': urplayer_data.get('image'),
|
||||
'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'), webpage, 'timestamp')),
|
||||
'series': urplayer_data.get('series_title'),
|
||||
'subtitles': subtitles,
|
||||
'title': '%s : %s' % (series_title, episode) if series_title else episode,
|
||||
'description': urplayer_data.get('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': unified_timestamp(urplayer_data.get('publishedAt')),
|
||||
'series': series_title,
|
||||
'formats': formats,
|
||||
'duration': int_or_none(urplayer_data.get('duration')),
|
||||
'categories': urplayer_data.get('categories'),
|
||||
'tags': urplayer_data.get('keywords'),
|
||||
'season': series.get('label'),
|
||||
'episode': episode,
|
||||
'episode_number': int_or_none(urplayer_data.get('episodeNumber')),
|
||||
}
|
||||
|
|
|
@ -1,74 +1,24 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
from .nbc import NBCIE
|
||||
|
||||
|
||||
class USANetworkIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?usanetwork\.com/(?:[^/]+/videos|movies)/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.usanetwork.com/mrrobot/videos/hpe-cybersecurity',
|
||||
'md5': '33c0d2ba381571b414024440d08d57fd',
|
||||
class USANetworkIE(NBCIE):
|
||||
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?usanetwork\.com/(?:[^/]+/videos?|movies?)/(?:[^/]+/)?(?P<id>\d+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.usanetwork.com/peacock-trailers/video/intelligence-trailer/4185302',
|
||||
'info_dict': {
|
||||
'id': '3086229',
|
||||
'id': '4185302',
|
||||
'ext': 'mp4',
|
||||
'title': 'HPE Cybersecurity',
|
||||
'description': 'The more we digitize our world, the more vulnerable we are.',
|
||||
'upload_date': '20160818',
|
||||
'timestamp': 1471535460,
|
||||
'uploader': 'NBCU-USA',
|
||||
'title': 'Intelligence (Trailer)',
|
||||
'description': 'A maverick NSA agent enlists the help of a junior systems analyst in a workplace power grab.',
|
||||
'upload_date': '20200715',
|
||||
'timestamp': 1594785600,
|
||||
'uploader': 'NBCU-MPAT',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
    """Build a ``url_transparent`` result that hands the video to ThePlatform.

    Metadata is read from ``data-*`` attributes of the downloaded page.  When
    the page marks the video as an entitlement, an MVPD auth token is
    requested and added to the query of the ThePlatform URL.
    """
    display_id = self._match_id(url)
    webpage = self._download_webpage(url, display_id)

    def _x(name, default=NO_DEFAULT):
        # Value of the quoted ``data-<name>`` attribute found in the page.
        attr_re = r'data-%s\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % name
        return self._search_regex(
            attr_re, webpage, name, default=default, group='value')

    video_id = _x('mpx-guid')
    title = _x('episode-title')
    mpx_account_id = _x('mpx-account-id', '2304992029')

    query = {'mbr': 'true'}
    if _x('is-full-episode', None) == '1':
        query['manifest'] = 'm3u'

    if _x('is-entitlement', None) == '1':
        adobe_pass = {}
        raw_settings = self._search_regex(
            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
            webpage, 'drupal settings', fatal=False)
        if raw_settings:
            settings = self._parse_json(raw_settings, video_id, fatal=False)
            if settings:
                adobe_pass = settings.get('adobePass', {})
        resource_id = adobe_pass.get('adobePassResourceId', 'usa')
        rating = _x('episode-rating', 'TV-14')
        resource = self._get_mvpd_resource(resource_id, title, video_id, rating)
        requestor_id = adobe_pass.get('adobePassRequestorId', 'usa')
        query['auth'] = self._extract_mvpd_auth(
            url, video_id, requestor_id, resource)

    info = self._search_json_ld(webpage, video_id, default={})
    media_url = update_url_query(
        'http://link.theplatform.com/s/HNK2IC/media/guid/%s/%s' % (mpx_account_id, video_id),
        query)
    target_url = smuggle_url(media_url, {'force_smil_url': True})
    series = _x('show-title', None)
    info.update({
        '_type': 'url_transparent',
        'url': target_url,
        'id': video_id,
        'title': title,
        'series': series,
        'episode': title,
        'ie_key': 'ThePlatform',
    })
    return info
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
|
||||
|
||||
class UstreamIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
|
||||
IE_NAME = 'ustream'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ustream.tv/recorded/20274954',
|
||||
|
@ -67,12 +67,15 @@ class UstreamIE(InfoExtractor):
|
|||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.ibm.com/embed/recorded/128240221?&autoplay=true&controls=true&volume=100',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return mobj.group('url')
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import hmac
|
||||
import itertools
|
||||
|
@ -9,6 +10,10 @@
|
|||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
|
@ -16,6 +21,7 @@
|
|||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
)
|
||||
|
||||
|
||||
|
@ -166,19 +172,20 @@ class VikiIE(VikiBaseIE):
|
|||
}, {
|
||||
# episode
|
||||
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
|
||||
'md5': '5fa476a902e902783ac7a4d615cdbc7a',
|
||||
'md5': '94e0e34fd58f169f40c184f232356cfe',
|
||||
'info_dict': {
|
||||
'id': '44699v',
|
||||
'ext': 'mp4',
|
||||
'title': 'Boys Over Flowers - Episode 1',
|
||||
'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
|
||||
'duration': 4204,
|
||||
'duration': 4172,
|
||||
'timestamp': 1270496524,
|
||||
'upload_date': '20100405',
|
||||
'uploader': 'group8',
|
||||
'like_count': int,
|
||||
'age_limit': 13,
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
||||
}, {
|
||||
# youtube external
|
||||
'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
|
||||
|
@ -195,14 +202,15 @@ class VikiIE(VikiBaseIE):
|
|||
'uploader_id': 'ad14065n',
|
||||
'like_count': int,
|
||||
'age_limit': 13,
|
||||
}
|
||||
},
|
||||
'skip': 'Page not found!',
|
||||
}, {
|
||||
'url': 'http://www.viki.com/player/44699v',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# non-English description
|
||||
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
||||
'md5': '1713ae35df5a521b31f6dc40730e7c9c',
|
||||
'md5': 'adf9e321a0ae5d0aace349efaaff7691',
|
||||
'info_dict': {
|
||||
'id': '158036v',
|
||||
'ext': 'mp4',
|
||||
|
@ -218,71 +226,13 @@ class VikiIE(VikiBaseIE):
|
|||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._call_api(
|
||||
'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
streams = self._call_api(
|
||||
'videos/%s/streams.json' % video_id, video_id,
|
||||
'Downloading video streams JSON')
|
||||
|
||||
formats = []
|
||||
for format_id, stream_dict in streams.items():
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None))
|
||||
for protocol, format_dict in stream_dict.items():
|
||||
# rtmps URLs does not seem to work
|
||||
if protocol == 'rtmps':
|
||||
continue
|
||||
format_url = format_dict.get('url')
|
||||
format_drms = format_dict.get('drms')
|
||||
format_stream_id = format_dict.get('id')
|
||||
if format_id == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native',
|
||||
m3u8_id='m3u8-%s' % protocol, fatal=False)
|
||||
# Despite CODECS metadata in m3u8 all video-only formats
|
||||
# are actually video+audio
|
||||
for f in m3u8_formats:
|
||||
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
|
||||
f['acodec'] = None
|
||||
formats.extend(m3u8_formats)
|
||||
elif format_id == 'mpd':
|
||||
mpd_formats = self._extract_mpd_formats(
|
||||
format_url, video_id,
|
||||
mpd_id='mpd-%s' % protocol, fatal=False)
|
||||
formats.extend(mpd_formats)
|
||||
elif format_id == 'mpd':
|
||||
|
||||
formats.extend(mpd_formats)
|
||||
elif format_url.startswith('rtmp'):
|
||||
mobj = re.search(
|
||||
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
|
||||
format_url)
|
||||
if not mobj:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': 'rtmp-%s' % format_id,
|
||||
'ext': 'flv',
|
||||
'url': mobj.group('url'),
|
||||
'play_path': mobj.group('playpath'),
|
||||
'app': mobj.group('app'),
|
||||
'page_url': url,
|
||||
'drms': format_drms,
|
||||
'stream_id': format_stream_id,
|
||||
})
|
||||
else:
|
||||
urlh = self._request_webpage(
|
||||
HEADRequest(format_url), video_id, 'Checking file size', fatal=False)
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': '%s-%s' % (format_id, protocol),
|
||||
'height': height,
|
||||
'drms': format_drms,
|
||||
'stream_id': format_stream_id,
|
||||
'filesize': int_or_none(urlh.headers.get('Content-Length')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
resp = self._download_json(
|
||||
'https://www.viki.com/api/videos/' + video_id,
|
||||
video_id, 'Downloading video JSON', headers={
|
||||
'x-client-user-agent': std_headers['User-Agent'],
|
||||
'x-viki-app-ver': '4.0.57',
|
||||
})
|
||||
video = resp['video']
|
||||
|
||||
self._check_errors(video)
|
||||
|
||||
|
@ -308,19 +258,26 @@ def _real_extract(self, url):
|
|||
'url': thumbnail.get('url'),
|
||||
})
|
||||
|
||||
stream_ids = []
|
||||
for f in formats:
|
||||
s_id = f.get('stream_id')
|
||||
if s_id is not None:
|
||||
stream_ids.append(s_id)
|
||||
|
||||
subtitles = {}
|
||||
for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
|
||||
subtitles[subtitle_lang] = [{
|
||||
'ext': subtitles_format,
|
||||
'url': self._prepare_call(
|
||||
'videos/%s/subtitles/%s.%s?stream_id=%s' % (video_id, subtitle_lang, subtitles_format, stream_ids[0])),
|
||||
} for subtitles_format in ('srt', 'vtt')]
|
||||
try:
|
||||
# New way to fetch subtitles
|
||||
new_video = self._download_json(
|
||||
'https://www.viki.com/api/videos/%s' % video_id, video_id,
|
||||
'Downloading new video JSON to get subtitles', headers={'x-viki-app-ver': '2.2.5.1428709186'}, expected_status=[200, 400, 404])
|
||||
for sub in new_video.get('streamSubtitles').get('dash'):
|
||||
subtitles[sub.get('srclang')] = [{
|
||||
'ext': 'vtt',
|
||||
'url': sub.get('src'),
|
||||
'completion': sub.get('percentage'),
|
||||
}]
|
||||
except AttributeError:
|
||||
# fall-back to the old way if there isn't a streamSubtitles attribute
|
||||
for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
|
||||
subtitles[subtitle_lang] = [{
|
||||
'ext': subtitles_format,
|
||||
'url': self._prepare_call(
|
||||
'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
|
||||
} for subtitles_format in ('srt', 'vtt')]
|
||||
|
||||
result = {
|
||||
'id': video_id,
|
||||
|
@ -335,12 +292,84 @@ def _real_extract(self, url):
|
|||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
if 'external' in streams:
|
||||
result.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': streams['external']['url'],
|
||||
})
|
||||
return result
|
||||
formats = []
|
||||
|
||||
def add_format(format_id, format_dict, protocol='http'):
    """Append the format(s) described by ``format_dict`` to ``formats``.

    :param format_id: stream key; ``m3u8``/``hls`` and ``mpd``/``dash`` are
        expanded through the manifest helpers, anything else is treated as
        a single RTMP or direct format.
    :param format_dict: mapping read for the ``url``, ``drms`` and ``id`` keys.
    :param protocol: protocol label used in the generated format ids.
    """
    # rtmps URLs do not seem to work
    if protocol == 'rtmps':
        return
    format_url = format_dict.get('url')
    if not format_url:
        return
    format_drms = format_dict.get('drms')
    format_stream_id = format_dict.get('id')
    # When the URL carries a ``stream`` query parameter, its base64-decoded
    # value is used as the URL instead.
    qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
    stream = qs.get('stream', [None])[0]
    if stream:
        format_url = base64.b64decode(stream).decode()
    if format_id in ('m3u8', 'hls'):
        m3u8_formats = self._extract_m3u8_formats(
            format_url, video_id, 'mp4',
            entry_protocol='m3u8_native',
            m3u8_id='m3u8-%s' % protocol, fatal=False)
        # Despite CODECS metadata in m3u8 all video-only formats
        # are actually video+audio
        for f in m3u8_formats:
            # Variants whose URL contains the DRM index marker are skipped.
            if '_drm/index_' in f['url']:
                continue
            if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
                f['acodec'] = None
            formats.append(f)
    elif format_id in ('mpd', 'dash'):
        formats.extend(self._extract_mpd_formats(
            format_url, video_id, 'mpd-%s' % protocol, fatal=False))
    elif format_url.startswith('rtmp'):
        mobj = re.search(
            r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
            format_url)
        if not mobj:
            return
        formats.append({
            'format_id': 'rtmp-%s' % format_id,
            'ext': 'flv',
            'url': mobj.group('url'),
            'play_path': mobj.group('playpath'),
            'app': mobj.group('app'),
            'page_url': url,
            'drms': format_drms,
            'stream_id': format_stream_id,
        })
    else:
        urlh = self._request_webpage(
            HEADRequest(format_url), video_id, 'Checking file size', fatal=False)
        # With fatal=False a failed request yields a falsy value rather than
        # a response object, so only read the headers when one is available.
        content_length = urlh.headers.get('Content-Length') if urlh else None
        formats.append({
            'url': format_url,
            'format_id': '%s-%s' % (format_id, protocol),
            'height': int_or_none(self._search_regex(
                r'^(\d+)[pP]$', format_id, 'height', default=None)),
            'drms': format_drms,
            'stream_id': format_stream_id,
            'filesize': int_or_none(content_length),
        })
|
||||
|
||||
for format_id, format_dict in (resp.get('streams') or {}).items():
|
||||
add_format(format_id, format_dict)
|
||||
if not formats:
|
||||
streams = self._call_api(
|
||||
'videos/%s/streams.json' % video_id, video_id,
|
||||
'Downloading video streams JSON')
|
||||
|
||||
if 'external' in streams:
|
||||
result.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': streams['external']['url'],
|
||||
})
|
||||
return result
|
||||
|
||||
for format_id, stream_dict in streams.items():
|
||||
for protocol, format_dict in stream_dict.items():
|
||||
add_format(format_id, format_dict, protocol)
|
||||
self._sort_formats(formats)
|
||||
|
||||
result['formats'] = formats
|
||||
return result
|
||||
|
|
|
@ -922,7 +922,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
|
|||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _fetch_page(self, album_id, authorizaion, hashed_pass, page):
|
||||
def _fetch_page(self, album_id, authorization, hashed_pass, page):
|
||||
api_page = page + 1
|
||||
query = {
|
||||
'fields': 'link,uri',
|
||||
|
@ -934,7 +934,7 @@ def _fetch_page(self, album_id, authorizaion, hashed_pass, page):
|
|||
videos = self._download_json(
|
||||
'https://api.vimeo.com/albums/%s/videos' % album_id,
|
||||
album_id, 'Downloading page %d' % api_page, query=query, headers={
|
||||
'Authorization': 'jwt ' + authorizaion,
|
||||
'Authorization': 'jwt ' + authorization,
|
||||
})['data']
|
||||
for video in videos:
|
||||
link = video.get('link')
|
||||
|
@ -946,10 +946,13 @@ def _fetch_page(self, album_id, authorizaion, hashed_pass, page):
|
|||
|
||||
def _real_extract(self, url):
|
||||
album_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, album_id)
|
||||
viewer = self._parse_json(self._search_regex(
|
||||
r'bootstrap_data\s*=\s*({.+?})</script>',
|
||||
webpage, 'bootstrap data'), album_id)['viewer']
|
||||
viewer = self._download_json(
|
||||
'https://vimeo.com/_rv/viewer', album_id, fatal=False)
|
||||
if not viewer:
|
||||
webpage = self._download_webpage(url, album_id)
|
||||
viewer = self._parse_json(self._search_regex(
|
||||
r'bootstrap_data\s*=\s*({.+?})</script>',
|
||||
webpage, 'bootstrap data'), album_id)['viewer']
|
||||
jwt = viewer['jwt']
|
||||
album = self._download_json(
|
||||
'https://api.vimeo.com/albums/' + album_id,
|
||||
|
|
|
@ -1,25 +1,32 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
import itertools
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .naver import NaverBaseIE
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
remove_start,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class VLiveIE(NaverBaseIE):
|
||||
class VLiveBaseIE(NaverBaseIE):
|
||||
_APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
|
||||
|
||||
|
||||
class VLiveIE(VLiveBaseIE):
|
||||
IE_NAME = 'vlive'
|
||||
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)'
|
||||
_NETRC_MACHINE = 'vlive'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.vlive.tv/video/1326',
|
||||
|
@ -27,7 +34,7 @@ class VLiveIE(NaverBaseIE):
|
|||
'info_dict': {
|
||||
'id': '1326',
|
||||
'ext': 'mp4',
|
||||
'title': "[V LIVE] Girl's Day's Broadcast",
|
||||
'title': "Girl's Day's Broadcast",
|
||||
'creator': "Girl's Day",
|
||||
'view_count': int,
|
||||
'uploader_id': 'muploader_a',
|
||||
|
@ -37,7 +44,7 @@ class VLiveIE(NaverBaseIE):
|
|||
'info_dict': {
|
||||
'id': '16937',
|
||||
'ext': 'mp4',
|
||||
'title': '[V LIVE] 첸백시 걍방',
|
||||
'title': '첸백시 걍방',
|
||||
'creator': 'EXO',
|
||||
'view_count': int,
|
||||
'subtitles': 'mincount:12',
|
||||
|
@ -58,12 +65,15 @@ class VLiveIE(NaverBaseIE):
|
|||
'subtitles': 'mincount:10',
|
||||
},
|
||||
'skip': 'This video is only available for CH+ subscribers',
|
||||
}, {
|
||||
'url': 'https://www.vlive.tv/embed/1326',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# works only with gcc=KR
|
||||
'url': 'https://www.vlive.tv/video/225019',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
def suitable(cls, url):
    """Reject URLs that ``VLivePlaylistIE`` accepts; otherwise use the inherited check."""
    if VLivePlaylistIE.suitable(url):
        return False
    return super(VLiveIE, cls).suitable(url)
|
||||
|
||||
def _real_initialize(self):
    """Initialization hook: delegates to ``self._login()``."""
    self._login()
|
||||
|
||||
|
@ -95,173 +105,199 @@ def is_logged_in():
|
|||
if not is_logged_in():
|
||||
raise ExtractorError('Unable to log in', expected=True)
|
||||
|
||||
def _call_api(self, path_template, video_id, fields=None):
    """Download and return JSON from the ``vam-web`` API.

    :param path_template: API path containing one ``%s`` placeholder, which
        is filled with ``video_id``.
    :param video_id: id substituted into the path and used for reporting.
    :param fields: optional value for the ``fields`` query parameter.
    :raises ExtractorError: re-raised from the download; for an HTTP 403
        response ``raise_login_required`` is first called with the
        ``message`` field of the JSON error body.
    """
    params = {'appId': self._APP_ID, 'gcc': 'KR'}
    if fields:
        params['fields'] = fields
    try:
        api_url = 'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id
        # The note names the resource: last path component up to its first "-".
        resource = path_template.split('/')[-1].split('-')[0]
        return self._download_json(
            api_url, video_id,
            'Downloading %s JSON metadata' % resource,
            headers={'Referer': 'https://www.vlive.tv/'}, query=params)
    except ExtractorError as err:
        cause = err.cause
        if isinstance(cause, compat_HTTPError) and cause.code == 403:
            error_body = json.loads(cause.read().decode())
            self.raise_login_required(error_body['message'])
        raise
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'https://www.vlive.tv/video/%s' % video_id, video_id)
|
||||
post = self._call_api(
|
||||
'post/v1.0/officialVideoPost-%s', video_id,
|
||||
'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId}')
|
||||
|
||||
VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
|
||||
VIDEO_PARAMS_FIELD = 'video params'
|
||||
video = post['officialVideo']
|
||||
|
||||
params = self._parse_json(self._search_regex(
|
||||
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id,
|
||||
transform_source=lambda s: '[' + s + ']', fatal=False)
|
||||
def get_common_fields():
    """Return the metadata fields shared by the VOD and live results.

    Values are read from the enclosing ``post`` and ``video`` mappings;
    counters and the duration are passed through ``int_or_none``.
    """
    channel = post.get('channel') or {}
    # Guard 'author' the same way as 'channel': ``.get('author', {})`` would
    # still return None for an explicit null value and then fail on ``.get``.
    author = post.get('author') or {}
    return {
        'title': video.get('title'),
        'creator': author.get('nickname'),
        'channel': channel.get('channelName'),
        'channel_id': channel.get('channelCode'),
        'duration': int_or_none(video.get('playTime')),
        'view_count': int_or_none(video.get('playCount')),
        'like_count': int_or_none(video.get('likeCount')),
        'comment_count': int_or_none(video.get('commentCount')),
    }
|
||||
|
||||
if not params or len(params) < 7:
|
||||
params = self._search_regex(
|
||||
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD)
|
||||
params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)]
|
||||
|
||||
status, long_video_id, key = params[2], params[5], params[6]
|
||||
status = remove_start(status, 'PRODUCT_')
|
||||
|
||||
if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'):
|
||||
return self._live(video_id, webpage)
|
||||
elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
|
||||
return self._replay(video_id, webpage, long_video_id, key)
|
||||
|
||||
if status == 'LIVE_END':
|
||||
raise ExtractorError('Uploading for replay. Please wait...',
|
||||
expected=True)
|
||||
elif status == 'COMING_SOON':
|
||||
raise ExtractorError('Coming soon!', expected=True)
|
||||
elif status == 'CANCELED':
|
||||
raise ExtractorError('We are sorry, '
|
||||
'but the live broadcast has been canceled.',
|
||||
expected=True)
|
||||
elif status == 'ONLY_APP':
|
||||
raise ExtractorError('Unsupported video type', expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unknown status %s' % status)
|
||||
|
||||
def _get_common_fields(self, webpage):
    """Return the ``title``, ``creator`` and ``thumbnail`` scraped from ``webpage``.

    The title and thumbnail come from the Open Graph helpers; the creator is
    matched with a non-fatal regex search.
    """
    fields = {'title': self._og_search_title(webpage)}
    fields['creator'] = self._html_search_regex(
        r'<div[^>]+class="info_area"[^>]*>\s*(?:<em[^>]*>.*?</em\s*>\s*)?<a\s+[^>]*>([^<]+)',
        webpage, 'creator', fatal=False)
    fields['thumbnail'] = self._og_search_thumbnail(webpage)
    return fields
|
||||
|
||||
def _live(self, video_id, webpage):
    """Build the info dict for a live broadcast.

    The stream description is taken from the init page's ``liveStreamInfo``
    value, and one HLS format set is extracted per listed resolution.
    """
    init_page = self._download_init_page(video_id)

    encoded_info = self._search_regex(
        r'"liveStreamInfo"\s*:\s*(".*"),',
        init_page, 'live stream info')
    # Parsed twice: the captured value is a quoted JSON string, and the
    # result of the first pass is parsed as JSON again.
    stream_info = self._parse_json(
        self._parse_json(encoded_info, video_id), video_id)

    formats = []
    for resolution in stream_info.get('resolutions', []):
        hls_formats = self._extract_m3u8_formats(
            resolution['cdnUrl'], video_id, 'mp4',
            m3u8_id=resolution.get('name'),
            fatal=False, live=True)
        formats.extend(hls_formats)
    self._sort_formats(formats)

    info = self._get_common_fields(webpage)
    info['title'] = self._live_title(info['title'])
    info['id'] = video_id
    info['formats'] = formats
    info['is_live'] = True
    return info
|
||||
|
||||
def _replay(self, video_id, webpage, long_video_id, key):
    """Build the info dict for a replay (VOD) video.

    When either ``long_video_id`` or ``key`` is an empty string, both are
    re-read from the ``oVideoStatus`` object of the init page; a status of
    ``NEED_CHANNEL_PLUS`` there triggers ``raise_login_required``.
    """
    if '' in (long_video_id, key):
        status_patterns = (
            r'(?s)oVideoStatus\s*=\s*({.+?})\s*</script',
            r'(?s)oVideoStatus\s*=\s*({.+})')
        init_page = self._download_init_page(video_id)
        raw_status = self._search_regex(
            status_patterns, init_page, 'video info')
        video_info = self._parse_json(raw_status, video_id)
        if video_info.get('status') == 'NEED_CHANNEL_PLUS':
            self.raise_login_required(
                'This video is only available for CH+ subscribers')
        long_video_id, key = video_info['vid'], video_info['inkey']

    common = self._get_common_fields(webpage)
    details = self._extract_video_info(video_id, long_video_id, key)
    return merge_dicts(common, details)
|
||||
|
||||
def _download_init_page(self, video_id):
    """POST ``videoSeq=<video_id>`` to the init endpoint and return the page text."""
    form_data = urlencode_postdata({'videoSeq': video_id})
    request_headers = {
        'Referer': 'https://www.vlive.tv/video/%s' % video_id,
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    return self._download_webpage(
        'https://www.vlive.tv/video/init/view',
        video_id, note='Downloading live webpage',
        data=form_data, headers=request_headers)
|
||||
video_type = video.get('type')
|
||||
if video_type == 'VOD':
|
||||
inkey = self._call_api('video/v1.0/vod/%s/inkey', video_id)['inkey']
|
||||
vod_id = video['vodId']
|
||||
return merge_dicts(
|
||||
get_common_fields(),
|
||||
self._extract_video_info(video_id, vod_id, inkey))
|
||||
elif video_type == 'LIVE':
|
||||
status = video.get('status')
|
||||
if status == 'ON_AIR':
|
||||
stream_url = self._call_api(
|
||||
'old/v3/live/%s/playInfo',
|
||||
video_id)['result']['adaptiveStreamUrl']
|
||||
formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')
|
||||
info = get_common_fields()
|
||||
info.update({
|
||||
'title': self._live_title(video['title']),
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
})
|
||||
return info
|
||||
elif status == 'ENDED':
|
||||
raise ExtractorError(
|
||||
'Uploading for replay. Please wait...', expected=True)
|
||||
elif status == 'RESERVED':
|
||||
raise ExtractorError('Coming soon!', expected=True)
|
||||
elif video.get('exposeStatus') == 'CANCEL':
|
||||
raise ExtractorError(
|
||||
'We are sorry, but the live broadcast has been canceled.',
|
||||
expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unknown status ' + status)
|
||||
|
||||
|
||||
class VLiveChannelIE(InfoExtractor):
|
||||
class VLivePostIE(VLiveIE):
    """Extractor for vlive.tv post pages (``/post/<id>``).

    A post that references an official video is handed over to VLiveIE.
    Otherwise each video attachment of the post that can be resolved
    becomes one entry of the returned playlist.
    """
    IE_NAME = 'vlive:post'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)'
    _TESTS = [{
        # uploadType = SOS
        'url': 'https://www.vlive.tv/post/1-20088044',
        'info_dict': {
            'id': '1-20088044',
            'title': 'Hola estrellitas la tierra les dice hola (si era así no?) Ha...',
            'description': 'md5:fab8a1e50e6e51608907f46c7fa4b407',
        },
        'playlist_count': 3,
    }, {
        # uploadType = V
        'url': 'https://www.vlive.tv/post/1-20087926',
        'info_dict': {
            'id': '1-20087926',
            'title': 'James Corden: And so, the baby becamos the Papa💜😭💪😭',
        },
        'playlist_count': 1,
    }]
    # The doubled '%%s' survives the class-level formatting below as a
    # literal '%s' placeholder; presumably _call_api fills it with the
    # attachment's video id -- TODO confirm against the base class.
    _FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s'
    _SOS_TMPL = _FVIDEO_TMPL % 'sosPlayInfo'
    _INKEY_TMPL = _FVIDEO_TMPL % 'inKey'

    def _real_extract(self, url):
        """Return a VLiveIE url result or a playlist of the post's videos."""
        post_id = self._match_id(url)

        post = self._call_api(
            'post/v1.0/post-%s', post_id,
            'attachments{video},officialVideo{videoSeq},plainBody,title')

        # Posts backed by an official video are delegated to VLiveIE.
        official_seq = str_or_none(try_get(
            post, lambda x: x['officialVideo']['videoSeq']))
        if official_seq:
            return self.url_result(
                'http://www.vlive.tv/video/' + official_seq,
                VLiveIE.ie_key(), official_seq)

        title = post['title']
        entries = []
        attachments = post['attachments']['video'].values()
        for idx, attachment in enumerate(attachments):
            attachment_id = attachment.get('videoId')
            if not attachment_id:
                continue
            kind = attachment.get('uploadType')
            upload_info = attachment.get('uploadInfo') or {}

            if kind == 'SOS':
                play_info = self._call_api(self._SOS_TMPL, attachment_id)
                downloads = play_info['videoUrl']['download']
                # The format id minus its last character is used as height.
                formats = [{
                    'format_id': f_id,
                    'url': f_url,
                    'height': int_or_none(f_id[:-1]),
                } for f_id, f_url in downloads.items()]
                self._sort_formats(formats)
                entry = {
                    'formats': formats,
                    'id': attachment_id,
                    'thumbnail': upload_info.get('imageUrl'),
                }
            elif kind == 'V':
                vod_id = upload_info.get('videoId')
                if not vod_id:
                    continue
                inkey = self._call_api(
                    self._INKEY_TMPL, attachment_id)['inKey']
                entry = self._extract_video_info(
                    attachment_id, vod_id, inkey)
            else:
                # Any other upload type yields no entry.
                entry = None

            if entry:
                # idx counts every attachment, including skipped ones.
                entry['title'] = '%s_part%s' % (title, idx)
                entries.append(entry)

        description = strip_or_none(post.get('plainBody'))
        return self.playlist_result(entries, post_id, title, description)
|
||||
|
||||
|
||||
class VLiveChannelIE(VLiveBaseIE):
    """Extractor for vlive.tv channel pages.

    The channel code from the URL is resolved to a channel sequence
    number, then the channel's video list is fetched page by page.  Each
    listed video becomes a VLiveIE url result; list items of type
    ``PLAYLIST`` are expanded into the videos they contain.
    """
    IE_NAME = 'vlive:channel'
    _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)'
    _TESTS = [{
        'url': 'http://channels.vlive.tv/FCD4B',
        'info_dict': {
            'id': 'FCD4B',
            'title': 'MAMAMOO',
        },
        'playlist_mincount': 110
    }, {
        'url': 'https://www.vlive.tv/channel/FCD4B',
        'only_matching': True,
    }]
    # Sent as the 'app_id' query parameter of every channelplus request.
    _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'

    def _call_api(self, path, channel_key_suffix, channel_value, note, query):
        """Query a ``channelplus`` endpoint and return its ``result`` member.

        :param path: endpoint name appended to the channelplus base URL
        :param channel_key_suffix: appended to ``'channel'`` to form the
            query key that carries ``channel_value`` (e.g. ``'Code'``)
        :param channel_value: channel identifier; also passed as the
            video id argument of ``_download_json``
        :param note: text appended to ``'Downloading '`` for the progress note
        :param query: extra query parameters merged over the defaults
        :raises KeyError: if the JSON response has no ``result`` key
        """
        q = {
            'app_id': self._APP_ID,
            'channel' + channel_key_suffix: channel_value,
        }
        q.update(query)
        return self._download_json(
            'http://api.vfan.vlive.tv/vproxy/channelplus/' + path,
            channel_value, note='Downloading ' + note, query=q)['result']

    def _channel_video_result(self, video_id):
        """Build the VLiveIE url result for a single video id string."""
        return self.url_result(
            'http://www.vlive.tv/video/%s' % video_id,
            ie=VLiveIE.ie_key(), video_id=video_id)

    def _real_extract(self, url):
        """Return a playlist result holding every video of the channel."""
        channel_code = self._match_id(url)

        channel_seq = self._call_api(
            'decodeChannelCode', 'Code', channel_code,
            'decode channel code', {})['channelSeq']

        channel_name = None
        entries = []

        for page_num in itertools.count(1):
            video_list = self._call_api(
                'getChannelVideoList', 'Seq', channel_seq,
                'channel list page #%d' % page_num, {
                    # Large values of maxNumOfRows (~300 or above) may cause
                    # empty responses (see [1]), e.g. this happens for [2] that
                    # has more than 300 videos.
                    # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
                    # 2. http://channels.vlive.tv/EDBF.
                    'maxNumOfRows': 100,
                    'pageNo': page_num
                }
            )

            if not channel_name:
                channel_name = try_get(
                    video_list,
                    lambda x: x['channelInfo']['channelName'],
                    compat_str)

            videos = try_get(
                video_list, lambda x: x['videoList'], list)
            # An empty or missing list marks the end of the pagination.
            if not videos:
                break

            for video in videos:
                video_id = video.get('videoSeq')
                video_type = video.get('videoType')

                if not video_id or not video_type:
                    continue
                video_id = compat_str(video_id)

                # Compare for equality: ('PLAYLIST') is a plain string, so
                # an `in` test against it would be a substring match.
                if video_type == 'PLAYLIST':
                    playlist_videos = try_get(
                        video,
                        lambda x: x['videoPlaylist']['videoList'], list)
                    if not playlist_videos:
                        continue

                    for playlist_video in playlist_videos:
                        playlist_video_id = playlist_video.get('videoSeq')
                        if not playlist_video_id:
                            continue
                        entries.append(self._channel_video_result(
                            compat_str(playlist_video_id)))
                else:
                    entries.append(self._channel_video_result(video_id))

        return self.playlist_result(
            entries, channel_code, channel_name)
|
||||
|
||||
|
||||
class VLivePlaylistIE(InfoExtractor):
    """Extractor for vlive.tv ``/video/<video_id>/playlist/<id>`` URLs.

    Yields a playlist of VLiveIE url results built from the
    ``playlistVideoSeqs`` array found in the playlist page.  Falls back
    to the single video from the URL when ``noplaylist`` is set or when
    the page carries no such array.
    """
    IE_NAME = 'vlive:playlist'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
    _VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
    _TESTS = [{
        # regular working playlist
        'url': 'https://www.vlive.tv/video/117956/playlist/117963',
        'info_dict': {
            'id': '117963',
            'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
        },
        'playlist_mincount': 10
    }, {
        # playlist with no playlistVideoSeqs
        'url': 'http://www.vlive.tv/video/22867/playlist/22912',
        'info_dict': {
            'id': '22867',
            'ext': 'mp4',
            'title': '[V LIVE] Valentine Day Message from MINA',
            'creator': 'TWICE',
            'view_count': int
        },
        'params': {
            'skip_download': True,
        }
    }]

    def _build_video_result(self, video_id, message):
        """Print *message* and return the url result for one video."""
        self.to_screen(message)
        video_url = self._VIDEO_URL_TEMPLATE % video_id
        return self.url_result(
            video_url, ie=VLiveIE.ie_key(), video_id=video_id)

    def _real_extract(self, url):
        """Return the playlist, or a single-video result as a fallback."""
        match = re.match(self._VALID_URL, url)
        video_id = match.group('video_id')
        playlist_id = match.group('id')

        if self._downloader.params.get('noplaylist'):
            notice = (
                'Downloading just video %s because of --no-playlist'
                % video_id)
            return self._build_video_result(video_id, notice)

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download video'
            % playlist_id)

        page_url = (
            'http://www.vlive.tv/video/%s/playlist/%s'
            % (video_id, playlist_id))
        webpage = self._download_webpage(page_url, playlist_id)

        # The member video ids are embedded in the page as a JS array.
        raw_item_ids = self._search_regex(
            r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
            'playlist video seqs', default=None, fatal=False)
        if not raw_item_ids:
            notice = (
                'Downloading just video %s because no playlist was found'
                % video_id)
            return self._build_video_result(video_id, notice)

        entries = []
        for item_id in self._parse_json(raw_item_ids, playlist_id):
            entries.append(self.url_result(
                self._VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
                video_id=compat_str(item_id)))

        playlist_name = self._html_search_regex(
            r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
            webpage, 'playlist title', fatal=False)

        return self.playlist_result(entries, playlist_id, playlist_name)
|
||||
|
|
|
@ -54,17 +54,17 @@ def _extract_tracks(self, item_id, referer, typ=None):
|
|||
def _decrypt(origin):
|
||||
n = int(origin[0])
|
||||
origin = origin[1:]
|
||||
short_lenth = len(origin) // n
|
||||
long_num = len(origin) - short_lenth * n
|
||||
short_length = len(origin) // n
|
||||
long_num = len(origin) - short_length * n
|
||||
l = tuple()
|
||||
for i in range(0, n):
|
||||
length = short_lenth
|
||||
length = short_length
|
||||
if i < long_num:
|
||||
length += 1
|
||||
l += (origin[0:length], )
|
||||
origin = origin[length:]
|
||||
ans = ''
|
||||
for i in range(0, short_lenth + 1):
|
||||
for i in range(0, short_length + 1):
|
||||
for j in range(0, n):
|
||||
if len(l[j]) > i:
|
||||
ans += l[j][i]
|
||||
|
|
|
@ -38,22 +38,6 @@ class XTubeIE(InfoExtractor):
|
|||
'comment_count': int,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
# FLV videos with duplicated formats
|
||||
'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752',
|
||||
'md5': 'a406963eb349dd43692ec54631efd88b',
|
||||
'info_dict': {
|
||||
'id': '9299752',
|
||||
'display_id': 'A-Super-Run-Part-1-YT',
|
||||
'ext': 'flv',
|
||||
'title': 'A Super Run - Part 1 (YT)',
|
||||
'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616',
|
||||
'uploader': 'tshirtguy59',
|
||||
'duration': 579,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 18,
|
||||
},
|
||||
}, {
|
||||
# new URL schema
|
||||
'url': 'http://www.xtube.com/video-watch/strange-erotica-625837',
|
||||
|
@ -90,7 +74,7 @@ def _real_extract(self, url):
|
|||
title, thumbnail, duration = [None] * 3
|
||||
|
||||
config = self._parse_json(self._search_regex(
|
||||
r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config',
|
||||
r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf)', webpage, 'config',
|
||||
default='{}'), video_id, transform_source=js_to_json, fatal=False)
|
||||
if config:
|
||||
config = config.get('mainRoll')
|
||||
|
|
|
@ -29,7 +29,6 @@ class YouPornIE(InfoExtractor):
|
|||
'upload_date': '20101217',
|
||||
'average_rating': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'categories': list,
|
||||
'tags': list,
|
||||
'age_limit': 18,
|
||||
|
@ -48,7 +47,6 @@ class YouPornIE(InfoExtractor):
|
|||
'upload_date': '20110418',
|
||||
'average_rating': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'categories': list,
|
||||
'tags': list,
|
||||
'age_limit': 18,
|
||||
|
@ -156,7 +154,8 @@ def _real_extract(self, url):
|
|||
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
[r'Date\s+[Aa]dded:\s*<span>([^<]+)',
|
||||
[r'UPLOADED:\s*<span>([^<]+)',
|
||||
r'Date\s+[Aa]dded:\s*<span>([^<]+)',
|
||||
r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
|
||||
webpage, 'upload date', fatal=False))
|
||||
|
||||
|
@ -171,7 +170,7 @@ def _real_extract(self, url):
|
|||
webpage, 'view count', fatal=False, group='count'))
|
||||
comment_count = str_to_int(self._search_regex(
|
||||
r'>All [Cc]omments? \(([\d,.]+)\)',
|
||||
webpage, 'comment count', fatal=False))
|
||||
webpage, 'comment count', default=None))
|
||||
|
||||
def extract_tag_box(regex, title):
|
||||
tag_box = self._search_regex(regex, webpage, title, default=None)
|
||||
|
|
File diff suppressed because it is too large
Load diff
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue