Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2024-10-31 23:02:40 +00:00)

Commit 931a90e7da: Merge branch 'yt-dlp:master' into teachable-fix-add-hotmart
202 changed files with 8502 additions and 2862 deletions
.github/ISSUE_TEMPLATE/1_broken_site.yml (vendored, 8 changes)

@@ -18,7 +18,7 @@ body:
       options:
         - label: I'm reporting that yt-dlp is broken on a **supported** site
           required: true
-        - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+        - label: I've verified that I'm running yt-dlp version **2023.10.13** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
           required: true
         - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
           required: true
@@ -64,7 +64,7 @@ body:
         [debug] Command-line config: ['-vU', 'test:youtube']
         [debug] Portable config "yt-dlp.conf": ['-i']
         [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-        [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
+        [debug] yt-dlp version 2023.10.13 [9d339c4] (win32_exe)
         [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
         [debug] Checking exe version: ffmpeg -bsfs
         [debug] Checking exe version: ffprobe -bsfs
@@ -72,8 +72,8 @@ body:
         [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
         [debug] Proxy map: {}
         [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-        Latest version: 2023.07.06, Current version: 2023.07.06
-        yt-dlp is up to date (2023.07.06)
+        Latest version: 2023.10.13, Current version: 2023.10.13
+        yt-dlp is up to date (2023.10.13)
         <more lines>
       render: shell
     validations:
.github/ISSUE_TEMPLATE/2_site_support_request.yml (vendored, 8 changes)

@@ -18,7 +18,7 @@ body:
       options:
         - label: I'm reporting a new site support request
           required: true
-        - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+        - label: I've verified that I'm running yt-dlp version **2023.10.13** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
           required: true
         - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
           required: true
@@ -76,7 +76,7 @@ body:
         [debug] Command-line config: ['-vU', 'test:youtube']
         [debug] Portable config "yt-dlp.conf": ['-i']
         [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-        [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
+        [debug] yt-dlp version 2023.10.13 [9d339c4] (win32_exe)
         [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
         [debug] Checking exe version: ffmpeg -bsfs
         [debug] Checking exe version: ffprobe -bsfs
@@ -84,8 +84,8 @@ body:
         [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
         [debug] Proxy map: {}
         [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-        Latest version: 2023.07.06, Current version: 2023.07.06
-        yt-dlp is up to date (2023.07.06)
+        Latest version: 2023.10.13, Current version: 2023.10.13
+        yt-dlp is up to date (2023.10.13)
         <more lines>
       render: shell
     validations:
.github/ISSUE_TEMPLATE/3_site_feature_request.yml (vendored, 8 changes)

@@ -18,7 +18,7 @@ body:
       options:
         - label: I'm requesting a site-specific feature
           required: true
-        - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+        - label: I've verified that I'm running yt-dlp version **2023.10.13** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
           required: true
         - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
           required: true
@@ -72,7 +72,7 @@ body:
         [debug] Command-line config: ['-vU', 'test:youtube']
         [debug] Portable config "yt-dlp.conf": ['-i']
         [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-        [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
+        [debug] yt-dlp version 2023.10.13 [9d339c4] (win32_exe)
         [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
         [debug] Checking exe version: ffmpeg -bsfs
         [debug] Checking exe version: ffprobe -bsfs
@@ -80,8 +80,8 @@ body:
         [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
         [debug] Proxy map: {}
         [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-        Latest version: 2023.07.06, Current version: 2023.07.06
-        yt-dlp is up to date (2023.07.06)
+        Latest version: 2023.10.13, Current version: 2023.10.13
+        yt-dlp is up to date (2023.10.13)
         <more lines>
       render: shell
     validations:
.github/ISSUE_TEMPLATE/4_bug_report.yml (vendored, 8 changes)

@@ -18,7 +18,7 @@ body:
       options:
         - label: I'm reporting a bug unrelated to a specific site
           required: true
-        - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+        - label: I've verified that I'm running yt-dlp version **2023.10.13** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
           required: true
         - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
           required: true
@@ -57,7 +57,7 @@ body:
         [debug] Command-line config: ['-vU', 'test:youtube']
         [debug] Portable config "yt-dlp.conf": ['-i']
         [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-        [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
+        [debug] yt-dlp version 2023.10.13 [9d339c4] (win32_exe)
         [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
         [debug] Checking exe version: ffmpeg -bsfs
         [debug] Checking exe version: ffprobe -bsfs
@@ -65,8 +65,8 @@ body:
         [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
         [debug] Proxy map: {}
         [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-        Latest version: 2023.07.06, Current version: 2023.07.06
-        yt-dlp is up to date (2023.07.06)
+        Latest version: 2023.10.13, Current version: 2023.10.13
+        yt-dlp is up to date (2023.10.13)
         <more lines>
       render: shell
     validations:
.github/ISSUE_TEMPLATE/5_feature_request.yml (vendored, 8 changes)

@@ -20,7 +20,7 @@ body:
           required: true
         - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
           required: true
-        - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+        - label: I've verified that I'm running yt-dlp version **2023.10.13** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
           required: true
         - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
           required: true
@@ -53,7 +53,7 @@ body:
         [debug] Command-line config: ['-vU', 'test:youtube']
         [debug] Portable config "yt-dlp.conf": ['-i']
         [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-        [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
+        [debug] yt-dlp version 2023.10.13 [9d339c4] (win32_exe)
         [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
         [debug] Checking exe version: ffmpeg -bsfs
         [debug] Checking exe version: ffprobe -bsfs
@@ -61,7 +61,7 @@ body:
         [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
         [debug] Proxy map: {}
         [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-        Latest version: 2023.07.06, Current version: 2023.07.06
-        yt-dlp is up to date (2023.07.06)
+        Latest version: 2023.10.13, Current version: 2023.10.13
+        yt-dlp is up to date (2023.10.13)
         <more lines>
       render: shell
.github/ISSUE_TEMPLATE/6_question.yml (vendored, 8 changes)

@@ -26,7 +26,7 @@ body:
           required: true
         - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
           required: true
-        - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+        - label: I've verified that I'm running yt-dlp version **2023.10.13** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
           required: true
         - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates
           required: true
@@ -59,7 +59,7 @@ body:
         [debug] Command-line config: ['-vU', 'test:youtube']
         [debug] Portable config "yt-dlp.conf": ['-i']
         [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-        [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
+        [debug] yt-dlp version 2023.10.13 [9d339c4] (win32_exe)
         [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
         [debug] Checking exe version: ffmpeg -bsfs
         [debug] Checking exe version: ffprobe -bsfs
@@ -67,7 +67,7 @@ body:
         [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
         [debug] Proxy map: {}
         [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-        Latest version: 2023.07.06, Current version: 2023.07.06
-        yt-dlp is up to date (2023.07.06)
+        Latest version: 2023.10.13, Current version: 2023.10.13
+        yt-dlp is up to date (2023.10.13)
         <more lines>
       render: shell
.github/workflows/core.yml (vendored, 9 changes)

@@ -13,13 +13,16 @@ jobs:
       matrix:
         os: [ubuntu-latest]
         # CPython 3.11 is in quick-test
-        python-version: ['3.8', '3.9', '3.10', pypy-3.7, pypy-3.8]
+        python-version: ['3.8', '3.9', '3.10', '3.12', pypy-3.7, pypy-3.8, pypy-3.10]
         run-tests-ext: [sh]
         include:
         # atleast one of each CPython/PyPy tests must be in windows
         - os: windows-latest
           python-version: '3.7'
           run-tests-ext: bat
+        - os: windows-latest
+          python-version: '3.12'
+          run-tests-ext: bat
         - os: windows-latest
           python-version: pypy-3.9
           run-tests-ext: bat
@@ -29,8 +32,8 @@ jobs:
         uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
-      - name: Install pytest
-        run: pip install pytest
+      - name: Install dependencies
+        run: pip install pytest -r requirements.txt
       - name: Run tests
         continue-on-error: False
         run: |
.github/workflows/download.yml (vendored, 2 changes)

@@ -28,7 +28,7 @@ jobs:
       fail-fast: true
       matrix:
         os: [ubuntu-latest]
-        python-version: ['3.7', '3.10', 3.11-dev, pypy-3.7, pypy-3.8]
+        python-version: ['3.7', '3.10', '3.12', pypy-3.7, pypy-3.8, pypy-3.10]
         run-tests-ext: [sh]
         include:
         # atleast one of each CPython/PyPy tests must be in windows
CONTRIBUTING.md

@@ -217,7 +217,7 @@ ## Adding support for a new site
 1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`.
 1. Run `python test/test_download.py TestDownload.test_YourExtractor` (note that `YourExtractor` doesn't end with `IE`). This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, the tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not counted. You can also run all the tests in one go with `TestDownload.test_YourExtractor_all`
 1. Make sure you have at least one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running.
-1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L91-L426). Add tests and code for as many as you want.
+1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L119-L440). Add tests and code for as many as you want.
 1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):

        $ flake8 yt_dlp/extractor/yourextractor.py
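For orientation, here is a condensed sketch of the extractor skeleton those steps operate on. The placeholder names (`YourExtractorIE`, `yourextractor.com`) follow the contributing guide's own template; treat the body as an illustrative summary, not the verbatim upstream snippet (in-tree files import via `from .common import InfoExtractor`):

```python
from yt_dlp.extractor.common import InfoExtractor


class YourExtractorIE(InfoExtractor):  # class name must end with IE
    _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://yourextractor.com/watch/42',
        'info_dict': {
            'id': '42',
            'ext': 'mp4',
            'title': 'Video title goes here',
        },
        # Document why a test is disabled instead of deleting it:
        # 'skip': 'Video is geo-restricted',
    }, {
        # only_matching tests merely check _VALID_URL; they are not counted
        # in the test_YourExtractor, test_YourExtractor_1, ... numbering
        'url': 'https://yourextractor.com/watch/43',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        return {
            'id': video_id,
            'title': self._html_extract_title(webpage),
        }
```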
|
@ -251,7 +251,7 @@ ## yt-dlp coding conventions
|
|||
|
||||
### Mandatory and optional metafields
|
||||
|
||||
For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L91-L426) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp:
|
||||
For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L119-L440) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp:
|
||||
|
||||
- `id` (media identifier)
|
||||
- `title` (media title)
|
||||
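As a quick illustration of that rule, here is a hedged sketch (field values are invented, and the optional fields shown are only a small selection of what an info dict may carry):

```python
# Only 'id' and 'title' are mandatory; everything else is best-effort.
minimal_info_dict = {
    'id': '42',
    'title': 'Example title',
    'url': 'https://example.com/media/42.mp4',  # a direct URL, or a 'formats' list
}

# Optional metadata should be filled in only when it is actually present:
fuller_info_dict = {
    'id': '42',
    'title': 'Example title',
    'formats': [{'url': 'https://example.com/media/42.mp4', 'ext': 'mp4'}],
    'uploader': 'example-channel',
    'upload_date': '20231013',  # YYYYMMDD
    'duration': 3723,           # seconds
}
```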
@@ -696,7 +696,7 @@ #### Examples

 ### Use convenience conversion and parsing functions

-Wrap all extracted numeric data into safe functions from [`yt_dlp/utils.py`](yt_dlp/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+Wrap all extracted numeric data into safe functions from [`yt_dlp/utils/`](yt_dlp/utils/): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.

 Use `url_or_none` for safe URL processing.

@@ -704,7 +704,7 @@ ### Use convenience conversion and parsing functions

 Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.

-Explore [`yt_dlp/utils.py`](yt_dlp/utils.py) for more useful convenience functions.
+Explore [`yt_dlp/utils/`](yt_dlp/utils/) for more useful convenience functions.

 #### Examples
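A usage sketch for the helpers named above (all are real `yt_dlp.utils` functions; the inputs are invented and the commented outputs are the expected results):

```python
from yt_dlp.utils import (
    float_or_none,
    int_or_none,
    parse_count,
    parse_duration,
    parse_filesize,
    unified_strdate,
    unified_timestamp,
    url_or_none,
)

int_or_none('1920')      # 1920
int_or_none(None)        # None: missing data never raises
float_or_none('23.976')  # 23.976
url_or_none('https://cdn.example.com/v.mp4')  # returned unchanged
url_or_none('javascript:alert(1)')            # None: not a downloadable URL scheme
unified_strdate('2023-10-13')                 # '20231013' (YYYYMMDD)
unified_timestamp('2023-10-13T12:00:00Z')     # 1697198400
parse_duration('1:02:03')  # 3723 seconds
parse_filesize('1.5 MiB')  # 1572864
parse_count('1.2M')        # 1200000
```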
CONTRIBUTORS (48 changes)

@@ -2,7 +2,6 @@ pukkandan (owner)
 shirt-dev (collaborator)
 coletdjnz/colethedj (collaborator)
 Ashish0804 (collaborator)
-nao20010128nao/Lesmiscore (collaborator)
 bashonly (collaborator)
 Grub4K (collaborator)
 h-h-h-h
@@ -467,3 +466,50 @@ nnoboa
 rdamas
 RfadnjdExt
 urectanc
+nao20010128nao/Lesmiscore
+04-pasha-04
+aaruni96
+aky-01
+AmirAflak
+ApoorvShah111
+at-wat
+davinkevin
+demon071
+denhotte
+FinnRG
+fireattack
+Frankgoji
+GD-Slime
+hatsomatt
+ifan-t
+kshitiz305
+kylegustavo
+mabdelfattah
+nathantouze
+niemands
+Rajeshwaran2001
+RedDeffender
+Rohxn16
+sb0stn
+SevenLives
+simon300000
+snixon
+soundchaser128
+szabyg
+trainman261
+trislee
+wader
+Yalab7
+zhallgato
+zhong-yiyu
+Zprokkel
+AS6939
+drzraf
+handlerug
+jiru
+madewokherd
+xofe
+awalgarg
+midnightveil
+naginatana
+Riteo
Changelog.md (253 changes)

@@ -4,13 +4,262 @@ # Changelog
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
-->
### 2023.10.13

#### Core changes
- [Ensure thumbnail output directory exists](https://github.com/yt-dlp/yt-dlp/commit/2acd1d555ef89851c73773776715d3de9a0e30b9) ([#7985](https://github.com/yt-dlp/yt-dlp/issues/7985)) by [Riteo](https://github.com/Riteo)
- **utils**
    - `js_to_json`: [Fix `Date` constructor parsing](https://github.com/yt-dlp/yt-dlp/commit/9d7ded6419089c1bf252496073f73ad90ed71004) ([#8295](https://github.com/yt-dlp/yt-dlp/issues/8295)) by [awalgarg](https://github.com/awalgarg), [Grub4K](https://github.com/Grub4K) (a usage sketch follows this list)
    - `write_xattr`: [Use `os.setxattr` if available](https://github.com/yt-dlp/yt-dlp/commit/84e26038d4002e763ea51ca1bdce4f7e63c540bf) ([#8205](https://github.com/yt-dlp/yt-dlp/issues/8205)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
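For context on the `js_to_json` entry above, a small usage sketch (the function is real; the sample input and commented output are invented):

```python
import json
from yt_dlp.utils import js_to_json

# js_to_json turns JavaScript-ish object notation (single quotes, unquoted
# keys, trailing commas) into strict JSON text that json.loads accepts.
js_code = "{title: 'Example', id: 42,}"
print(json.loads(js_to_json(js_code)))  # {'title': 'Example', 'id': 42}

# The linked fix adjusts how `new Date(...)` values inside such objects are
# parsed; their exact output is version-dependent, so this sketch sticks to
# the basic conversion.
```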
#### Extractor changes
- **artetv**: [Support age-restricted content](https://github.com/yt-dlp/yt-dlp/commit/09f815ad52843219a7ee3f2a0dddf6c250c91f0c) ([#8301](https://github.com/yt-dlp/yt-dlp/issues/8301)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
- **jtbc**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b286ec68f1f28798b3e371f888a2ed97d399cf77) ([#8314](https://github.com/yt-dlp/yt-dlp/issues/8314)) by [seproDev](https://github.com/seproDev)
- **mbn**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e030b6b6fba7b2f4614ad2ab9f7649d40a2dd305) ([#8312](https://github.com/yt-dlp/yt-dlp/issues/8312)) by [seproDev](https://github.com/seproDev)
- **nhk**: [Fix Japanese-language VOD extraction](https://github.com/yt-dlp/yt-dlp/commit/4de94b9e165bfd6421a692f5f2eabcdb08edcb71) ([#8309](https://github.com/yt-dlp/yt-dlp/issues/8309)) by [garret1317](https://github.com/garret1317)
- **radiko**: [Fix bug with `downloader_options`](https://github.com/yt-dlp/yt-dlp/commit/b9316642313bbc9e209ac0d2276d37ba60bceb49) by [bashonly](https://github.com/bashonly)
- **tenplay**: [Add support for seasons](https://github.com/yt-dlp/yt-dlp/commit/88a99c87b680ae59002534a517e191f46c42cbd4) ([#7939](https://github.com/yt-dlp/yt-dlp/issues/7939)) by [midnightveil](https://github.com/midnightveil)
- **youku**: [Improve tudou.com support](https://github.com/yt-dlp/yt-dlp/commit/b7098d46b552a9322c6cea39ba80be5229f922de) ([#8160](https://github.com/yt-dlp/yt-dlp/issues/8160)) by [naginatana](https://github.com/naginatana)
- **youtube**: [Fix bug with `--extractor-retries inf`](https://github.com/yt-dlp/yt-dlp/commit/feebf6d02fc9651331eee2af5e08e6112288163b) ([#8328](https://github.com/yt-dlp/yt-dlp/issues/8328)) by [Grub4K](https://github.com/Grub4K)

#### Downloader changes
- **fragment**: [Improve progress calculation](https://github.com/yt-dlp/yt-dlp/commit/1c51c520f7b511ebd9e4eb7322285a8c31eedbbd) ([#8241](https://github.com/yt-dlp/yt-dlp/issues/8241)) by [Grub4K](https://github.com/Grub4K)

#### Misc. changes
- **cleanup**: Miscellaneous: [b634ba7](https://github.com/yt-dlp/yt-dlp/commit/b634ba742d8f38ce9ecfa0546485728b0c6c59d1) by [bashonly](https://github.com/bashonly), [gamer191](https://github.com/gamer191)
### 2023.10.07

#### Extractor changes
- **abc.net.au**: iview: [Improve `episode` extraction](https://github.com/yt-dlp/yt-dlp/commit/a9efb4b8d74f3583450ffda0ee57259a47d39c70) ([#8201](https://github.com/yt-dlp/yt-dlp/issues/8201)) by [xofe](https://github.com/xofe)
- **erocast**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/47c598783c98c179e04dd12c2a3fee0f3dc53087) ([#8264](https://github.com/yt-dlp/yt-dlp/issues/8264)) by [madewokherd](https://github.com/madewokherd)
- **gofile**: [Fix token cookie bug](https://github.com/yt-dlp/yt-dlp/commit/0730d5a966fa8a937d84bfb7f68be5198acb039b) by [bashonly](https://github.com/bashonly)
- **iq.com**: [Fix extraction and subtitles](https://github.com/yt-dlp/yt-dlp/commit/35d9cbaf9638ccc9daf8a863063b2e7c135bc664) ([#8260](https://github.com/yt-dlp/yt-dlp/issues/8260)) by [AS6939](https://github.com/AS6939)
- **lbry**
    - [Add playlist support](https://github.com/yt-dlp/yt-dlp/commit/48cceec1ddb8649b5e771df8df79eb9c39c82b90) ([#8213](https://github.com/yt-dlp/yt-dlp/issues/8213)) by [bashonly](https://github.com/bashonly), [drzraf](https://github.com/drzraf), [Grub4K](https://github.com/Grub4K)
    - [Extract `uploader_id`](https://github.com/yt-dlp/yt-dlp/commit/0e722f2f3ca42e634fd7b06ee70b16bf833ce132) ([#8244](https://github.com/yt-dlp/yt-dlp/issues/8244)) by [drzraf](https://github.com/drzraf)
- **litv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/91a670a4f7babe9c8aa2018f57d8c8952a6f49d8) ([#7785](https://github.com/yt-dlp/yt-dlp/issues/7785)) by [jiru](https://github.com/jiru)
- **neteasemusic**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/f980df734cf5c0eaded2f7b38c6c60bccfeebb48) ([#8181](https://github.com/yt-dlp/yt-dlp/issues/8181)) by [c-basalt](https://github.com/c-basalt)
- **nhk**: [Fix VOD extraction](https://github.com/yt-dlp/yt-dlp/commit/e831c80e8b2fc025b3b67d82974cc59e3526fdc8) ([#8249](https://github.com/yt-dlp/yt-dlp/issues/8249)) by [garret1317](https://github.com/garret1317)
- **radiko**: [Improve extraction](https://github.com/yt-dlp/yt-dlp/commit/2ad3873f0dfa9285c91d2160e36c039e69d597c7) ([#8221](https://github.com/yt-dlp/yt-dlp/issues/8221)) by [garret1317](https://github.com/garret1317)
- **substack**
    - [Fix download cookies bug](https://github.com/yt-dlp/yt-dlp/commit/2f2dda3a7e85148773da3cdbc03ac9949ec1bc45) ([#8219](https://github.com/yt-dlp/yt-dlp/issues/8219)) by [handlerug](https://github.com/handlerug)
    - [Fix embed extraction](https://github.com/yt-dlp/yt-dlp/commit/fbcc299bd8a19cf8b3c8805d6c268a9110230973) ([#8218](https://github.com/yt-dlp/yt-dlp/issues/8218)) by [handlerug](https://github.com/handlerug)
- **theta**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/792f1e64f6a2beac51e85408d142b3118115c4fd) ([#8251](https://github.com/yt-dlp/yt-dlp/issues/8251)) by [alerikaisattera](https://github.com/alerikaisattera)
- **wrestleuniversevod**: [Call API with device ID](https://github.com/yt-dlp/yt-dlp/commit/b095fd3fa9d58a65dc9b830bd63b9d909422aa86) ([#8272](https://github.com/yt-dlp/yt-dlp/issues/8272)) by [bashonly](https://github.com/bashonly)
- **xhamster**: user: [Support creator urls](https://github.com/yt-dlp/yt-dlp/commit/cc8d8441524ec3442d7c0d3f8f33f15b66aa06f3) ([#8232](https://github.com/yt-dlp/yt-dlp/issues/8232)) by [Grub4K](https://github.com/Grub4K)
- **youtube**
    - [Fix `heatmap` extraction](https://github.com/yt-dlp/yt-dlp/commit/03e85ea99db76a2fddb65bf46f8819bda780aaf3) ([#8299](https://github.com/yt-dlp/yt-dlp/issues/8299)) by [bashonly](https://github.com/bashonly)
    - [Raise a warning for `Incomplete Data` instead of an error](https://github.com/yt-dlp/yt-dlp/commit/eb5bdbfa70126c7d5355cc0954b63720522e462c) ([#8238](https://github.com/yt-dlp/yt-dlp/issues/8238)) by [coletdjnz](https://github.com/coletdjnz)

#### Misc. changes
- **cleanup**
    - [Update extractor tests](https://github.com/yt-dlp/yt-dlp/commit/19c90e405b4137c06dfe6f9aaa02396df0da93e5) ([#7718](https://github.com/yt-dlp/yt-dlp/issues/7718)) by [trainman261](https://github.com/trainman261)
    - Miscellaneous: [377e85a](https://github.com/yt-dlp/yt-dlp/commit/377e85a1797db9e98b78b38203ed9d4ded229991) by [dirkf](https://github.com/dirkf), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K)
### 2023.09.24

#### Important changes
- **The minimum *recommended* Python version has been raised to 3.8**
  Since Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803)
- Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg)
    - The shell escape function is now using `""` instead of `\"` (a brief sketch of the issue follows this list).
    - `utils.Popen` has been patched to properly quote commands.
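A minimal sketch of the class of bug fixed here, assuming attacker-controlled text (such as a video title) is substituted into an `--exec` template on Windows. The helper names below are hypothetical, not yt-dlp's actual code:

```python
def quote_backslash(arg):
    # Old behaviour: escape embedded quotes as \" -- but cmd.exe does not
    # treat backslash as an escape character, so the " still ends the
    # quoted span and anything after it is parsed as shell syntax.
    return '"%s"' % arg.replace('"', '\\"')


def quote_double(arg):
    # New behaviour: double embedded quotes. Inside a cmd.exe quoted span,
    # "" does not leave the quoted state, so & | > and friends stay inert.
    return '"%s"' % arg.replace('"', '""')


malicious_title = '" & calc.exe'
print(quote_backslash(malicious_title))  # "\" & calc.exe"  -> & runs calc.exe
print(quote_double(malicious_title))     # """ & calc.exe"  -> & stays quoted
```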
#### Core changes
- [Fix HTTP headers and cookie handling](https://github.com/yt-dlp/yt-dlp/commit/6c5211cebeacfc53ad5d5ddf4a659be76039656f) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
- [Fix `--check-formats`](https://github.com/yt-dlp/yt-dlp/commit/8cb7fc44db010e965d808ee679ef0725cb6e147c) by [pukkandan](https://github.com/pukkandan)
- [Fix support for upcoming Python 3.12](https://github.com/yt-dlp/yt-dlp/commit/836e06d246512f286f30c1371b2c54b72c9ecd93) ([#8130](https://github.com/yt-dlp/yt-dlp/issues/8130)) by [Grub4K](https://github.com/Grub4K)
- [Merged with youtube-dl 66ab08](https://github.com/yt-dlp/yt-dlp/commit/9d6254069c75877bc88bc3584f4326fb1853a543) by [coletdjnz](https://github.com/coletdjnz)
- [Prevent RCE when using `--exec` with `%q` (CVE-2023-40581)](https://github.com/yt-dlp/yt-dlp/commit/de015e930747165dbb8fcd360f8775fd973b7d6e) by [Grub4K](https://github.com/Grub4K)
- [Raise minimum recommended Python version to 3.8](https://github.com/yt-dlp/yt-dlp/commit/61bdf15fc7400601c3da1aa7a43917310a5bf391) ([#8183](https://github.com/yt-dlp/yt-dlp/issues/8183)) by [Grub4K](https://github.com/Grub4K)
- [`FFmpegFixupM3u8PP` may need to run with ffmpeg](https://github.com/yt-dlp/yt-dlp/commit/f73c11803579889dc8e1c99e25dba9a22fef39d8) by [pukkandan](https://github.com/pukkandan)
- **compat**
    - [Add `types.NoneType`](https://github.com/yt-dlp/yt-dlp/commit/e0c4db04dc82a699bdabd9821ddc239ebe17d30a) by [pukkandan](https://github.com/pukkandan) (With fixes in [25b6e8f](https://github.com/yt-dlp/yt-dlp/commit/25b6e8f94679b4458550702b46e61249b875a4fd))
    - [Deprecate old functions](https://github.com/yt-dlp/yt-dlp/commit/3d2623a898196640f7cc0fc8b70118ff19e6925d) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
    - [Ensure submodules are imported correctly](https://github.com/yt-dlp/yt-dlp/commit/a250b247334ce9f641e709cbb64974da6034a2b3) by [pukkandan](https://github.com/pukkandan)
- **cookies**: [Containers JSON should be opened as utf-8](https://github.com/yt-dlp/yt-dlp/commit/dab87ca23650fd87184ff5286b53e6985b59f71d) ([#7800](https://github.com/yt-dlp/yt-dlp/issues/7800)) by [bashonly](https://github.com/bashonly)
- **dependencies**: [Handle deprecation of `sqlite3.version`](https://github.com/yt-dlp/yt-dlp/commit/35f9a306e6934793cff100200cd03f288ec33f11) ([#8167](https://github.com/yt-dlp/yt-dlp/issues/8167)) by [bashonly](https://github.com/bashonly)
- **outtmpl**: [Fix replacement for `playlist_index`](https://github.com/yt-dlp/yt-dlp/commit/a264433c9fba147ecae2420091614186cfeeb895) by [pukkandan](https://github.com/pukkandan)
- **utils**
    - [Add temporary shim for logging](https://github.com/yt-dlp/yt-dlp/commit/1b392f905d20ef1f1b300b180f867d43c9ce49b8) by [pukkandan](https://github.com/pukkandan)
    - [Improve `parse_duration`](https://github.com/yt-dlp/yt-dlp/commit/af86873218c24c3859ccf575a87f2b00a73b49d0) by [bashonly](https://github.com/bashonly)
    - HTTPHeaderDict: [Handle byte values](https://github.com/yt-dlp/yt-dlp/commit/3f7965105d8d2048359e67c1e8b8ebd51588143b) by [pukkandan](https://github.com/pukkandan)
    - `clean_podcast_url`: [Handle more trackers](https://github.com/yt-dlp/yt-dlp/commit/2af4eeb77246b8183aae75a0a8d19f18c08115b2) ([#7556](https://github.com/yt-dlp/yt-dlp/issues/7556)) by [bashonly](https://github.com/bashonly), [mabdelfattah](https://github.com/mabdelfattah)
    - `js_to_json`: [Handle `Array` objects](https://github.com/yt-dlp/yt-dlp/commit/52414d64ca7b92d3f83964cdd68247989b0c4625) by [Grub4K](https://github.com/Grub4K), [std-move](https://github.com/std-move)
#### Extractor changes
- [Extract subtitles from SMIL manifests](https://github.com/yt-dlp/yt-dlp/commit/550e65410a7a1b105923494ac44460a4dc1a15d9) ([#7667](https://github.com/yt-dlp/yt-dlp/issues/7667)) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
- [Fix `--load-pages`](https://github.com/yt-dlp/yt-dlp/commit/81b4712bca608b9015aa68a4d96661d56e9cb894) by [pukkandan](https://github.com/pukkandan)
- [Make `_search_nuxt_data` more lenient](https://github.com/yt-dlp/yt-dlp/commit/904a19ee93195ce0bd4b08bd22b186120afb5b17) by [std-move](https://github.com/std-move)
- **abematv**
    - [Fix proxy handling](https://github.com/yt-dlp/yt-dlp/commit/497bbbbd7328cb705f70eced94dbd90993819a46) ([#8046](https://github.com/yt-dlp/yt-dlp/issues/8046)) by [SevenLives](https://github.com/SevenLives)
    - [Temporary fix for protocol handler](https://github.com/yt-dlp/yt-dlp/commit/9f66247289b9f8ecf931833b3f5f127274dd2161) by [pukkandan](https://github.com/pukkandan)
- **amazonminitv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/538d37671a17e0782d17f08df17800e2e3bd57c8) by [bashonly](https://github.com/bashonly), [GautamMKGarg](https://github.com/GautamMKGarg)
- **antenna**: [Support antenna.gr](https://github.com/yt-dlp/yt-dlp/commit/665876034c8d3c031443f6b4958bed02ccdf4164) ([#7584](https://github.com/yt-dlp/yt-dlp/issues/7584)) by [stdedos](https://github.com/stdedos)
- **artetv**: [Fix HLS formats extraction](https://github.com/yt-dlp/yt-dlp/commit/c2da0b5ea215298135f76e3dc14b972a3c4afacb) by [bashonly](https://github.com/bashonly)
- **axs**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/aee6b9b88c0bcccf27fd23b7e00fc0b7b168928f) ([#8094](https://github.com/yt-dlp/yt-dlp/issues/8094)) by [barsnick](https://github.com/barsnick)
- **banbye**: [Support video ids containing a hyphen](https://github.com/yt-dlp/yt-dlp/commit/578a82e497502b951036ce9da6fe0dac6937ac27) ([#8059](https://github.com/yt-dlp/yt-dlp/issues/8059)) by [kshitiz305](https://github.com/kshitiz305)
- **bbc**: [Extract tracklist as chapters](https://github.com/yt-dlp/yt-dlp/commit/eda0e415d26eb084e570cf5372d38ee1f616b70f) ([#7788](https://github.com/yt-dlp/yt-dlp/issues/7788)) by [garret1317](https://github.com/garret1317)
- **bild.de**: [Extract HLS formats](https://github.com/yt-dlp/yt-dlp/commit/b4c1c408c63724339eb12b16c91b253a7ee62cfa) ([#8032](https://github.com/yt-dlp/yt-dlp/issues/8032)) by [barsnick](https://github.com/barsnick)
- **bilibili**
    - [Add support for series, favorites and watch later](https://github.com/yt-dlp/yt-dlp/commit/9e68747f9607f05e92bb7d9b6e79d678b50070e1) ([#7518](https://github.com/yt-dlp/yt-dlp/issues/7518)) by [c-basalt](https://github.com/c-basalt)
    - [Extract Dolby audio formats](https://github.com/yt-dlp/yt-dlp/commit/b84fda7388dd20d38921e23b469147f3957c1812) ([#8142](https://github.com/yt-dlp/yt-dlp/issues/8142)) by [ClosedPort22](https://github.com/ClosedPort22)
    - [Extract `format_id`](https://github.com/yt-dlp/yt-dlp/commit/5336bf57a7061e0955a37f0542fc8ebf50d55b17) ([#7555](https://github.com/yt-dlp/yt-dlp/issues/7555)) by [c-basalt](https://github.com/c-basalt)
- **bilibilibangumi**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/bdd0b75e3f41ff35440eda6d395008beef19ef2f) ([#7337](https://github.com/yt-dlp/yt-dlp/issues/7337)) by [GD-Slime](https://github.com/GD-Slime)
- **bpb**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/f659e6439444ac64305b5c80688cd82f59d2279c) ([#8119](https://github.com/yt-dlp/yt-dlp/issues/8119)) by [Grub4K](https://github.com/Grub4K)
- **brilliantpala**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/92feb5654c5a4c81ba872904a618700fcbb3e546) ([#6680](https://github.com/yt-dlp/yt-dlp/issues/6680)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **canal1, caracoltvplay**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b3febedbeb662dfdf9b5c1d5799039ad4fc969de) ([#7151](https://github.com/yt-dlp/yt-dlp/issues/7151)) by [elyse0](https://github.com/elyse0)
- **cbc**: [Ignore any 426 from API](https://github.com/yt-dlp/yt-dlp/commit/9bf14be775289bd88cc1f5c89fd761ae51879484) ([#7689](https://github.com/yt-dlp/yt-dlp/issues/7689)) by [makew0rld](https://github.com/makew0rld)
- **cbcplayer**: [Extract HLS formats and subtitles](https://github.com/yt-dlp/yt-dlp/commit/339c339fec095ff4141b20e6aa83629117fb26df) ([#7484](https://github.com/yt-dlp/yt-dlp/issues/7484)) by [trainman261](https://github.com/trainman261)
- **cbcplayerplaylist**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ed711897814f3ee0b1822e4205e74133467e8f1c) ([#7870](https://github.com/yt-dlp/yt-dlp/issues/7870)) by [trainman261](https://github.com/trainman261)
- **cineverse**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/15591940ff102d1ae337d603a46d8f238c83a61f) ([#8146](https://github.com/yt-dlp/yt-dlp/issues/8146)) by [garret1317](https://github.com/garret1317)
- **crunchyroll**: [Remove initial state extraction](https://github.com/yt-dlp/yt-dlp/commit/9b16762f48914de9ac914601769c76668e433325) ([#7632](https://github.com/yt-dlp/yt-dlp/issues/7632)) by [Grub4K](https://github.com/Grub4K)
- **douyutv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/21f40e75dfc0055ea9cdbd7fe2c46c6f9b561afd) ([#7652](https://github.com/yt-dlp/yt-dlp/issues/7652)) by [c-basalt](https://github.com/c-basalt)
- **dropbox**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b9f2bc2dbed2323734a0d18e65e1e2e23dc833d8) ([#7926](https://github.com/yt-dlp/yt-dlp/issues/7926)) by [bashonly](https://github.com/bashonly), [denhotte](https://github.com/denhotte), [nathantouze](https://github.com/nathantouze) (With fixes in [099fb1b](https://github.com/yt-dlp/yt-dlp/commit/099fb1b35cf835303306549f5113d1802d79c9c7) by [bashonly](https://github.com/bashonly))
- **eplus**: inbound: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/295fbb3ae3a7d0dd50e286be5c487cf145ed5778) ([#5782](https://github.com/yt-dlp/yt-dlp/issues/5782)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **expressen**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/a5e264d74b4bd60c6e7ec4e38f1a23af4e420531) ([#8153](https://github.com/yt-dlp/yt-dlp/issues/8153)) by [kylegustavo](https://github.com/kylegustavo)
- **facebook**
    - [Add dash manifest URL](https://github.com/yt-dlp/yt-dlp/commit/a854fbec56d5004f5147116a41d1dd050632a579) ([#7743](https://github.com/yt-dlp/yt-dlp/issues/7743)) by [ringus1](https://github.com/ringus1)
    - [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/d3d81cc98f554d0adb87d24bfd6fabaaa803944d) ([#7890](https://github.com/yt-dlp/yt-dlp/issues/7890)) by [ringus1](https://github.com/ringus1)
    - [Improve format sorting](https://github.com/yt-dlp/yt-dlp/commit/308936619c8a4f3a52d73c829c2006ff6c55fea2) ([#8074](https://github.com/yt-dlp/yt-dlp/issues/8074)) by [fireattack](https://github.com/fireattack)
    - reel: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/bb5d84c9d2f1e978c3eddfb5ccbe138036682a36) ([#7564](https://github.com/yt-dlp/yt-dlp/issues/7564)) by [bashonly](https://github.com/bashonly), [demon071](https://github.com/demon071)
- **fox**: [Support foxsports.com](https://github.com/yt-dlp/yt-dlp/commit/30b29f37159e9226e2f2d5434c9a4096ac4efa2e) ([#7724](https://github.com/yt-dlp/yt-dlp/issues/7724)) by [ischmidt20](https://github.com/ischmidt20)
- **funker530**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/0ce1f48bf1cb78d40d734ce73ee1c90eccf92274) ([#8040](https://github.com/yt-dlp/yt-dlp/issues/8040)) by [04-pasha-04](https://github.com/04-pasha-04)
- **generic**
    - [Fix KVS thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/53675852195d8dd859555d4789944a6887171ff8) by [bashonly](https://github.com/bashonly)
    - [Fix generic title for embeds](https://github.com/yt-dlp/yt-dlp/commit/994f7ef8e6003f4b7b258528755d0b6adcc31714) by [pukkandan](https://github.com/pukkandan)
- **gofile**: [Update token](https://github.com/yt-dlp/yt-dlp/commit/99c99c7185f5d8e9b3699a6fc7f86ec663d7b97e) by [bashonly](https://github.com/bashonly)
- **hotstar**
    - [Extract `release_year`](https://github.com/yt-dlp/yt-dlp/commit/7237c8dca0590aa7438ade93f927df88c9381ec7) ([#7869](https://github.com/yt-dlp/yt-dlp/issues/7869)) by [Rajeshwaran2001](https://github.com/Rajeshwaran2001)
    - [Make metadata extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/30ea88591b728cca0896018dbf67c2298070c669) by [bashonly](https://github.com/bashonly)
    - [Support `/clips/` URLs](https://github.com/yt-dlp/yt-dlp/commit/86eeb044c2342d68c6ef177577f87852e6badd85) ([#7710](https://github.com/yt-dlp/yt-dlp/issues/7710)) by [bashonly](https://github.com/bashonly)
- **hungama**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/4b3a6ef1b3e235ba9a45142830b6edb357c71696) ([#7757](https://github.com/yt-dlp/yt-dlp/issues/7757)) by [bashonly](https://github.com/bashonly), [Yalab7](https://github.com/Yalab7)
- **indavideoembed**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/63e0c5748c0eb461a2ccca4181616eb930b4b750) ([#8129](https://github.com/yt-dlp/yt-dlp/issues/8129)) by [aky-01](https://github.com/aky-01)
- **iprima**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/568f08051841aedea968258889539741e26009e9) ([#7216](https://github.com/yt-dlp/yt-dlp/issues/7216)) by [std-move](https://github.com/std-move)
- **lbry**: [Fix original format extraction](https://github.com/yt-dlp/yt-dlp/commit/127a22460658ac39cbe5c4b3fb88d578363e0dfa) ([#7711](https://github.com/yt-dlp/yt-dlp/issues/7711)) by [bashonly](https://github.com/bashonly)
- **lecturio**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/efa2339502a37cf13ae7f143bd8b2c28f452d1cd) ([#7649](https://github.com/yt-dlp/yt-dlp/issues/7649)) by [simon300000](https://github.com/simon300000)
- **magellantv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f4ea501551526ebcb54d19b84cf0ebe798583a85) ([#7616](https://github.com/yt-dlp/yt-dlp/issues/7616)) by [bashonly](https://github.com/bashonly)
- **massengeschmack.tv**: [Fix title extraction](https://github.com/yt-dlp/yt-dlp/commit/81f46ac573dc443ad48560f308582a26784d3015) ([#7813](https://github.com/yt-dlp/yt-dlp/issues/7813)) by [sb0stn](https://github.com/sb0stn)
- **media.ccc.de**: lists: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/cf11b40ac40e3d23a6352753296f3a732886efb9) ([#8144](https://github.com/yt-dlp/yt-dlp/issues/8144)) by [Rohxn16](https://github.com/Rohxn16)
- **mediaite**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/630a55df8de7747e79aa680959d785dfff2c4b76) ([#7923](https://github.com/yt-dlp/yt-dlp/issues/7923)) by [Grabien](https://github.com/Grabien)
- **mediaklikk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6e07e4bc7e59f5bdb60e93c011e57b18b009f2b5) ([#8086](https://github.com/yt-dlp/yt-dlp/issues/8086)) by [bashonly](https://github.com/bashonly), [zhallgato](https://github.com/zhallgato)
- **mediastream**: [Make embed extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/635ae31f68a3ac7f6393d59657ed711e34ee3552) by [bashonly](https://github.com/bashonly)
- **mixcloud**: [Update API URL](https://github.com/yt-dlp/yt-dlp/commit/7b71643cc986de9a3768dac4ac9b64f4d05e7f5e) ([#8114](https://github.com/yt-dlp/yt-dlp/issues/8114)) by [garret1317](https://github.com/garret1317)
- **monstercat**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/eaee21bf71889d495076037cbe590c8c0b21ef3a) ([#8133](https://github.com/yt-dlp/yt-dlp/issues/8133)) by [garret1317](https://github.com/garret1317)
- **motortrendondemand**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c03a58ec9933e4a42c2d8fa80b8a0ddb2cde64e6) ([#7683](https://github.com/yt-dlp/yt-dlp/issues/7683)) by [AmirAflak](https://github.com/AmirAflak)
- **museai**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/65cfa2b057d7946fbe322155a778fe206556d0c6) ([#7614](https://github.com/yt-dlp/yt-dlp/issues/7614)) by [bashonly](https://github.com/bashonly)
- **mzaalo**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/d7aee8e310b2c4f21d50aac0b420e1b3abde21a4) by [bashonly](https://github.com/bashonly)
- **n1info**: article: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/8ac5b6d96ae5c60cd5ae2495949e0068a6754c45) ([#7373](https://github.com/yt-dlp/yt-dlp/issues/7373)) by [u-spec-png](https://github.com/u-spec-png)
- **nfl.com**: plus, replay: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/1eaca74bc2ca0f5b1ec532f24c61de44f2e8cb2d) ([#7838](https://github.com/yt-dlp/yt-dlp/issues/7838)) by [bashonly](https://github.com/bashonly)
- **niconicochannelplus**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/698beb9a497f51693e64d167e572ff9efa4bc25f) ([#5686](https://github.com/yt-dlp/yt-dlp/issues/5686)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **nitter**: [Fix title extraction fallback](https://github.com/yt-dlp/yt-dlp/commit/a83da3717d30697102e76f63a6f29d77f9373c2a) ([#8102](https://github.com/yt-dlp/yt-dlp/issues/8102)) by [ApoorvShah111](https://github.com/ApoorvShah111)
- **noodlemagazine**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/bae4834245a708fff97219849ec880c319c88bc6) ([#7830](https://github.com/yt-dlp/yt-dlp/issues/7830)) by [RedDeffender](https://github.com/RedDeffender) (With fixes in [69dbfe0](https://github.com/yt-dlp/yt-dlp/commit/69dbfe01c47cd078682a87f179f5846e2679e927) by [bashonly](https://github.com/bashonly))
- **novaembed**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2269065ad60cb0ab62408ae6a7b20283e5252232) ([#7910](https://github.com/yt-dlp/yt-dlp/issues/7910)) by [std-move](https://github.com/std-move)
- **patreoncampaign**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/11de6fec9c9b8d34d1f90c8e6218ec58a3471b58) ([#7664](https://github.com/yt-dlp/yt-dlp/issues/7664)) by [bashonly](https://github.com/bashonly)
- **pbs**: [Add extractor `PBSKidsIE`](https://github.com/yt-dlp/yt-dlp/commit/6d6081dda1290a85bdab6717f239289e3aa74c8e) ([#7602](https://github.com/yt-dlp/yt-dlp/issues/7602)) by [snixon](https://github.com/snixon)
- **piapro**: [Support `/content` URL](https://github.com/yt-dlp/yt-dlp/commit/1bcb9fe8715b1f288efc322be3de409ee0597080) ([#7592](https://github.com/yt-dlp/yt-dlp/issues/7592)) by [FinnRG](https://github.com/FinnRG)
- **piaulizaportal**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6636021206dad17c7745ae6bce6cb73d6f2ef319) ([#7903](https://github.com/yt-dlp/yt-dlp/issues/7903)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **picartovod**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/db9743894071760f994f640a4c24358f749a78c0) ([#7727](https://github.com/yt-dlp/yt-dlp/issues/7727)) by [Frankgoji](https://github.com/Frankgoji)
- **pornbox**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/40999467f72db074a3f13057da9bf82a857530fe) ([#7386](https://github.com/yt-dlp/yt-dlp/issues/7386)) by [niemands](https://github.com/niemands)
- **pornhub**: [Update access cookies for UK](https://github.com/yt-dlp/yt-dlp/commit/1d3d579c2142f69831b6ae140e1d8e824e07fa0e) ([#7591](https://github.com/yt-dlp/yt-dlp/issues/7591)) by [zhong-yiyu](https://github.com/zhong-yiyu)
- **pr0gramm**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/b532556d0a85e7d76f8f0880861232fb706ddbc5) ([#8151](https://github.com/yt-dlp/yt-dlp/issues/8151)) by [Grub4K](https://github.com/Grub4K)
- **radiofrance**: [Add support for livestreams, podcasts, playlists](https://github.com/yt-dlp/yt-dlp/commit/ba8e9eb2c8bbb699f314169fab8e544437ad731e) ([#7006](https://github.com/yt-dlp/yt-dlp/issues/7006)) by [elyse0](https://github.com/elyse0)
- **rbgtum**: [Fix extraction and support new URL format](https://github.com/yt-dlp/yt-dlp/commit/5fccabac27ca3c1165ade1b0df6fbadc24258dc2) ([#7690](https://github.com/yt-dlp/yt-dlp/issues/7690)) by [simon300000](https://github.com/simon300000)
- **reddit**
    - [Extract subtitles](https://github.com/yt-dlp/yt-dlp/commit/20c3c9b433dd47faf0dbde6b46e4e34eb76109a5) by [bashonly](https://github.com/bashonly)
    - [Fix thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/9a04113dfbb69b904e4e2bea736da293505786b8) by [bashonly](https://github.com/bashonly)
- **rtvslo**: [Fix format extraction](https://github.com/yt-dlp/yt-dlp/commit/94389b225d9bcf29aa7ba8afaf1bbd7c62204eae) ([#8131](https://github.com/yt-dlp/yt-dlp/issues/8131)) by [bashonly](https://github.com/bashonly)
- **rule34video**: [Extract tags](https://github.com/yt-dlp/yt-dlp/commit/58493923e9b6f774947a2131e5258e9f3cf816be) ([#7117](https://github.com/yt-dlp/yt-dlp/issues/7117)) by [soundchaser128](https://github.com/soundchaser128)
- **rumble**: [Fix embed extraction](https://github.com/yt-dlp/yt-dlp/commit/23d829a3420450bcfb0788e6fb2cf4f6acdbe596) ([#8035](https://github.com/yt-dlp/yt-dlp/issues/8035)) by [trislee](https://github.com/trislee)
- **s4c**
    - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b9de629d78ce31699f2de886071dc257830f9676) ([#7730](https://github.com/yt-dlp/yt-dlp/issues/7730)) by [ifan-t](https://github.com/ifan-t)
    - [Add series support and extract subs/thumbs](https://github.com/yt-dlp/yt-dlp/commit/fe371dcf0ba5ce8d42480eade54eeeac99ab3cb0) ([#7776](https://github.com/yt-dlp/yt-dlp/issues/7776)) by [ifan-t](https://github.com/ifan-t)
- **sohu**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5be7e978867b5f66ad6786c674d79d40e950ae16) ([#7628](https://github.com/yt-dlp/yt-dlp/issues/7628)) by [bashonly](https://github.com/bashonly), [c-basalt](https://github.com/c-basalt)
- **stageplus**: [Fix m3u8 extraction](https://github.com/yt-dlp/yt-dlp/commit/56b3dc03354b75be995759d8441d2754c0442b9a) ([#7929](https://github.com/yt-dlp/yt-dlp/issues/7929)) by [bashonly](https://github.com/bashonly)
- **streamanity**: [Remove](https://github.com/yt-dlp/yt-dlp/commit/2cfe221fbbe46faa3f46552c08d947a51f424903) ([#7571](https://github.com/yt-dlp/yt-dlp/issues/7571)) by [alerikaisattera](https://github.com/alerikaisattera)
- **svtplay**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/2301b5c1b77a65abbb46b72f91e1e4666fd5d985) ([#7789](https://github.com/yt-dlp/yt-dlp/issues/7789)) by [dirkf](https://github.com/dirkf), [wader](https://github.com/wader)
- **tbsjp**: [Add episode, program, playlist extractors](https://github.com/yt-dlp/yt-dlp/commit/876b70c8edf4c0147f180bd981fbc4d625cbfb9c) ([#7765](https://github.com/yt-dlp/yt-dlp/issues/7765)) by [garret1317](https://github.com/garret1317)
- **tiktok**
    - [Fix audio-only format extraction](https://github.com/yt-dlp/yt-dlp/commit/b09bd0c19648f60c59fb980cd454cb0069959fb9) ([#7712](https://github.com/yt-dlp/yt-dlp/issues/7712)) by [bashonly](https://github.com/bashonly)
    - [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/069cbece9dba6384f1cc5fcfc7ce562a31af42fc) by [bashonly](https://github.com/bashonly)
- **triller**: [Fix unlisted video extraction](https://github.com/yt-dlp/yt-dlp/commit/39837ae3199aa934299badbd0d63243ed639e6c8) ([#7670](https://github.com/yt-dlp/yt-dlp/issues/7670)) by [bashonly](https://github.com/bashonly)
- **tv5mondeplus**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7d3d658f4c558ee7d72b1c01b46f2126948681cd) ([#7952](https://github.com/yt-dlp/yt-dlp/issues/7952)) by [dirkf](https://github.com/dirkf), [korli](https://github.com/korli)
- **twitcasting**
    - [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/cebbd33b1c678149fc8f0e254db6fc0da317ea80) ([#8120](https://github.com/yt-dlp/yt-dlp/issues/8120)) by [c-basalt](https://github.com/c-basalt)
    - [Support `--wait-for-video`](https://github.com/yt-dlp/yt-dlp/commit/c1d71d0d9f41db5e4306c86af232f5f6220a130b) ([#7975](https://github.com/yt-dlp/yt-dlp/issues/7975)) by [at-wat](https://github.com/at-wat)
- **twitter**
    - [Add fallback, improve error handling](https://github.com/yt-dlp/yt-dlp/commit/6014355c6142f68e20c8374e3787e5b5820f19e2) ([#7621](https://github.com/yt-dlp/yt-dlp/issues/7621)) by [bashonly](https://github.com/bashonly)
    - [Fix GraphQL and legacy API](https://github.com/yt-dlp/yt-dlp/commit/92315c03774cfabb3a921884326beb4b981f786b) ([#7516](https://github.com/yt-dlp/yt-dlp/issues/7516)) by [bashonly](https://github.com/bashonly)
    - [Fix retweet extraction and syndication API](https://github.com/yt-dlp/yt-dlp/commit/a006ce2b27357c15792eb5c18f06765e640b801c) ([#8016](https://github.com/yt-dlp/yt-dlp/issues/8016)) by [bashonly](https://github.com/bashonly)
    - [Revert 92315c03774cfabb3a921884326beb4b981f786b](https://github.com/yt-dlp/yt-dlp/commit/b03fa7834579a01cc5fba48c0e73488a16683d48) by [pukkandan](https://github.com/pukkandan)
    - spaces
        - [Fix format protocol](https://github.com/yt-dlp/yt-dlp/commit/613dbce177d34ffc31053e8e01acf4bb107bcd1e) ([#7550](https://github.com/yt-dlp/yt-dlp/issues/7550)) by [bashonly](https://github.com/bashonly)
        - [Pass referer header to downloader](https://github.com/yt-dlp/yt-dlp/commit/c6ef553792ed48462f9fd0e78143bef6b1a71c2e) by [bashonly](https://github.com/bashonly)
- **unsupported**: [List more sites with DRM](https://github.com/yt-dlp/yt-dlp/commit/e7057383380d7d53815f8feaf90ca3dcbde88983) by [pukkandan](https://github.com/pukkandan)
- **videa**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/98eac0e6ba0e510ae7dfdfd249d42ee71fb272b1) ([#8003](https://github.com/yt-dlp/yt-dlp/issues/8003)) by [aky-01](https://github.com/aky-01), [hatsomatt](https://github.com/hatsomatt)
- **vrt**: [Update token signing key](https://github.com/yt-dlp/yt-dlp/commit/325191d0c9bf3fe257b8a7c2eb95080f44f6ddfc) ([#7519](https://github.com/yt-dlp/yt-dlp/issues/7519)) by [Zprokkel](https://github.com/Zprokkel)
- **wat.tv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/7cccab79e7d00ed965b48b8cefce1da8a0513409) ([#7898](https://github.com/yt-dlp/yt-dlp/issues/7898)) by [davinkevin](https://github.com/davinkevin)
- **wdr**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/5d0395498d7065aa5e55bac85fa9354b4b0d48eb) ([#7979](https://github.com/yt-dlp/yt-dlp/issues/7979)) by [szabyg](https://github.com/szabyg)
- **web.archive**: vlive: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/9652bca1bd02f6bc1b8cb1e186f2ccbf32225561) ([#8132](https://github.com/yt-dlp/yt-dlp/issues/8132)) by [bashonly](https://github.com/bashonly)
- **weibo**: [Fix extractor and support user extraction](https://github.com/yt-dlp/yt-dlp/commit/69b03f84f8378b0b5a2fbae56f9b7d860b2f529e) ([#7657](https://github.com/yt-dlp/yt-dlp/issues/7657)) by [c-basalt](https://github.com/c-basalt)
- **weverse**: [Support extraction without auth](https://github.com/yt-dlp/yt-dlp/commit/c2d8ee0000302aba63476b7d5bd8793e57b6c8c6) ([#7924](https://github.com/yt-dlp/yt-dlp/issues/7924)) by [seproDev](https://github.com/seproDev)
- **wimbledon**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a15fcd299e767a510debd8dc1646fe863b96ce0e) ([#7551](https://github.com/yt-dlp/yt-dlp/issues/7551)) by [nnoboa](https://github.com/nnoboa)
- **wrestleuniverseppv**: [Fix HLS AES key extraction](https://github.com/yt-dlp/yt-dlp/commit/dae349da97cafe7357106a8f3187fd48a2ad1210) by [bashonly](https://github.com/bashonly)
- **youtube**
    - [Add `player_params` extractor arg](https://github.com/yt-dlp/yt-dlp/commit/ba06d77a316650ff057347d224b5afa8b203ad65) ([#7719](https://github.com/yt-dlp/yt-dlp/issues/7719)) by [coletdjnz](https://github.com/coletdjnz)
    - [Fix `player_params` arg being converted to lowercase](https://github.com/yt-dlp/yt-dlp/commit/546b2c28a106cf8101d481b215b676d1b091d276) by [coletdjnz](https://github.com/coletdjnz)
    - [Fix consent cookie](https://github.com/yt-dlp/yt-dlp/commit/378ae9f9fb8e8c86e6ac89c4c5b815b48ce93620) ([#7774](https://github.com/yt-dlp/yt-dlp/issues/7774)) by [coletdjnz](https://github.com/coletdjnz)
    - tab: [Detect looping feeds](https://github.com/yt-dlp/yt-dlp/commit/1ba6fe9db5f660d5538588315c23ad6cf0371c5f) ([#6621](https://github.com/yt-dlp/yt-dlp/issues/6621)) by [coletdjnz](https://github.com/coletdjnz)
- **zaiko**: [Improve thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/ecef42c3adbcb6a84405139047923c4967316f28) ([#8054](https://github.com/yt-dlp/yt-dlp/issues/8054)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **zee5**: [Update access token endpoint](https://github.com/yt-dlp/yt-dlp/commit/a0de8bb8601146b8f87bf7cd562eef8bfb4690be) ([#7914](https://github.com/yt-dlp/yt-dlp/issues/7914)) by [bashonly](https://github.com/bashonly)
- **zoom**: [Extract duration](https://github.com/yt-dlp/yt-dlp/commit/66cc64ff6696f9921ff112a278542f8d999ffea4) by [bashonly](https://github.com/bashonly)
#### Downloader changes
- **external**
    - [Fix ffmpeg input from stdin](https://github.com/yt-dlp/yt-dlp/commit/e57eb98222d29cc4c09ee975d3c492274a6e5be3) ([#7655](https://github.com/yt-dlp/yt-dlp/issues/7655)) by [bashonly](https://github.com/bashonly)
    - [Fixes to cookie handling](https://github.com/yt-dlp/yt-dlp/commit/42ded0a429c20ec13dc006825e1508d9a02f0ad4) by [bashonly](https://github.com/bashonly)

#### Postprocessor changes
- **embedthumbnail**: [Support `m4v`](https://github.com/yt-dlp/yt-dlp/commit/8a4cd12c8f8e93292e3e95200b9d17a3af39624c) ([#7583](https://github.com/yt-dlp/yt-dlp/issues/7583)) by [Neurognostic](https://github.com/Neurognostic)

#### Networking changes
- [Add module](https://github.com/yt-dlp/yt-dlp/commit/c365dba8430ee33abda85d31f95128605bf240eb) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [pukkandan](https://github.com/pukkandan)
- [Add request handler preference framework](https://github.com/yt-dlp/yt-dlp/commit/db7b054a6111ca387220d0eb87bf342f9c130eb8) ([#7603](https://github.com/yt-dlp/yt-dlp/issues/7603)) by [coletdjnz](https://github.com/coletdjnz)
- [Add strict Request extension checking](https://github.com/yt-dlp/yt-dlp/commit/86aea0d3a213da3be1da638b9b828e6f0ee1d59f) ([#7604](https://github.com/yt-dlp/yt-dlp/issues/7604)) by [coletdjnz](https://github.com/coletdjnz)
- [Fix POST requests with zero-length payloads](https://github.com/yt-dlp/yt-dlp/commit/71baa490ebd3655746430f208a9b605d120cd315) ([#7648](https://github.com/yt-dlp/yt-dlp/issues/7648)) by [bashonly](https://github.com/bashonly)
- [Fix `--legacy-server-connect`](https://github.com/yt-dlp/yt-dlp/commit/75dc8e673b481a82d0688aeec30f6c65d82bb359) ([#7645](https://github.com/yt-dlp/yt-dlp/issues/7645)) by [bashonly](https://github.com/bashonly)
- [Fix various socks proxy bugs](https://github.com/yt-dlp/yt-dlp/commit/20fbbd9249a2f26c7ae579bde5ba5d69aa8fac69) ([#8065](https://github.com/yt-dlp/yt-dlp/issues/8065)) by [coletdjnz](https://github.com/coletdjnz)
- [Ignore invalid proxies in env](https://github.com/yt-dlp/yt-dlp/commit/bbeacff7fcaa3b521066088a5ccbf34ef5070d1d) ([#7704](https://github.com/yt-dlp/yt-dlp/issues/7704)) by [coletdjnz](https://github.com/coletdjnz)
- [Rewrite architecture](https://github.com/yt-dlp/yt-dlp/commit/227bf1a33be7b89cd7d44ad046844c4ccba104f4) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [coletdjnz](https://github.com/coletdjnz)
- **Request Handler**
    - urllib
        - [Remove dot segments during URL normalization](https://github.com/yt-dlp/yt-dlp/commit/4bf912282a34b58b6b35d8f7e6be535770c89c76) ([#7662](https://github.com/yt-dlp/yt-dlp/issues/7662)) by [coletdjnz](https://github.com/coletdjnz)
        - [Simplify gzip decoding](https://github.com/yt-dlp/yt-dlp/commit/59e92b1f1833440bb2190f847eb735cf0f90bc85) ([#7611](https://github.com/yt-dlp/yt-dlp/issues/7611)) by [Grub4K](https://github.com/Grub4K) (With fixes in [77bff23](https://github.com/yt-dlp/yt-dlp/commit/77bff23ee97565bab2e0d75b893a21bf7983219a))

#### Misc. changes
- **build**: [Make sure deprecated modules are added](https://github.com/yt-dlp/yt-dlp/commit/131d132da5c98c6c78bd7eed4b37f4458561b3d9) by [pukkandan](https://github.com/pukkandan)
- **cleanup**
    - [Add color to `download-archive` message](https://github.com/yt-dlp/yt-dlp/commit/2b029ca0a9f9105c4f7626993fa60e54c9782749) ([#5138](https://github.com/yt-dlp/yt-dlp/issues/5138)) by [aaruni96](https://github.com/aaruni96), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
    - Miscellaneous
        - [6148833](https://github.com/yt-dlp/yt-dlp/commit/6148833f5ceb7674142ddb8d761ffe03cee7df69), [62b5c94](https://github.com/yt-dlp/yt-dlp/commit/62b5c94cadaa5f596dc1a7083db9db12efe357be) by [pukkandan](https://github.com/pukkandan)
        - [5ca095c](https://github.com/yt-dlp/yt-dlp/commit/5ca095cbcde3e32642a4fe5b2d69e8e3c785a021) by [barsnick](https://github.com/barsnick), [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K), [sqrtNOT](https://github.com/sqrtNOT)
        - [088add9](https://github.com/yt-dlp/yt-dlp/commit/088add9567d39b758737e4299a0e619fd89d2e8f) by [Grub4K](https://github.com/Grub4K)
- **devscripts**: `make_changelog`: [Fix changelog grouping and add networking group](https://github.com/yt-dlp/yt-dlp/commit/30ba233d4cee945756ed7344e7ddb3a90d2ae608) ([#8124](https://github.com/yt-dlp/yt-dlp/issues/8124)) by [Grub4K](https://github.com/Grub4K)
- **docs**: [Update collaborators](https://github.com/yt-dlp/yt-dlp/commit/1be0a96a4d14f629097509fcc89d15f69a8243c7) by [Grub4K](https://github.com/Grub4K)
|
||||
- **test**
|
||||
- [Add tests for socks proxies](https://github.com/yt-dlp/yt-dlp/commit/fcd6a76adc49d5cd8783985c7ce35384b72e545f) ([#7908](https://github.com/yt-dlp/yt-dlp/issues/7908)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Fix `httplib_validation_errors` test for old Python versions](https://github.com/yt-dlp/yt-dlp/commit/95abea9a03289da1384e5bda3d590223ccc0a238) ([#7677](https://github.com/yt-dlp/yt-dlp/issues/7677)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Fix `test_load_certifi`](https://github.com/yt-dlp/yt-dlp/commit/de20687ee6b742646128a7629b57096631a20619) by [pukkandan](https://github.com/pukkandan)
|
||||
- download: [Test for `expected_exception`](https://github.com/yt-dlp/yt-dlp/commit/661c9a1d029296b28e0b2f8be8a72a43abaf6536) by [at-wat](https://github.com/at-wat)
|
||||
|
||||
### 2023.07.06

#### Important changes
- Security: [[CVE-2023-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-35934)] Fix [Cookie leak](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj)
    - `--add-header Cookie:` is deprecated and auto-scoped to input URL domains
    - Cookies are scoped when passed to external downloaders
    - Add `cookie` field to info.json and deprecate `http_headers.Cookie`
    - Add `cookies` field to info.json and deprecate `http_headers.Cookie`

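In practice, the last item means downstream tooling should read cookies from the new top-level field rather than from `http_headers`. A minimal sketch (assuming an info.json written by a release with this fix; the filename is illustrative):

```python
import json

# Hypothetical filename; use whatever `--write-info-json` produced
with open('video.info.json', encoding='utf-8') as f:
    info = json.load(f)

# Prefer the new top-level `cookies` field; `http_headers.Cookie` is deprecated
cookie_header = info.get('cookies') or info.get('http_headers', {}).get('Cookie')
```
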
#### Core changes
- [Allow extractors to mark formats as potentially DRM](https://github.com/yt-dlp/yt-dlp/commit/bc344cd456380999c1ee74554dfd432a38f32ec7) ([#7396](https://github.com/yt-dlp/yt-dlp/issues/7396)) by [pukkandan](https://github.com/pukkandan)

@ -51,7 +300,7 @@ #### Downloader changes
- **http**: [Avoid infinite loop when no data is received](https://github.com/yt-dlp/yt-dlp/commit/662ef1e910b72e57957f06589925b2332ba52821) by [pukkandan](https://github.com/pukkandan)

#### Misc. changes
- [Add CodeQL workflow](https://github.com/yt-dlp/yt-dlp/commit/6355b5f1e1e8e7f4ef866d71d51e03baf0e82f17) ([#7497](https://github.com/yt-dlp/yt-dlp/issues/7497)) by [pukkandan](https://github.com/pukkandan)
- [Add CodeQL workflow](https://github.com/yt-dlp/yt-dlp/commit/6355b5f1e1e8e7f4ef866d71d51e03baf0e82f17) ([#7497](https://github.com/yt-dlp/yt-dlp/issues/7497)) by [jorgectf](https://github.com/jorgectf)
- **cleanup**: Miscellaneous: [337734d](https://github.com/yt-dlp/yt-dlp/commit/337734d4a8a6500bc65434843db346b5cbd05e81) by [pukkandan](https://github.com/pukkandan)
- **docs**: [Minor fixes](https://github.com/yt-dlp/yt-dlp/commit/b532a3481046e1eabb6232ee8196fb696c356ff6) by [pukkandan](https://github.com/pukkandan)
- **make_changelog**: [Skip reverted commits](https://github.com/yt-dlp/yt-dlp/commit/fa44802809d189fca0f4782263d48d6533384503) by [pukkandan](https://github.com/pukkandan)

@ -44,16 +44,6 @@ ## [Ashish0804](https://github.com/Ashish0804) <sub><sup>[Inactive]</sup></sub>
* Improved/fixed support for HiDive, HotStar, Hungama, LBRY, LinkedInLearning, Mxplayer, SonyLiv, TV2, Vimeo, VLive etc

## [Lesmiscore](https://github.com/Lesmiscore)

**Bitcoin**: bc1qfd02r007cutfdjwjmyy9w23rjvtls6ncve7r3s
**Monacoin**: mona1q3tf7dzvshrhfe3md379xtvt2n22duhglv5dskr

* Download live from start to end for YouTube
* Added support for new websites AbemaTV, mildom, PixivSketch, skeb, radiko, voicy, mirrativ, openrec, whowatch, damtomo, 17.live, mixch etc
* Improved/fixed support for fc2, YahooJapanNews, tver, iwara etc

## [bashonly](https://github.com/bashonly)

* `--update-to`, automated release, nightly builds
27
README.md
@ -76,7 +76,7 @@

# NEW FEATURES

* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@42f2d4**](https://github.com/ytdl-org/youtube-dl/commit/07af47960f3bb262ead02490ce65c8c45c01741e) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@66ab08**](https://github.com/ytdl-org/youtube-dl/commit/66ab0814c4baa2dc79c2dd5287bc0ad61a37c5b9) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))

* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API

@ -89,7 +89,6 @@ # NEW FEATURES
* Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) **\***
* Supports some (but not all) age-gated content without cookies
* Download livestreams from the start using `--live-from-start` (*experimental*)
* `255kbps` audio is extracted (if available) from YouTube Music when premium cookies are given
* Channel URLs download all uploads of the channel, including shorts and live

* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]`

@ -158,6 +157,7 @@ ### Differences in default behavior
* yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
* yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: [~~aria2c~~](https://github.com/yt-dlp/yt-dlp/issues/5931)). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is
* yt-dlp versions between 2021.09.01 and 2023.01.02 applied `--match-filter` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this
* yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) for standard http requests.

For ease of use, a few more compat options are available:

@ -165,7 +165,7 @@ ### Differences in default behavior
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter`
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter`
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
* `--compat-options 2022`: Same as `--compat-options playlist-match-filter,no-external-downloader-progress`. Use this to enable all future compat options
* `--compat-options 2022`: Same as `--compat-options playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler`. Use this to enable all future compat options

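As a rough illustration, these presets can also be requested through the Python API by passing the already-expanded option names (the `compat_opts` key is the embedding counterpart of `--compat-options`; the expansion below simply mirrors the `2022` preset described above, and the URL is only an example):

```python
import yt_dlp

ydl_opts = {
    # Hand-expanded equivalent of `--compat-options 2022` (see the list above)
    'compat_opts': [
        'playlist-match-filter',
        'no-external-downloader-progress',
        'prefer-legacy-http-handler',
    ],
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```
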
# INSTALLATION

@ -275,12 +275,13 @@ ### Networking
* [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE)
* [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT <sup>[1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) </sup>
* [**websockets**](https://github.com/aaugustin/websockets)\* - For downloading over websocket. Licensed under [BSD-3-Clause](https://github.com/aaugustin/websockets/blob/main/LICENSE)
* [**requests**](https://github.com/psf/requests)\* - HTTP library. For HTTPS proxy and persistent connections support. Licensed under [Apache-2.0](https://github.com/psf/requests/blob/main/LICENSE)

### Metadata

* [**mutagen**](https://github.com/quodlibet/mutagen)\* - For `--embed-thumbnail` in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING)
* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For `--embed-thumbnail` in `mp4`/`m4a` files when `mutagen`/`ffmpeg` cannot. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING)
* [**xattr**](https://github.com/xattr/xattr), [**pyxattr**](https://github.com/iustin/pyxattr) or [**setfattr**](http://savannah.nongnu.org/projects/attr) - For writing xattr metadata (`--xattrs`) on **Linux**. Licensed under [MIT](https://github.com/xattr/xattr/blob/master/LICENSE.txt), [LGPL2.1](https://github.com/iustin/pyxattr/blob/master/COPYING) and [GPLv2+](http://git.savannah.nongnu.org/cgit/attr.git/tree/doc/COPYING) respectively
* [**xattr**](https://github.com/xattr/xattr), [**pyxattr**](https://github.com/iustin/pyxattr) or [**setfattr**](http://savannah.nongnu.org/projects/attr) - For writing xattr metadata (`--xattrs`) on **Mac** and **BSD**. Licensed under [MIT](https://github.com/xattr/xattr/blob/master/LICENSE.txt), [LGPL2.1](https://github.com/iustin/pyxattr/blob/master/COPYING) and [GPLv2+](http://git.savannah.nongnu.org/cgit/attr.git/tree/doc/COPYING) respectively

### Misc

@ -913,7 +914,7 @@ ## Authentication Options:
                                    Defaults to ~/.netrc
    --netrc-cmd NETRC_CMD           Command to execute to get the credentials
                                    for an extractor.
    --video-password PASSWORD       Video password (vimeo, youku)
    --video-password PASSWORD       Video-specific password
    --ap-mso MSO                    Adobe Pass multiple-system operator (TV
                                    provider) identifier, use --ap-list-mso for
                                    a list of available MSOs

@ -1569,7 +1570,7 @@ ## Sorting Formats
 - `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other). If `--prefer-free-formats` is used, the order changes to `ogg` > `opus` > `webm` > `mp3` > `m4a` > `aac`
 - `ext`: Equivalent to `vext,aext`
 - `filesize`: Exact filesize, if known in advance
 - `fs_approx`: Approximate filesize calculated from the manifests
 - `fs_approx`: Approximate filesize
 - `size`: Exact filesize if available, otherwise approximate filesize
 - `height`: Height of video
 - `width`: Width of video

@ -1580,7 +1581,7 @@ ## Sorting Formats
 - `tbr`: Total average bitrate in KBit/s
 - `vbr`: Average video bitrate in KBit/s
 - `abr`: Average audio bitrate in KBit/s
 - `br`: Equivalent to using `tbr,vbr,abr`
 - `br`: Average bitrate in KBit/s, `tbr`/`vbr`/`abr`
 - `asr`: Audio sample rate in Hz

**Deprecation warning**: Many of these fields have (currently undocumented) aliases, that may be removed in a future version. It is recommended to use only the documented field names.

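A minimal sketch of using these fields from the Python API (assuming the documented `format_sort` key, which mirrors `-S` on the command line; the URL is just an example):

```python
import yt_dlp

ydl_opts = {
    # Same as `-S "res:1080,fps,vcodec:h264,size"`:
    # cap resolution preference at 1080p, then prefer higher fps,
    # then h264 video, then larger files
    'format_sort': ['res:1080', 'fps', 'vcodec:h264', 'size'],
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
    print(info['format_id'])  # the format chosen by the sort order above
```
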
@ -1800,14 +1801,16 @@ # EXTRACTOR ARGUMENTS
#### youtube
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
    * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8)
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
* `innertube_key`: Innertube API key to use for all API requests
* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning

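These arguments map onto the Python API as the documented `extractor_args` dictionary (one list of string values per argument). A minimal sketch, with values chosen only for illustration:

```python
import yt_dlp

ydl_opts = {
    # CLI equivalent: --extractor-args "youtube:player_client=ios,web;player_skip=webpage"
    'extractor_args': {
        'youtube': {
            'player_client': ['ios', 'web'],  # clients to extract video data from
            'player_skip': ['webpage'],       # skip the initial webpage request
        },
    },
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
```
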
#### youtubetab (YouTube playlists, channels, feeds, etc.)
* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)

@ -1844,6 +1847,9 @@ #### hotstar
* `vcodec`: vcodec to ignore - one or more of `h264`, `h265`, `dvh265`
* `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv`

#### niconicochannelplus
* `max_comments`: Maximum number of comments to extract - default is `120`

#### tiktok
* `api_hostname`: Hostname to use for mobile API requests, e.g. `api-h2.tiktokv.com`
* `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`, e.g. `20.2.1`

@ -1853,7 +1859,7 @@ #### rokfinchannel
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`

#### twitter
* `legacy_api`: Force usage of the legacy Twitter API instead of the GraphQL API for tweet extraction. Has no effect if login cookies are passed
* `api`: Select one of `graphql` (default), `legacy` or `syndication` as the API for tweet extraction. Has no effect if logged in

#### stacommu, wrestleuniverse
* `device_id`: UUID value assigned by the website and used to enforce device limits for paid livestream content. Can be found in browser local storage

@ -1864,6 +1870,9 @@ #### twitch
#### nhkradirulive (NHK らじる★らじる LIVE)
* `area`: Which regional variation to extract. Valid areas are: `sapporo`, `sendai`, `tokyo`, `nagoya`, `osaka`, `hiroshima`, `matsuyama`, `fukuoka`. Defaults to `tokyo`

#### nflplusreplay
* `type`: Type(s) of game replays to extract. Valid types are: `full_game`, `full_game_spanish`, `condensed_game` and `all_22`. You can use `all` to extract all available replay types, which is the default

**Note**: These options may be changed/removed in the future without concern for backward compatibility

<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->

@ -63,6 +63,40 @@
    {
        "action": "add",
        "when": "1ceb657bdd254ad961489e5060f2ccc7d556b729",
        "short": "[priority] Security: [[CVE-2023-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-35934)] Fix [Cookie leak](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj)\n - `--add-header Cookie:` is deprecated and auto-scoped to input URL domains\n - Cookies are scoped when passed to external downloaders\n - Add `cookie` field to info.json and deprecate `http_headers.Cookie`"
        "short": "[priority] Security: [[CVE-2023-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-35934)] Fix [Cookie leak](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj)\n - `--add-header Cookie:` is deprecated and auto-scoped to input URL domains\n - Cookies are scoped when passed to external downloaders\n - Add `cookies` field to info.json and deprecate `http_headers.Cookie`"
    },
    {
        "action": "change",
        "when": "b03fa7834579a01cc5fba48c0e73488a16683d48",
        "short": "[ie/twitter] Revert 92315c03774cfabb3a921884326beb4b981f786b",
        "authors": ["pukkandan"]
    },
    {
        "action": "change",
        "when": "fcd6a76adc49d5cd8783985c7ce35384b72e545f",
        "short": "[test] Add tests for socks proxies (#7908)",
        "authors": ["coletdjnz"]
    },
    {
        "action": "change",
        "when": "4bf912282a34b58b6b35d8f7e6be535770c89c76",
        "short": "[rh:urllib] Remove dot segments during URL normalization (#7662)",
        "authors": ["coletdjnz"]
    },
    {
        "action": "change",
        "when": "59e92b1f1833440bb2190f847eb735cf0f90bc85",
        "short": "[rh:urllib] Simplify gzip decoding (#7611)",
        "authors": ["Grub4K"]
    },
    {
        "action": "add",
        "when": "c1d71d0d9f41db5e4306c86af232f5f6220a130b",
        "short": "[priority] **The minimum *recommended* Python version has been raised to 3.8**\nSince Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803)"
    },
    {
        "action": "add",
        "when": "61bdf15fc7400601c3da1aa7a43917310a5bf391",
        "short": "[priority] Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg)\n - The shell escape function is now using `\"\"` instead of `\\\"`.\n - `utils.Popen` has been patched to properly quote commands."
    }
]

@ -31,59 +31,69 @@ class CommitGroup(enum.Enum):
    EXTRACTOR = 'Extractor'
    DOWNLOADER = 'Downloader'
    POSTPROCESSOR = 'Postprocessor'
    NETWORKING = 'Networking'
    MISC = 'Misc.'

    @classmethod
    @property
    def ignorable_prefixes(cls):
        return ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream')

    @classmethod
    @lru_cache
    def commit_lookup(cls):
    def subgroup_lookup(cls):
        return {
            name: group
            for group, names in {
                cls.PRIORITY: {'priority'},
                cls.CORE: {
                    'aes',
                    'cache',
                    'compat_utils',
                    'compat',
                    'cookies',
                    'core',
                    'dependencies',
                    'jsinterp',
                    'networking',
                    'outtmpl',
                    'formats',
                    'jsinterp',
                    'outtmpl',
                    'plugins',
                    'update',
                    'upstream',
                    'utils',
                },
                cls.MISC: {
                    'build',
                    'ci',
                    'cleanup',
                    'devscripts',
                    'docs',
                    'misc',
                    'test',
                },
                cls.EXTRACTOR: {'extractor', 'ie'},
                cls.DOWNLOADER: {'downloader', 'fd'},
                cls.POSTPROCESSOR: {'postprocessor', 'pp'},
                cls.NETWORKING: {
                    'rh',
                },
            }.items()
            for name in names
        }

    @classmethod
    def get(cls, value):
        result = cls.commit_lookup().get(value)
        if result:
            logger.debug(f'Mapped {value!r} => {result.name}')
    @lru_cache
    def group_lookup(cls):
        result = {
            'fd': cls.DOWNLOADER,
            'ie': cls.EXTRACTOR,
            'pp': cls.POSTPROCESSOR,
            'upstream': cls.CORE,
        }
        result.update({item.name.lower(): item for item in iter(cls)})
        return result

    @classmethod
    def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
        group, _, subgroup = (group.strip().lower() for group in value.partition('/'))

        result = cls.group_lookup().get(group)
        if not result:
            if subgroup:
                return None, value
            subgroup = group
            result = cls.subgroup_lookup().get(subgroup)

        return result, subgroup or None

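# Rough sketch of the expected lookup behavior after this change (illustrative,
# derived from the code above rather than from any documented guarantee):
#
#   CommitGroup.get('ie')          -> (CommitGroup.EXTRACTOR, None)
#   CommitGroup.get('ie/youtube')  -> (CommitGroup.EXTRACTOR, 'youtube')
#   CommitGroup.get('rh')          -> (CommitGroup.NETWORKING, 'rh')
#   CommitGroup.get('cleanup')     -> (CommitGroup.MISC, 'cleanup')
#   CommitGroup.get('unknown/x')   -> (None, 'unknown/x')
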
@dataclass
class Commit:

@ -198,19 +208,23 @@ def _prepare_cleanup_misc_items(self, items):
        for commit_infos in cleanup_misc_items.values():
            sorted_items.append(CommitInfo(
                'cleanup', ('Miscellaneous',), ', '.join(
                    self._format_message_link(None, info.commit.hash).strip()
                    self._format_message_link(None, info.commit.hash)
                    for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
                [], Commit(None, '', commit_infos[0].commit.authors), []))

        return sorted_items

    def format_single_change(self, info):
        message = self._format_message_link(info.message, info.commit.hash)
    def format_single_change(self, info: CommitInfo):
        message, sep, rest = info.message.partition('\n')
        if '[' not in message:
            # If the message doesn't already contain markdown links, try to add a link to the commit
            message = self._format_message_link(message, info.commit.hash)

        if info.issues:
            message = message.replace('\n', f' ({self._format_issues(info.issues)})\n', 1)
            message = f'{message} ({self._format_issues(info.issues)})'

        if info.commit.authors:
            message = message.replace('\n', f' by {self._format_authors(info.commit.authors)}\n', 1)
            message = f'{message} by {self._format_authors(info.commit.authors)}'

        if info.fixes:
            fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)

@ -219,16 +233,14 @@ def format_single_change(self, info):
            if authors != info.commit.authors:
                fix_message = f'{fix_message} by {self._format_authors(authors)}'

            message = message.replace('\n', f' (With fixes in {fix_message})\n', 1)
            message = f'{message} (With fixes in {fix_message})'

        return message[:-1]
        return message if not sep else f'{message}{sep}{rest}'

    def _format_message_link(self, message, hash):
        assert message or hash, 'Improperly defined commit message or override'
        message = message if message else hash[:HASH_LENGTH]
        if not hash:
            return f'{message}\n'
        return f'[{message}\n'.replace('\n', f']({self.repo_url}/commit/{hash})\n', 1)
        return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message

    def _format_issues(self, issues):
        return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)

@ -249,12 +261,12 @@ class CommitRange:
    AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
    MESSAGE_RE = re.compile(r'''
        (?:\[(?P<prefix>[^\]]+)\]\ )?
        (?:(?P<sub_details>`?[^:`]+`?): )?
        (?:(?P<sub_details>`?[\w.-]+`?): )?
        (?P<message>.+?)
        (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
        ''', re.VERBOSE | re.DOTALL)
    EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
    REVERT_RE = re.compile(r'(?i:Revert)\s+([\da-f]{40})')
    REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
    FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert)\s+([\da-f]{40})')
    UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')

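    # Illustration (not part of the module): how MESSAGE_RE splits a typical
    # commit title after the tightened `sub_details` pattern above:
    #
    #   >>> m = CommitRange.MESSAGE_RE.fullmatch('[ie/youtube] tab: Detect looping feeds (#6621)')
    #   >>> m.group('prefix'), m.group('sub_details'), m.group('message'), m.group('issues')
    #   ('ie/youtube', 'tab', 'Detect looping feeds', '#6621')
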
@ -318,7 +330,7 @@ def _get_commits_and_fixes(self, default_author):
        for commitish, revert_commit in reverts.items():
            reverted = commits.pop(commitish, None)
            if reverted:
                logger.debug(f'{commit} fully reverted {reverted}')
                logger.debug(f'{commitish} fully reverted {reverted}')
            else:
                commits[revert_commit.hash] = revert_commit

@ -337,7 +349,7 @@ def apply_overrides(self, overrides):
        for override in overrides:
            when = override.get('when')
            if when and when not in self and when != self._start:
                logger.debug(f'Ignored {when!r}, not in commits {self._start!r}')
                logger.debug(f'Ignored {when!r} override')
                continue

            override_hash = override.get('hash') or when

@ -365,7 +377,7 @@ def groups(self):
        for commit in self:
            upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
            if upstream_re:
                commit.short = f'[core/upstream] Merged with youtube-dl {upstream_re.group(1)}'
                commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}'

            match = self.MESSAGE_RE.fullmatch(commit.short)
            if not match:

@ -410,25 +422,20 @@ def details_from_prefix(prefix):
        if not prefix:
            return CommitGroup.CORE, None, ()

        prefix, _, details = prefix.partition('/')
        prefix = prefix.strip()
        details = details.strip()
        prefix, *sub_details = prefix.split(':')

        group = CommitGroup.get(prefix.lower())
        if group is CommitGroup.PRIORITY:
            prefix, _, details = details.partition('/')
        group, details = CommitGroup.get(prefix)
        if group is CommitGroup.PRIORITY and details:
            details = details.partition('/')[2].strip()

        if not details and prefix and prefix not in CommitGroup.ignorable_prefixes:
            logger.debug(f'Replaced details with {prefix!r}')
            details = prefix or None
        if details and '/' in details:
            logger.error(f'Prefix is overnested, using first part: {prefix}')
            details = details.partition('/')[0].strip()

        if details == 'common':
            details = None

        if details:
            details, *sub_details = details.split(':')
        else:
            sub_details = []
        elif group is CommitGroup.NETWORKING and details == 'rh':
            details = 'Request Handler'

        return group, details, sub_details

@ -10,14 +10,14 @@
import argparse
import contextlib
import sys
from datetime import datetime
from datetime import datetime, timezone

from devscripts.utils import read_version, run_process, write_file


def get_new_version(version, revision):
    if not version:
        version = datetime.utcnow().strftime('%Y.%m.%d')
        version = datetime.now(timezone.utc).strftime('%Y.%m.%d')

    if revision:
        assert revision.isdigit(), 'Revision must be a number'

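The `datetime` change above avoids `utcnow()`, which returns a naive datetime and is deprecated as of Python 3.12; a quick comparison of the two calls (illustrative):

```python
from datetime import datetime, timezone

naive = datetime.utcnow()            # naive UTC; deprecated since Python 3.12
aware = datetime.now(timezone.utc)   # timezone-aware replacement

assert naive.tzinfo is None
assert aware.tzinfo is timezone.utc
# Both format the same way for version strings, e.g. '2023.10.13'
print(aware.strftime('%Y.%m.%d'))
```
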
@ -4,3 +4,5 @@ websockets
brotli; platform_python_implementation=='CPython'
brotlicffi; platform_python_implementation!='CPython'
certifi
requests>=2.31.0,<3
urllib3>=1.26.17,<3
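With `requests` and `urllib3` now pinned here, one hedged way to see which networking dependencies an install actually picked up is via `yt_dlp.dependencies` (the module the tests below import from; treating each attribute as the imported module or `None` when missing is an assumption based on those imports):

```python
from yt_dlp import dependencies

for name in ('brotli', 'certifi', 'requests', 'urllib3', 'websockets'):
    module = getattr(dependencies, name, None)
    print(f'{name}: {"available" if module else "missing"}')
```
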
9
setup.py
@ -62,7 +62,14 @@ def py2exe_params():
        'compressed': 1,
        'optimize': 2,
        'dist_dir': './dist',
        'excludes': ['Crypto', 'Cryptodome'],  # py2exe cannot import Crypto
        'excludes': [
            # py2exe cannot import Crypto
            'Crypto',
            'Cryptodome',
            # py2exe appears to confuse this with our socks library.
            # We don't use pysocks and urllib3.contrib.socks would fail to import if tried.
            'urllib3.contrib.socks'
        ],
        'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
        # Modules that are only imported dynamically must be added here
        'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated',

@ -77,7 +77,7 @@ # Supported sites
 - **AnimalPlanet**
 - **ant1newsgr:article**: ant1news.gr articles
 - **ant1newsgr:embed**: ant1news.gr embedded videos
 - **ant1newsgr:watch**: ant1news.gr videos
 - **antenna:watch**: antenna.gr and ant1news.gr videos
 - **Anvato**
 - **aol.com**: Yahoo screen and movies
 - **APA**

@ -98,8 +98,6 @@ # Supported sites
 - **ArteTVCategory**
 - **ArteTVEmbed**
 - **ArteTVPlaylist**
 - **AsianCrush**
 - **AsianCrushPlaylist**
 - **AtresPlayer**: [*atresplayer*](## "netrc machine")
 - **AtScaleConfEvent**
 - **ATTTechChannel**

@ -118,6 +116,7 @@ # Supported sites
 - **awaan:live**
 - **awaan:season**
 - **awaan:video**
 - **axs.tv**
 - **AZMedien**: AZ Medien videos
 - **BaiduVideo**: 百度视频
 - **BanBye**

@ -162,11 +161,16 @@ # Supported sites
 - **BilibiliAudioAlbum**
 - **BiliBiliBangumi**
 - **BiliBiliBangumiMedia**
 - **BiliBiliBangumiSeason**
 - **BilibiliCollectionList**
 - **BilibiliFavoritesList**
 - **BiliBiliPlayer**
 - **BilibiliPlaylist**
 - **BiliBiliSearch**: Bilibili video search; "bilisearch:" prefix
 - **BilibiliSeriesList**
 - **BilibiliSpaceAudio**
 - **BilibiliSpacePlaylist**
 - **BilibiliSpaceVideo**
 - **BilibiliWatchlater**
 - **BiliIntl**: [*biliintl*](## "netrc machine")
 - **biliIntl:series**: [*biliintl*](## "netrc machine")
 - **BiliLive**

@ -201,6 +205,8 @@ # Supported sites
 - **BreitBart**
 - **brightcove:legacy**
 - **brightcove:new**
 - **Brilliantpala:Classes**: [*brilliantpala*](## "netrc machine") VoD on classes.brilliantpala.org
 - **Brilliantpala:Elearn**: [*brilliantpala*](## "netrc machine") VoD on elearn.brilliantpala.org
 - **BRMediathek**: Bayerischer Rundfunk Mediathek
 - **bt:article**: Bergens Tidende Articles
 - **bt:vestlendingen**: Bergens Tidende - Vestlendingen

@ -220,14 +226,17 @@ # Supported sites
 - **Camsoda**
 - **CamtasiaEmbed**
 - **CamWithHer**
 - **Canal1**
 - **CanalAlpha**
 - **canalc2.tv**
 - **Canalplus**: mycanal.fr and piwiplus.fr
 - **CaracolTvPlay**: [*caracoltv-play*](## "netrc machine")
 - **CarambaTV**
 - **CarambaTVPage**
 - **CartoonNetwork**
 - **cbc.ca**
 - **cbc.ca:player**
 - **cbc.ca:player:playlist**
 - **CBS**
 - **CBSInteractive**
 - **CBSLocal**

@ -257,6 +266,8 @@ # Supported sites
 - **Cinchcast**
 - **Cinemax**
 - **CinetecaMilano**
 - **Cineverse**
 - **CineverseDetails**
 - **CiscoLiveSearch**
 - **CiscoLiveSession**
 - **ciscowebex**: Cisco Webex

@ -365,7 +376,7 @@ # Supported sites
 - **Dotsub**
 - **Douyin**
 - **DouyuShow**
 - **DouyuTV**: 斗鱼
 - **DouyuTV**: 斗鱼直播
 - **DPlay**
 - **DRBonanza**
 - **Drooble**

@ -408,8 +419,10 @@ # Supported sites
 - **Engadget**
 - **Epicon**
 - **EpiconSeries**
 - **eplus:inbound**: e+ (イープラス) overseas
 - **Epoch**
 - **Eporner**
 - **Erocast**
 - **EroProfile**: [*eroprofile*](## "netrc machine")
 - **EroProfile:album**
 - **ertflix**: ERTFLIX videos

@ -644,6 +657,8 @@ # Supported sites
 - **Joj**
 - **Jove**
 - **JStream**
 - **JTBC**: jtbc.co.kr
 - **JTBC:program**
 - **JWPlatform**
 - **Kakao**
 - **Kaltura**

@ -687,6 +702,7 @@ # Supported sites
 - **LastFMUser**
 - **lbry**
 - **lbry:channel**
 - **lbry:playlist**
 - **LCI**
 - **Lcp**
 - **LcpPlay**

@ -732,6 +748,7 @@ # Supported sites
 - **lynda**: [*lynda*](## "netrc machine") lynda.com videos
 - **lynda:course**: [*lynda*](## "netrc machine") lynda.com online courses
 - **m6**
 - **MagellanTV**
 - **MagentaMusik360**
 - **mailru**: Видео@Mail.Ru
 - **mailru:music**: Музыка@Mail.Ru

@ -751,6 +768,7 @@ # Supported sites
 - **massengeschmack.tv**
 - **Masters**
 - **MatchTV**
 - **MBN**: mbn.co.kr (매일방송)
 - **MDR**: MDR.DE and KiKA
 - **MedalTV**
 - **media.ccc.de**

@ -812,6 +830,7 @@ # Supported sites
 - **Mofosex**
 - **MofosexEmbed**
 - **Mojvideo**
 - **Monstercat**
 - **MonsterSirenHypergryphMusic**
 - **Morningstar**: morningstar.com
 - **Motherless**

@ -840,6 +859,7 @@ # Supported sites
 - **MujRozhlas**
 - **Murrtube**
 - **MurrtubeUser**: Murrtube user profile
 - **MuseAI**
 - **MuseScore**
 - **MusicdexAlbum**
 - **MusicdexArtist**

@ -944,6 +964,9 @@ # Supported sites
 - **niconico:playlist**
 - **niconico:series**
 - **niconico:tag**: NicoNico video tag URLs
 - **NiconicoChannelPlus**: ニコニコチャンネルプラス
 - **NiconicoChannelPlus:channel:lives**: ニコニコチャンネルプラス - チャンネル - ライブリスト. nicochannel.jp/channel/lives
 - **NiconicoChannelPlus:channel:videos**: ニコニコチャンネルプラス - チャンネル - 動画リスト. nicochannel.jp/channel/videos
 - **NiconicoUser**
 - **nicovideo:search**: Nico video search; "nicosearch:" prefix
 - **nicovideo:search:date**: Nico video search, newest first; "nicosearchdate:" prefix

@ -1046,6 +1069,7 @@ # Supported sites
 - **Patreon**
 - **PatreonCampaign**
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
 - **PBSKids**
 - **PearVideo**
 - **PeekVids**
 - **peer.tv**

@ -1062,6 +1086,7 @@ # Supported sites
 - **phoenix.de**
 - **Photobucket**
 - **Piapro**: [*piapro*](## "netrc machine")
 - **PIAULIZAPortal**: ulizaportal.jp - PIA LIVE STREAM
 - **Picarto**
 - **PicartoVod**
 - **Piksel**

@ -1105,6 +1130,7 @@ # Supported sites
 - **polskieradio:podcast:list**
 - **Popcorntimes**
 - **PopcornTV**
 - **Pornbox**
 - **PornCom**
 - **PornerBros**
 - **Pornez**

@ -1121,7 +1147,6 @@ # Supported sites
 - **PornTop**
 - **PornTube**
 - **Pr0gramm**
 - **Pr0grammStatic**
 - **PrankCast**
 - **PremiershipRugby**
 - **PressTV**

@ -1156,6 +1181,10 @@ # Supported sites
 - **radiocanada**
 - **radiocanada:audiovideo**
 - **radiofrance**
 - **RadioFranceLive**
 - **RadioFrancePodcast**
 - **RadioFranceProfile**
 - **RadioFranceProgramSchedule**
 - **RadioJavan**
 - **radiokapital**
 - **radiokapital:show**

@ -1177,6 +1206,7 @@ # Supported sites
 - **RayWenderlichCourse**
 - **RbgTum**
 - **RbgTumCourse**
 - **RbgTumNewCourse**
 - **RBMARadio**
 - **RCS**
 - **RCSEmbeds**

@ -1259,6 +1289,8 @@ # Supported sites
 - **Ruutu**
 - **Ruv**
 - **ruv.is:spila**
 - **S4C**
 - **S4CSeries**
 - **safari**: [*safari*](## "netrc machine") safaribooksonline.com online video
 - **safari:api**: [*safari*](## "netrc machine")
 - **safari:course**: [*safari*](## "netrc machine") safaribooksonline.com online courses

@ -1325,6 +1357,7 @@ # Supported sites
 - **Smotrim**
 - **Snotr**
 - **Sohu**
 - **SohuV**
 - **SonyLIV**: [*sonyliv*](## "netrc machine")
 - **SonyLIVSeries**
 - **soundcloud**: [*soundcloud*](## "netrc machine")

@ -1378,7 +1411,6 @@ # Supported sites
 - **StoryFireSeries**
 - **StoryFireUser**
 - **Streamable**
 - **Streamanity**
 - **streamcloud.eu**
 - **StreamCZ**
 - **StreamFF**

@ -1403,6 +1435,9 @@ # Supported sites
 - **Tagesschau**
 - **Tass**
 - **TBS**
 - **TBSJPEpisode**
 - **TBSJPPlaylist**
 - **TBSJPProgram**
 - **TDSLifeway**
 - **Teachable**: [*teachable*](## "netrc machine")
 - **TeachableCourse**: [*teachable*](## "netrc machine")

@ -1436,6 +1471,7 @@ # Supported sites
 - **Tempo**
 - **TennisTV**: [*tennistv*](## "netrc machine")
 - **TenPlay**: [*10play*](## "netrc machine")
 - **TenPlaySeason**
 - **TF1**
 - **TFO**
 - **TheHoleTv**

@ -1444,8 +1480,6 @@ # Supported sites
 - **ThePlatformFeed**
 - **TheStar**
 - **TheSun**
 - **ThetaStream**
 - **ThetaVideo**
 - **TheWeatherChannel**
 - **ThisAmericanLife**
 - **ThisAV**

@ -1702,7 +1736,6 @@ # Supported sites
 - **wdr:mobile**: (**Currently broken**)
 - **WDRElefant**
 - **WDRPage**
 - **web.archive:vlive**: web.archive.org saved vlive videos
 - **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix
 - **Webcamerapl**
 - **Webcaster**

@ -1710,7 +1743,8 @@ # Supported sites
 - **WebOfStories**
 - **WebOfStoriesPlaylist**
 - **Weibo**
 - **WeiboMobile**
 - **WeiboUser**
 - **WeiboVideo**
 - **WeiqiTV**: WQTV
 - **wetv:episode**
 - **WeTvSeries**

@ -1726,6 +1760,7 @@ # Supported sites
 - **Whyp**
 - **wikimedia.org**
 - **Willow**
 - **Wimbledon**
 - **WimTV**
 - **WinSportsVideo**
 - **Wistia**
21
test/conftest.py
Normal file
@ -0,0 +1,21 @@
import functools
import inspect

import pytest

from yt_dlp.networking import RequestHandler
from yt_dlp.networking.common import _REQUEST_HANDLERS
from yt_dlp.utils._utils import _YDLLogger as FakeLogger


@pytest.fixture
def handler(request):
    RH_KEY = request.param
    if inspect.isclass(RH_KEY) and issubclass(RH_KEY, RequestHandler):
        handler = RH_KEY
    elif RH_KEY in _REQUEST_HANDLERS:
        handler = _REQUEST_HANDLERS[RH_KEY]
    else:
        pytest.skip(f'{RH_KEY} request handler is not available')

    return functools.partial(handler, logger=FakeLogger)
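Tests opt in to this shared fixture through indirect parametrization; a usage sketch matching the parametrizations visible later in this diff:

```python
import pytest

@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_something(handler):
    # `handler` is a factory; unavailable handlers were already skipped
    with handler() as rh:
        ...
```
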
@ -26,7 +26,6 @@
)
from yt_dlp.utils.traversal import traverse_obj


TEST_URL = 'http://localhost/sample.mp4'


@ -632,7 +631,6 @@ def test_add_extra_info(self):
        self.assertEqual(test_dict['playlist'], 'funny videos')

    outtmpl_info = {
        'id': '1234',
        'ext': 'mp4',
        'width': None,

@ -687,6 +685,7 @@ def test(tmpl, expected, *, info=None, **params):
        test('%(duration_string)s', ('27:46:40', '27-46-40'))
        test('%(resolution)s', '1080p')
        test('%(playlist_index|)s', '001')
        test('%(playlist_index&{}!)s', '1!')
        test('%(playlist_autonumber)s', '02')
        test('%(autonumber)s', '00001')
        test('%(autonumber+2)03d', '005', autonumber_start=3)

@ -785,9 +784,9 @@ def expect_same_infodict(out):
        test('%(title4)#S', 'foo_bar_test')
        test('%(title4).10S', ('foo "bar" ', 'foo "bar"' + ('#' if compat_os_name == 'nt' else ' ')))
        if compat_os_name == 'nt':
            test('%(title4)q', ('"foo \\"bar\\" test"', '"foo ⧹"bar⧹" test"'))
            test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', '"id 1" "id 2" "id 3"'))
            test('%(formats.0.id)#q', ('"id 1"', '"id 1"'))
            test('%(title4)q', ('"foo ""bar"" test"', None))
            test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', None))
            test('%(formats.0.id)#q', ('"id 1"', None))
        else:
            test('%(title4)q', ('\'foo "bar" test\'', '\'foo "bar" test\''))
            test('%(formats.:.id)#q', "'id 1' 'id 2' 'id 3'")

@ -831,6 +830,7 @@ def expect_same_infodict(out):
        test('%(id&hi {:>10} {}|)s', 'hi 1234 1234')
        test(R'%(id&{0} {}|)s', 'NA')
        test(R'%(id&{0.1}|)s', 'NA')
        test('%(height&{:,d})S', '1,080')

        # Laziness
        def gen():

@ -17,10 +17,10 @@
class TestYoutubeDLCookieJar(unittest.TestCase):
    def test_keep_session_cookies(self):
        cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
        cookiejar.load(ignore_discard=True, ignore_expires=True)
        cookiejar.load()
        tf = tempfile.NamedTemporaryFile(delete=False)
        try:
            cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True)
            cookiejar.save(filename=tf.name)
            temp = tf.read().decode()
            self.assertTrue(re.search(
                r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp))

@ -32,7 +32,7 @@ def test_keep_session_cookies(self):

    def test_strip_httponly_prefix(self):
        cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
        cookiejar.load(ignore_discard=True, ignore_expires=True)
        cookiejar.load()

        def assert_cookie_has_value(key):
            self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE')

@ -42,20 +42,20 @@ def assert_cookie_has_value(key):

    def test_malformed_cookies(self):
        cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt')
        cookiejar.load(ignore_discard=True, ignore_expires=True)
        cookiejar.load()
        # Cookies should be empty since all malformed cookie file entries
        # will be ignored
        self.assertFalse(cookiejar._cookies)

    def test_get_cookie_header(self):
        cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
        cookiejar.load(ignore_discard=True, ignore_expires=True)
        cookiejar.load()
        header = cookiejar.get_cookie_header('https://www.foobar.foobar')
        self.assertIn('HTTPONLY_COOKIE', header)

    def test_get_cookies_for_url(self):
        cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
        cookiejar.load(ignore_discard=True, ignore_expires=True)
        cookiejar.load()
        cookies = cookiejar.get_cookies_for_url('https://www.foobar.foobar/')
        self.assertEqual(len(cookies), 2)
        cookies = cookiejar.get_cookies_for_url('https://foobar.foobar/')
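The tests above lean on `YoutubeDLCookieJar.load()` now keeping session cookies by default, plus the jar's two lookup helpers; a usage sketch (the cookie file path is illustrative):

```python
from yt_dlp.cookies import YoutubeDLCookieJar

jar = YoutubeDLCookieJar('cookies.txt')  # Netscape-format cookie file
jar.load()  # session/expired cookies are no longer discarded by default

# Ready-to-send "name=value; ..." header string for a URL
header = jar.get_cookie_header('https://www.foobar.foobar')
# Or the matching Cookie objects themselves
cookies = jar.get_cookies_for_url('https://www.foobar.foobar/')
```
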
@ -9,15 +9,16 @@
|
|||
|
||||
|
||||
import struct
|
||||
import urllib.parse
|
||||
|
||||
from yt_dlp import compat
|
||||
from yt_dlp.compat import urllib # isort: split
|
||||
from yt_dlp.compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_expanduser,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from yt_dlp.compat.urllib.request import getproxies
|
||||
|
||||
|
||||
class TestCompat(unittest.TestCase):
|
||||
|
@ -28,8 +29,7 @@ def test_compat_passthrough(self):
|
|||
with self.assertWarns(DeprecationWarning):
|
||||
compat.WINDOWS_VT_MODE
|
||||
|
||||
# TODO: Test submodule
|
||||
# compat.asyncio.events # Must not raise error
|
||||
self.assertEqual(urllib.request.getproxies, getproxies)
|
||||
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
compat.compat_pycrypto_AES # Must not raise error
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
DownloadError,
|
||||
ExtractorError,
|
||||
UnavailableVideoError,
|
||||
YoutubeDLError,
|
||||
format_bytes,
|
||||
join_nonempty,
|
||||
)
|
||||
|
@ -100,6 +101,8 @@ def print_skipping(reason):
|
|||
print_skipping('IE marked as not _WORKING')
|
||||
|
||||
for tc in test_cases:
|
||||
if tc.get('expected_exception'):
|
||||
continue
|
||||
info_dict = tc.get('info_dict', {})
|
||||
params = tc.get('params', {})
|
||||
if not info_dict.get('id'):
|
||||
|
@ -139,6 +142,17 @@ def get_tc_filename(tc):
|
|||
|
||||
res_dict = None
|
||||
|
||||
def match_exception(err):
|
||||
expected_exception = test_case.get('expected_exception')
|
||||
if not expected_exception:
|
||||
return False
|
||||
if err.__class__.__name__ == expected_exception:
|
||||
return True
|
||||
for exc in err.exc_info:
|
||||
if exc.__class__.__name__ == expected_exception:
|
||||
return True
|
||||
return False
|
||||
|
||||
def try_rm_tcs_files(tcs=None):
|
||||
if tcs is None:
|
||||
tcs = test_cases
|
||||
|
@ -161,6 +175,8 @@ def try_rm_tcs_files(tcs=None):
|
|||
except (DownloadError, ExtractorError) as err:
|
||||
# Check if the exception is not a network related one
|
||||
if not isinstance(err.exc_info[1], (TransportError, UnavailableVideoError)) or (isinstance(err.exc_info[1], HTTPError) and err.exc_info[1].status == 503):
|
||||
if match_exception(err):
|
||||
return
|
||||
err.msg = f'{getattr(err, "msg", err)} ({tname})'
|
||||
raise
|
||||
|
||||
|
@ -171,6 +187,10 @@ def try_rm_tcs_files(tcs=None):
|
|||
print(f'Retrying: {try_num} failed tries\n\n##########\n\n')
|
||||
|
||||
try_num += 1
|
||||
except YoutubeDLError as err:
|
||||
if match_exception(err):
|
||||
return
|
||||
raise
|
||||
else:
|
||||
break
|
||||
|
||||
|
|
|
@ -129,6 +129,11 @@ def test_make_cmd(self):
|
|||
'ffmpeg', '-y', '-hide_banner', '-cookies', 'test=ytdlp; path=/; domain=.example.com;\r\n',
|
||||
'-i', 'http://www.example.com/', '-c', 'copy', '-f', 'mp4', 'file:test'])
|
||||
|
||||
# Test with non-url input (ffmpeg reads from stdin '-' for websockets)
|
||||
downloader._call_downloader('test', {'url': 'x', 'ext': 'mp4'})
|
||||
self.assertEqual(self._args, [
|
||||
'ffmpeg', '-y', '-hide_banner', '-i', 'x', '-c', 'copy', '-f', 'mp4', 'file:test'])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -45,6 +45,9 @@ def test_lazy_extractors(self):
|
|||
self.assertTrue(os.path.exists(LAZY_EXTRACTORS))
|
||||
|
||||
_, stderr = self.run_yt_dlp(opts=('-s', 'test:'))
|
||||
# `MIN_RECOMMENDED` emits a deprecated feature warning for deprecated python versions
|
||||
if stderr and stderr.startswith('Deprecated Feature: Support for Python'):
|
||||
stderr = ''
|
||||
self.assertFalse(stderr)
|
||||
|
||||
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=subprocess.DEVNULL)
|
||||
|
|
|
@ -8,12 +8,10 @@
|
|||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import functools
|
||||
import gzip
|
||||
import http.client
|
||||
import http.cookiejar
|
||||
import http.server
|
||||
import inspect
|
||||
import io
|
||||
import pathlib
|
||||
import random
|
||||
|
@ -29,7 +27,8 @@
|
|||
from http.cookiejar import CookieJar
|
||||
|
||||
from test.helper import FakeYDL, http_server_port
|
||||
from yt_dlp.dependencies import brotli
|
||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||
from yt_dlp.dependencies import brotli, requests, urllib3
|
||||
from yt_dlp.networking import (
|
||||
HEADRequest,
|
||||
PUTRequest,
|
||||
|
@ -39,12 +38,12 @@
|
|||
Response,
|
||||
)
|
||||
from yt_dlp.networking._urllib import UrllibRH
|
||||
from yt_dlp.networking.common import _REQUEST_HANDLERS
|
||||
from yt_dlp.networking.exceptions import (
|
||||
CertificateVerifyError,
|
||||
HTTPError,
|
||||
IncompleteRead,
|
||||
NoSupportingHandlers,
|
||||
ProxyError,
|
||||
RequestError,
|
||||
SSLError,
|
||||
TransportError,
|
||||
|
@ -173,6 +172,12 @@ def do_GET(self):
|
|||
self.send_header('Location', self.path)
|
||||
self.send_header('Content-Length', '0')
|
||||
self.end_headers()
|
||||
elif self.path == '/redirect_dotsegments':
|
||||
self.send_response(301)
|
||||
# redirect to /headers but with dot segments before
|
||||
self.send_header('Location', '/a/b/./../../headers')
|
||||
self.send_header('Content-Length', '0')
|
||||
self.end_headers()
|
||||
elif self.path.startswith('/redirect_'):
|
||||
self._redirect()
|
||||
elif self.path.startswith('/method'):
|
||||
|
@ -300,21 +305,8 @@ def setup_class(cls):
|
|||
cls.https_server_thread.start()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def handler(request):
|
||||
RH_KEY = request.param
|
||||
if inspect.isclass(RH_KEY) and issubclass(RH_KEY, RequestHandler):
|
||||
handler = RH_KEY
|
||||
elif RH_KEY in _REQUEST_HANDLERS:
|
||||
handler = _REQUEST_HANDLERS[RH_KEY]
|
||||
else:
|
||||
pytest.skip(f'{RH_KEY} request handler is not available')
|
||||
|
||||
return functools.partial(handler, logger=FakeLogger)
|
||||
|
||||
|
||||
class TestHTTPRequestHandler(TestRequestHandlerBase):
    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_verify_cert(self, handler):
        with handler() as rh:
            with pytest.raises(CertificateVerifyError):

@@ -325,7 +317,7 @@ def test_verify_cert(self, handler):
            assert r.status == 200
            r.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_ssl_error(self, handler):
        # HTTPS server with too old TLS version
        # XXX: is there a better way to test this than to create a new server?

@@ -343,7 +335,7 @@ def test_ssl_error(self, handler):
            validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
        assert not issubclass(exc_info.type, CertificateVerifyError)

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_percent_encode(self, handler):
        with handler() as rh:
            # Unicode characters should be encoded with uppercase percent-encoding

@@ -355,14 +347,29 @@ def test_percent_encode(self, handler):
            assert res.status == 200
            res.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_remove_dot_segments(self, handler):
        with handler() as rh:
            # This isn't a comprehensive test,
            # but it should be enough to check whether the handler is removing dot segments
            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/a/b/./../../headers'))
            assert res.status == 200
            assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
            res.close()

            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_dotsegments'))
            assert res.status == 200
            assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
            res.close()
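
The normalization exercised here follows RFC 3986 section 5.2.4; a quick check against the helper this commit also covers in test_utils.py:

from yt_dlp.utils.networking import remove_dot_segments
# '/a/b/./../../headers' collapses to '/headers', which is what the
# assertions above expect both the direct request and the redirect to yield
assert remove_dot_segments('/a/b/./../../headers') == '/headers'
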
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_unicode_path_redirection(self, handler):
        with handler() as rh:
            r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
            assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
            r.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_raise_http_error(self, handler):
        with handler() as rh:
            for bad_status in (400, 500, 599, 302):

@@ -372,7 +379,7 @@ def test_raise_http_error(self, handler):
            # Should not raise an error
            validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_response_url(self, handler):
        with handler() as rh:
            # Response url should be that of the last url in redirect chain

@@ -383,7 +390,7 @@ def test_response_url(self, handler):
            assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200'
            res2.close()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_redirect(self, handler):
        with handler() as rh:
            def do_req(redirect_status, method, assert_no_content=False):

@@ -438,7 +445,7 @@ def do_req(redirect_status, method, assert_no_content=False):
                with pytest.raises(HTTPError):
                    do_req(code, 'GET')

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_request_cookie_header(self, handler):
        # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
        with handler() as rh:

@@ -457,7 +464,7 @@ def test_request_cookie_header(self, handler):
            assert 'Cookie: test=test' not in res

            # Specified Cookie header should override global cookiejar for that request
            cookiejar = http.cookiejar.CookieJar()
            cookiejar = YoutubeDLCookieJar()
            cookiejar.set_cookie(http.cookiejar.Cookie(
                version=0, name='test', value='ytdlp', port=None, port_specified=False,
                domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',

@@ -470,21 +477,21 @@ def test_request_cookie_header(self, handler):
            assert b'Cookie: test=ytdlp' not in data
            assert b'Cookie: test=test' in data

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_redirect_loop(self, handler):
        with handler() as rh:
            with pytest.raises(HTTPError, match='redirect loop'):
                validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_incompleteread(self, handler):
        with handler(timeout=2) as rh:
            with pytest.raises(IncompleteRead):
                validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_cookies(self, handler):
        cookiejar = http.cookiejar.CookieJar()
        cookiejar = YoutubeDLCookieJar()
        cookiejar.set_cookie(http.cookiejar.Cookie(
            0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
            False, '/headers', True, False, None, False, None, None, {}))

@@ -499,7 +506,7 @@ def test_cookies(self, handler):
            rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
        assert b'Cookie: test=ytdlp' in data
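
A usage sketch of the per-request cookiejar extension shown above (assumes the `handler` fixture; cookie values mirror the test, the port is a placeholder):

jar = YoutubeDLCookieJar()
jar.set_cookie(http.cookiejar.Cookie(
    0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
    False, '/headers', True, False, None, False, None, None, {}))
with handler() as rh:
    # the extension-scoped jar overrides any handler-level cookiejar
    rh.send(Request('http://127.0.0.1:8080/headers', extensions={'cookiejar': jar}))
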
    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_headers(self, handler):

        with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:

@@ -515,7 +522,7 @@ def test_headers(self, handler):
            assert b'Test2: test2' not in data
            assert b'Test3: test3' in data

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_timeout(self, handler):
        with handler() as rh:
            # Default timeout is 20 seconds, so this should go through

@@ -531,7 +538,7 @@ def test_timeout(self, handler):
                validate_and_send(
                    rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_source_address(self, handler):
        source_address = f'127.0.0.{random.randint(5, 255)}'
        with handler(source_address=source_address) as rh:

@@ -539,13 +546,13 @@ def test_source_address(self, handler):
                rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
            assert source_address == data

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_gzip_trailing_garbage(self, handler):
        with handler() as rh:
            data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
            assert data == '<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    @pytest.mark.skipif(not brotli, reason='brotli support is not installed')
    def test_brotli(self, handler):
        with handler() as rh:

@@ -556,7 +563,7 @@ def test_brotli(self, handler):
            assert res.headers.get('Content-Encoding') == 'br'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_deflate(self, handler):
        with handler() as rh:
            res = validate_and_send(

@@ -566,7 +573,7 @@ def test_deflate(self, handler):
            assert res.headers.get('Content-Encoding') == 'deflate'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_gzip(self, handler):
        with handler() as rh:
            res = validate_and_send(

@@ -576,7 +583,7 @@ def test_gzip(self, handler):
            assert res.headers.get('Content-Encoding') == 'gzip'
            assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_multiple_encodings(self, handler):
        with handler() as rh:
            for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):

@@ -587,7 +594,7 @@ def test_multiple_encodings(self, handler):
                assert res.headers.get('Content-Encoding') == pair
                assert res.read() == b'<html><video src="/vid.mp4" /></html>'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_unsupported_encoding(self, handler):
        with handler() as rh:
            res = validate_and_send(

@@ -597,7 +604,7 @@ def test_unsupported_encoding(self, handler):
            assert res.headers.get('Content-Encoding') == 'unsupported'
            assert res.read() == b'raw'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_read(self, handler):
        with handler() as rh:
            res = validate_and_send(
@@ -627,7 +634,7 @@ def setup_class(cls):
        cls.geo_proxy_thread.daemon = True
        cls.geo_proxy_thread.start()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_http_proxy(self, handler):
        http_proxy = f'http://127.0.0.1:{self.proxy_port}'
        geo_proxy = f'http://127.0.0.1:{self.geo_port}'

@@ -653,7 +660,7 @@ def test_http_proxy(self, handler):
            assert res != f'normal: {real_url}'
            assert 'Accept' in res

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_noproxy(self, handler):
        with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
            # NO_PROXY

@@ -663,7 +670,7 @@ def test_noproxy(self, handler):
                'utf-8')
            assert 'Accept' in nop_response

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_allproxy(self, handler):
        url = 'http://foo.com/bar'
        with handler() as rh:

@@ -671,7 +678,7 @@ def test_allproxy(self, handler):
                'utf-8')
            assert response == f'normal: {url}'

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_http_proxy_with_idn(self, handler):
        with handler(proxies={
            'http': f'http://127.0.0.1:{self.proxy_port}',

@@ -709,27 +716,27 @@ def _run_test(self, handler, **handler_kwargs):
        ) as rh:
            validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html')).read().decode()

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_combined_nopass(self, handler):
        self._run_test(handler, client_cert={
            'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
        })

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_nocombined_nopass(self, handler):
        self._run_test(handler, client_cert={
            'client_certificate': os.path.join(self.certdir, 'client.crt'),
            'client_certificate_key': os.path.join(self.certdir, 'client.key'),
        })

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_combined_pass(self, handler):
        self._run_test(handler, client_cert={
            'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
            'client_certificate_password': 'foobar',
        })

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_certificate_nocombined_pass(self, handler):
        self._run_test(handler, client_cert={
            'client_certificate': os.path.join(self.certdir, 'client.crt'),

@@ -785,29 +792,107 @@ def test_verify_cert_error_text(self, handler):
            validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
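
A sketch of the client_cert options these tests drive (paths are illustrative; the dict is forwarded to the handler as in _run_test above):

with handler(client_cert={
    'client_certificate': '/path/to/client.crt',       # cert, or cert plus key combined
    'client_certificate_key': '/path/to/client.key',   # separate key, if not combined
    'client_certificate_password': 'foobar',           # only for encrypted keys
}) as rh:
    validate_and_send(rh, Request('https://127.0.0.1:443/headers'))  # placeholder URL
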
    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_httplib_validation_errors(self, handler):
    @pytest.mark.parametrize('req,match,version_check', [
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
        # bpo-39603: Check implemented in 3.7.9+, 3.8.5+
        (
            Request('http://127.0.0.1', method='GET\n'),
            'method can\'t contain control characters',
            lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5)
        ),
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265
        # bpo-38576: Check implemented in 3.7.8+, 3.8.3+
        (
            Request('http://127.0.0. 1', method='GET'),
            'URL can\'t contain control characters',
            lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3)
        ),
        # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50
        (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None),
    ])
    def test_httplib_validation_errors(self, handler, req, match, version_check):
        if version_check and version_check(sys.version_info):
            pytest.skip(f'Python {sys.version} version does not have the required validation for this test.')

        with handler() as rh:

            # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
            with pytest.raises(RequestError, match='method can\'t contain control characters') as exc_info:
                validate_and_send(rh, Request('http://127.0.0.1', method='GET\n'))
            assert not isinstance(exc_info.value, TransportError)

            # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265
            with pytest.raises(RequestError, match='URL can\'t contain control characters') as exc_info:
                validate_and_send(rh, Request('http://127.0.0. 1', method='GET\n'))
            assert not isinstance(exc_info.value, TransportError)

            # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50
            with pytest.raises(RequestError, match='Invalid header name') as exc_info:
                validate_and_send(rh, Request('http://127.0.0.1', headers={'foo\n': 'bar'}))
            with pytest.raises(RequestError, match=match) as exc_info:
                validate_and_send(rh, req)
            assert not isinstance(exc_info.value, TransportError)
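
For reference, the version_check lambdas encode when CPython backported each validation; as a hedged illustration, the gate for the method check (bpo-39603, added in 3.7.9 and 3.8.5) reads:

# Illustration only: True means the running interpreter lacks the check
# and the parametrized case must be skipped.
missing_check = sys.version_info < (3, 7, 9) or (3, 8, 0) <= sys.version_info < (3, 8, 5)
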
def run_validation(handler, fail, req, **handler_kwargs):
class TestRequestsRequestHandler(TestRequestHandlerBase):
    @pytest.mark.parametrize('raised,expected', [
        (lambda: requests.exceptions.ConnectTimeout(), TransportError),
        (lambda: requests.exceptions.ReadTimeout(), TransportError),
        (lambda: requests.exceptions.Timeout(), TransportError),
        (lambda: requests.exceptions.ConnectionError(), TransportError),
        (lambda: requests.exceptions.ProxyError(), ProxyError),
        (lambda: requests.exceptions.SSLError('12[CERTIFICATE_VERIFY_FAILED]34'), CertificateVerifyError),
        (lambda: requests.exceptions.SSLError(), SSLError),
        (lambda: requests.exceptions.InvalidURL(), RequestError),
        (lambda: requests.exceptions.InvalidHeader(), RequestError),
        # catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535
        (lambda: urllib3.exceptions.HTTPError(), TransportError),
        (lambda: requests.exceptions.RequestException(), RequestError)
        # (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
    ])
    @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
    def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
        with handler() as rh:
            def mock_get_instance(*args, **kwargs):
                class MockSession:
                    def request(self, *args, **kwargs):
                        raise raised()
                return MockSession()

            monkeypatch.setattr(rh, '_get_instance', mock_get_instance)

            with pytest.raises(expected) as exc_info:
                rh.send(Request('http://fake'))

            assert exc_info.type is expected

    @pytest.mark.parametrize('raised,expected,match', [
        (lambda: urllib3.exceptions.SSLError(), SSLError, None),
        (lambda: urllib3.exceptions.TimeoutError(), TransportError, None),
        (lambda: urllib3.exceptions.ReadTimeoutError(None, None, None), TransportError, None),
        (lambda: urllib3.exceptions.ProtocolError(), TransportError, None),
        (lambda: urllib3.exceptions.DecodeError(), TransportError, None),
        (lambda: urllib3.exceptions.HTTPError(), TransportError, None),  # catch-all
        (
            lambda: urllib3.exceptions.ProtocolError('error', http.client.IncompleteRead(partial=b'abc', expected=4)),
            IncompleteRead,
            '3 bytes read, 4 more expected'
        ),
        (
            lambda: urllib3.exceptions.ProtocolError('error', urllib3.exceptions.IncompleteRead(partial=3, expected=5)),
            IncompleteRead,
            '3 bytes read, 5 more expected'
        ),
    ])
    @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
    def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
        from urllib3.response import HTTPResponse as Urllib3Response
        from requests.models import Response as RequestsResponse
        from yt_dlp.networking._requests import RequestsResponseAdapter
        requests_res = RequestsResponse()
        requests_res.raw = Urllib3Response(body=b'', status=200)
        res = RequestsResponseAdapter(requests_res)

        def mock_read(*args, **kwargs):
            raise raised()
        monkeypatch.setattr(res.fp, 'read', mock_read)

        with pytest.raises(expected, match=match) as exc_info:
            res.read()

        assert exc_info.type is expected
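
An illustrative restatement of the mapping these two tests pin down (a sketch, not the actual adapter code in yt_dlp.networking._requests):

def map_requests_error(exc):
    # order matters: ProxyError and SSLError subclass ConnectionError
    if isinstance(exc, requests.exceptions.ProxyError):
        return ProxyError
    if isinstance(exc, requests.exceptions.SSLError):
        return CertificateVerifyError if 'CERTIFICATE_VERIFY_FAILED' in str(exc) else SSLError
    if isinstance(exc, (requests.exceptions.Timeout, requests.exceptions.ConnectionError)):
        return TransportError
    return RequestError  # catch-all for remaining RequestException subclasses
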
def run_validation(handler, error, req, **handler_kwargs):
    with handler(**handler_kwargs) as rh:
        if fail:
            with pytest.raises(UnsupportedRequest):
        if error:
            with pytest.raises(error):
                rh.validate(req)
        else:
            rh.validate(req)
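
Usage sketch of the helper above: `error` is either falsy (validation must pass) or the exception class that rh.validate() is expected to raise.

run_validation(handler, False, Request('http://'))
run_validation(handler, UnsupportedRequest, Request('file://'))  # unless enable_file_urls
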
@@ -824,6 +909,9 @@ class NoCheckRH(ValidationRH):
    _SUPPORTED_PROXY_SCHEMES = None
    _SUPPORTED_URL_SCHEMES = None

    def _check_extensions(self, extensions):
        extensions.clear()


class HTTPSupportedRH(ValidationRH):
    _SUPPORTED_URL_SCHEMES = ('http',)
@@ -834,26 +922,38 @@ class HTTPSupportedRH(ValidationRH):
        ('https', False, {}),
        ('data', False, {}),
        ('ftp', False, {}),
        ('file', True, {}),
        ('file', UnsupportedRequest, {}),
        ('file', False, {'enable_file_urls': True}),
    ]),
    ('Requests', [
        ('http', False, {}),
        ('https', False, {}),
    ]),
    (NoCheckRH, [('http', False, {})]),
    (ValidationRH, [('http', True, {})])
    (ValidationRH, [('http', UnsupportedRequest, {})])
]

PROXY_SCHEME_TESTS = [
    # scheme, expected to fail
    ('Urllib', [
        ('http', False),
        ('https', True),
        ('https', UnsupportedRequest),
        ('socks4', False),
        ('socks4a', False),
        ('socks5', False),
        ('socks5h', False),
        ('socks', UnsupportedRequest),
    ]),
    ('Requests', [
        ('http', False),
        ('https', False),
        ('socks4', False),
        ('socks4a', False),
        ('socks5', False),
        ('socks5h', False),
        ('socks', True),
    ]),
    (NoCheckRH, [('http', False)]),
    (HTTPSupportedRH, [('http', True)]),
    (HTTPSupportedRH, [('http', UnsupportedRequest)]),
]

PROXY_KEY_TESTS = [
@@ -862,9 +962,35 @@ class HTTPSupportedRH(ValidationRH):
        ('all', False),
        ('unrelated', False),
    ]),
    ('Requests', [
        ('all', False),
        ('unrelated', False),
    ]),
    (NoCheckRH, [('all', False)]),
    (HTTPSupportedRH, [('all', True)]),
    (HTTPSupportedRH, [('no', True)]),
    (HTTPSupportedRH, [('all', UnsupportedRequest)]),
    (HTTPSupportedRH, [('no', UnsupportedRequest)]),
]

EXTENSION_TESTS = [
    ('Urllib', [
        ({'cookiejar': 'notacookiejar'}, AssertionError),
        ({'cookiejar': YoutubeDLCookieJar()}, False),
        ({'cookiejar': CookieJar()}, AssertionError),
        ({'timeout': 1}, False),
        ({'timeout': 'notatimeout'}, AssertionError),
        ({'unsupported': 'value'}, UnsupportedRequest),
    ]),
    ('Requests', [
        ({'cookiejar': 'notacookiejar'}, AssertionError),
        ({'cookiejar': YoutubeDLCookieJar()}, False),
        ({'timeout': 1}, False),
        ({'timeout': 'notatimeout'}, AssertionError),
        ({'unsupported': 'value'}, UnsupportedRequest),
    ]),
    (NoCheckRH, [
        ({'cookiejar': 'notacookiejar'}, False),
        ({'somerandom': 'test'}, False),  # but any extension is allowed through
    ]),
]

@pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
@@ -876,7 +1002,7 @@ class HTTPSupportedRH(ValidationRH):
    def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
        run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))

    @pytest.mark.parametrize('handler,fail', [('Urllib', False)], indirect=['handler'])
    @pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False)], indirect=['handler'])
    def test_no_proxy(self, handler, fail):
        run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'}))
        run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'})

@@ -899,23 +1025,24 @@ def test_proxy_scheme(self, handler, scheme, fail):
        run_validation(handler, fail, Request('http://', proxies={'http': f'{scheme}://example.com'}))
        run_validation(handler, fail, Request('http://'), proxies={'http': f'{scheme}://example.com'})

    @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH], indirect=True)
    @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests'], indirect=True)
    def test_empty_proxy(self, handler):
        run_validation(handler, False, Request('http://', proxies={'http': None}))
        run_validation(handler, False, Request('http://'), proxies={'http': None})

    @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1'])
    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_missing_proxy_scheme(self, handler, proxy_url):
        run_validation(handler, True, Request('http://', proxies={'http': 'example.com'}))
    @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
    def test_invalid_proxy_url(self, handler, proxy_url):
        run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_cookiejar_extension(self, handler):
        run_validation(handler, True, Request('http://', extensions={'cookiejar': 'notacookiejar'}))

    @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
    def test_timeout_extension(self, handler):
        run_validation(handler, True, Request('http://', extensions={'timeout': 'notavalidtimeout'}))
    @pytest.mark.parametrize('handler,extensions,fail', [
        (handler_tests[0], extensions, fail)
        for handler_tests in EXTENSION_TESTS
        for extensions, fail in handler_tests[1]
    ], indirect=['handler'])
    def test_extension(self, handler, extensions, fail):
        run_validation(
            handler, fail, Request('http://', extensions=extensions))

    def test_invalid_request_type(self):
        rh = self.ValidationRH(logger=FakeLogger())
@@ -985,17 +1112,17 @@ def test_send(self):
        assert isinstance(director.send(Request('http://')), FakeResponse)

    def test_unsupported_handlers(self):
        director = RequestDirector(logger=FakeLogger())
        director.add_handler(FakeRH(logger=FakeLogger()))

        class SupportedRH(RequestHandler):
            _SUPPORTED_URL_SCHEMES = ['http']

            def _send(self, request: Request):
                return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)

        # This handler should by default take preference over FakeRH
        director = RequestDirector(logger=FakeLogger())
        director.add_handler(SupportedRH(logger=FakeLogger()))
        director.add_handler(FakeRH(logger=FakeLogger()))

        # First should take preference
        assert director.send(Request('http://')).read() == b'supported'
        assert director.send(Request('any://')).read() == b''
@@ -1022,6 +1149,27 @@ def _send(self, request: Request):
        director.add_handler(UnexpectedRH(logger=FakeLogger))
        assert director.send(Request('any://'))

    def test_preference(self):
        director = RequestDirector(logger=FakeLogger())
        director.add_handler(FakeRH(logger=FakeLogger()))

        class SomeRH(RequestHandler):
            _SUPPORTED_URL_SCHEMES = ['http']

            def _send(self, request: Request):
                return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)

        def some_preference(rh, request):
            return (0 if not isinstance(rh, SomeRH)
                    else 100 if 'prefer' in request.headers
                    else -1)

        director.add_handler(SomeRH(logger=FakeLogger()))
        director.preferences.add(some_preference)

        assert director.send(Request('http://')).read() == b''
        assert director.send(Request('http://', headers={'prefer': '1'})).read() == b'supported'
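
The shape of a preference, restated as a sketch (names reuse the test above; higher return values rank a handler ahead):

def prefer_some_rh(rh, request):
    # any callable (request_handler, request) -> int may be registered
    return 100 if isinstance(rh, SomeRH) else 0

director.preferences.add(prefer_some_rh)
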


# XXX: do we want to move this to test_YoutubeDL.py?
class TestYoutubeDLNetworking:
@@ -1099,9 +1247,11 @@ def test_legacy_server_connect_error(self):
        ('http', '__noproxy__', None),
        ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
        ('https', 'example.com', 'http://example.com'),
        ('https', '//example.com', 'http://example.com'),
        ('https', 'socks5://example.com', 'socks5h://example.com'),
        ('http', 'socks://example.com', 'socks4://example.com'),
        ('http', 'socks4://example.com', 'socks4://example.com'),
        ('unrelated', '/bad/proxy', '/bad/proxy'),  # clean_proxies should ignore bad proxies
    ])
    def test_clean_proxy(self, proxy_key, proxy_url, expected):
        # proxies should be cleaned in urlopen()

@@ -1185,6 +1335,13 @@ def test_urllib_file_urls(self):
        rh = self.build_handler(ydl, UrllibRH)
        assert rh.enable_file_urls is True

    def test_compat_opt_prefer_urllib(self):
        # This assumes urllib only has a preference when this compat opt is given
        with FakeYDL({'compat_opts': ['prefer-legacy-http-handler']}) as ydl:
            director = ydl.build_request_director([UrllibRH])
            assert len(director.preferences) == 1
            assert director.preferences.pop()(UrllibRH, None)


class TestRequest:
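
A hedged restatement of the SOCKS normalization the test matrix above encodes (illustrative helper, not the actual clean_proxies code):

SOCKS_SCHEME_MAP = {'socks': 'socks4', 'socks5': 'socks5h'}  # resolve DNS on the proxy

def normalize_proxy_scheme(url):  # hypothetical helper
    scheme, sep, rest = url.partition('://')
    return f'{SOCKS_SCHEME_MAP.get(scheme, scheme)}{sep}{rest}'
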
@@ -95,17 +95,20 @@ def test_make_socks_proxy_unknown(self):

    @pytest.mark.skipif(not certifi, reason='certifi is not installed')
    def test_load_certifi(self):
        context_certifi = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        context_certifi.load_verify_locations(cafile=certifi.where())
        context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        context2 = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        ssl_load_certs(context, use_certifi=True)
        context2.load_verify_locations(cafile=certifi.where())
        assert context.get_ca_certs() == context2.get_ca_certs()
        assert context.get_ca_certs() == context_certifi.get_ca_certs()

        # Test load normal certs
        # XXX: could there be a case where system certs are the same as certifi?
        context3 = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        ssl_load_certs(context3, use_certifi=False)
        assert context3.get_ca_certs() != context.get_ca_certs()
        context_default = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        context_default.load_default_certs()
        context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        ssl_load_certs(context, use_certifi=False)
        assert context.get_ca_certs() == context_default.get_ca_certs()

        if context_default.get_ca_certs() == context_certifi.get_ca_certs():
            pytest.skip('System uses certifi as default. The test is not valid')

    @pytest.mark.parametrize('method,status,expected', [
        ('GET', 303, 'GET'),
@@ -266,14 +269,14 @@ def test_compat_http_error_autoclose(self):
        assert not response.closed

    def test_incomplete_read_error(self):
        error = IncompleteRead(b'test', 3, cause='test')
        error = IncompleteRead(4, 3, cause='test')
        assert isinstance(error, IncompleteRead)
        assert repr(error) == '<IncompleteRead: 4 bytes read, 3 more expected>'
        assert str(error) == error.msg == '4 bytes read, 3 more expected'
        assert error.partial == b'test'
        assert error.partial == 4
        assert error.expected == 3
        assert error.cause == 'test'

        error = IncompleteRead(b'aaa')
        error = IncompleteRead(3)
        assert repr(error) == '<IncompleteRead: 3 bytes read>'
        assert str(error) == '3 bytes read'
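
Note the API change this hunk pins down: IncompleteRead now takes byte counts rather than the partial payload itself.

error = IncompleteRead(4, 3, cause='test')  # 4 bytes read, 3 more expected
assert error.partial == 4 and error.expected == 3
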
@@ -1,113 +1,450 @@
#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import threading
import unittest

import pytest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


import abc
import contextlib
import enum
import functools
import http.server
import json
import random
import subprocess
import urllib.request
import socket
import struct
import time
from socketserver import (
    BaseRequestHandler,
    StreamRequestHandler,
    ThreadingTCPServer,
)

from test.helper import FakeYDL, get_params, is_download_test
from test.helper import http_server_port
from yt_dlp.networking import Request
from yt_dlp.networking.exceptions import ProxyError, TransportError
from yt_dlp.socks import (
    SOCKS4_REPLY_VERSION,
    SOCKS4_VERSION,
    SOCKS5_USER_AUTH_SUCCESS,
    SOCKS5_USER_AUTH_VERSION,
    SOCKS5_VERSION,
    Socks5AddressType,
    Socks5Auth,
)
SOCKS5_USER_AUTH_FAILURE = 0x1


@is_download_test
class TestMultipleSocks(unittest.TestCase):
    @staticmethod
    def _check_params(attrs):
        params = get_params()
        for attr in attrs:
            if attr not in params:
                print('Missing %s. Skipping.' % attr)
class Socks4CD(enum.IntEnum):
    REQUEST_GRANTED = 90
    REQUEST_REJECTED_OR_FAILED = 91
    REQUEST_REJECTED_CANNOT_CONNECT_TO_IDENTD = 92
    REQUEST_REJECTED_DIFFERENT_USERID = 93


class Socks5Reply(enum.IntEnum):
    SUCCEEDED = 0x0
    GENERAL_FAILURE = 0x1
    CONNECTION_NOT_ALLOWED = 0x2
    NETWORK_UNREACHABLE = 0x3
    HOST_UNREACHABLE = 0x4
    CONNECTION_REFUSED = 0x5
    TTL_EXPIRED = 0x6
    COMMAND_NOT_SUPPORTED = 0x7
    ADDRESS_TYPE_NOT_SUPPORTED = 0x8
class SocksTestRequestHandler(BaseRequestHandler):

    def __init__(self, *args, socks_info=None, **kwargs):
        self.socks_info = socks_info
        super().__init__(*args, **kwargs)


class SocksProxyHandler(BaseRequestHandler):
    def __init__(self, request_handler_class, socks_server_kwargs, *args, **kwargs):
        self.socks_kwargs = socks_server_kwargs or {}
        self.request_handler_class = request_handler_class
        super().__init__(*args, **kwargs)
class Socks5ProxyHandler(StreamRequestHandler, SocksProxyHandler):

    # SOCKS5 protocol https://tools.ietf.org/html/rfc1928
    # SOCKS5 username/password authentication https://tools.ietf.org/html/rfc1929

    def handle(self):
        sleep = self.socks_kwargs.get('sleep')
        if sleep:
            time.sleep(sleep)
        version, nmethods = self.connection.recv(2)
        assert version == SOCKS5_VERSION
        methods = list(self.connection.recv(nmethods))

        auth = self.socks_kwargs.get('auth')

        if auth is not None and Socks5Auth.AUTH_USER_PASS not in methods:
            self.connection.sendall(struct.pack('!BB', SOCKS5_VERSION, Socks5Auth.AUTH_NO_ACCEPTABLE))
            self.server.close_request(self.request)
            return

        elif Socks5Auth.AUTH_USER_PASS in methods:
            self.connection.sendall(struct.pack("!BB", SOCKS5_VERSION, Socks5Auth.AUTH_USER_PASS))

            _, user_len = struct.unpack('!BB', self.connection.recv(2))
            username = self.connection.recv(user_len).decode()
            pass_len = ord(self.connection.recv(1))
            password = self.connection.recv(pass_len).decode()

            if username == auth[0] and password == auth[1]:
                self.connection.sendall(struct.pack('!BB', SOCKS5_USER_AUTH_VERSION, SOCKS5_USER_AUTH_SUCCESS))
            else:
                self.connection.sendall(struct.pack('!BB', SOCKS5_USER_AUTH_VERSION, SOCKS5_USER_AUTH_FAILURE))
                self.server.close_request(self.request)
                return
        return params

    def test_proxy_http(self):
        params = self._check_params(['primary_proxy', 'primary_server_ip'])
        if params is None:
            return
        ydl = FakeYDL({
            'proxy': params['primary_proxy']
        })
        self.assertEqual(
            ydl.urlopen('http://yt-dl.org/ip').read().decode(),
            params['primary_server_ip'])

    def test_proxy_https(self):
        params = self._check_params(['primary_proxy', 'primary_server_ip'])
        if params is None:
            return
        ydl = FakeYDL({
            'proxy': params['primary_proxy']
        })
        self.assertEqual(
            ydl.urlopen('https://yt-dl.org/ip').read().decode(),
            params['primary_server_ip'])

    def test_secondary_proxy_http(self):
        params = self._check_params(['secondary_proxy', 'secondary_server_ip'])
        if params is None:
            return
        ydl = FakeYDL()
        req = urllib.request.Request('http://yt-dl.org/ip')
        req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
        self.assertEqual(
            ydl.urlopen(req).read().decode(),
            params['secondary_server_ip'])

    def test_secondary_proxy_https(self):
        params = self._check_params(['secondary_proxy', 'secondary_server_ip'])
        if params is None:
            return
        ydl = FakeYDL()
        req = urllib.request.Request('https://yt-dl.org/ip')
        req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
        self.assertEqual(
            ydl.urlopen(req).read().decode(),
            params['secondary_server_ip'])


@is_download_test
class TestSocks(unittest.TestCase):
    _SKIP_SOCKS_TEST = True

    def setUp(self):
        if self._SKIP_SOCKS_TEST:
        elif Socks5Auth.AUTH_NONE in methods:
            self.connection.sendall(struct.pack('!BB', SOCKS5_VERSION, Socks5Auth.AUTH_NONE))
        else:
            self.connection.sendall(struct.pack('!BB', SOCKS5_VERSION, Socks5Auth.AUTH_NO_ACCEPTABLE))
            self.server.close_request(self.request)
            return

        self.port = random.randint(20000, 30000)
        self.server_process = subprocess.Popen([
            'srelay', '-f', '-i', '127.0.0.1:%d' % self.port],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        version, command, _, address_type = struct.unpack('!BBBB', self.connection.recv(4))
        socks_info = {
            'version': version,
            'auth_methods': methods,
            'command': command,
            'client_address': self.client_address,
            'ipv4_address': None,
            'domain_address': None,
            'ipv6_address': None,
        }
        if address_type == Socks5AddressType.ATYP_IPV4:
            socks_info['ipv4_address'] = socket.inet_ntoa(self.connection.recv(4))
        elif address_type == Socks5AddressType.ATYP_DOMAINNAME:
            socks_info['domain_address'] = self.connection.recv(ord(self.connection.recv(1))).decode()
        elif address_type == Socks5AddressType.ATYP_IPV6:
            socks_info['ipv6_address'] = socket.inet_ntop(socket.AF_INET6, self.connection.recv(16))
        else:
            self.server.close_request(self.request)

    def tearDown(self):
        if self._SKIP_SOCKS_TEST:
        socks_info['port'] = struct.unpack('!H', self.connection.recv(2))[0]

        # dummy response, the returned IP is just a placeholder
        self.connection.sendall(struct.pack(
            '!BBBBIH', SOCKS5_VERSION, self.socks_kwargs.get('reply', Socks5Reply.SUCCEEDED), 0x0, 0x1, 0x7f000001, 40000))

        self.request_handler_class(self.request, self.client_address, self.server, socks_info=socks_info)
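
For reference, the RFC 1928 byte layout the handler above parses and sends (a no-auth client greeting, and the dummy success reply mirrored from the code):

greeting = struct.pack('!BBB', 0x05, 1, 0x00)            # VER, NMETHODS, METHOD=no-auth
reply = struct.pack('!BBBBIH', 0x05, 0x00, 0x00, 0x01,   # VER, REP=succeeded, RSV, ATYP=IPv4
                    0x7f000001, 40000)                   # BND.ADDR=127.0.0.1, BND.PORT
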
class Socks4ProxyHandler(StreamRequestHandler, SocksProxyHandler):

    # SOCKS4 protocol http://www.openssh.com/txt/socks4.protocol
    # SOCKS4A protocol http://www.openssh.com/txt/socks4a.protocol

    def _read_until_null(self):
        return b''.join(iter(functools.partial(self.connection.recv, 1), b'\x00'))

    def handle(self):
        sleep = self.socks_kwargs.get('sleep')
        if sleep:
            time.sleep(sleep)
        socks_info = {
            'version': SOCKS4_VERSION,
            'command': None,
            'client_address': self.client_address,
            'ipv4_address': None,
            'port': None,
            'domain_address': None,
        }
        version, command, dest_port, dest_ip = struct.unpack('!BBHI', self.connection.recv(8))
        socks_info['port'] = dest_port
        socks_info['command'] = command
        if version != SOCKS4_VERSION:
            self.server.close_request(self.request)
            return
        use_remote_dns = False
        if 0x0 < dest_ip <= 0xFF:
            use_remote_dns = True
        else:
            socks_info['ipv4_address'] = socket.inet_ntoa(struct.pack("!I", dest_ip))

        user_id = self._read_until_null().decode()
        if user_id != (self.socks_kwargs.get('user_id') or ''):
            self.connection.sendall(struct.pack(
                '!BBHI', SOCKS4_REPLY_VERSION, Socks4CD.REQUEST_REJECTED_DIFFERENT_USERID, 0x00, 0x00000000))
            self.server.close_request(self.request)
            return

        self.server_process.terminate()
        self.server_process.communicate()
        if use_remote_dns:
            socks_info['domain_address'] = self._read_until_null().decode()

    def _get_ip(self, protocol):
        if self._SKIP_SOCKS_TEST:
            return '127.0.0.1'
        # dummy response, the returned IP is just a placeholder
        self.connection.sendall(
            struct.pack(
                '!BBHI', SOCKS4_REPLY_VERSION,
                self.socks_kwargs.get('cd_reply', Socks4CD.REQUEST_GRANTED), 40000, 0x7f000001))

        ydl = FakeYDL({
            'proxy': '%s://127.0.0.1:%d' % (protocol, self.port),
        })
        return ydl.urlopen('http://yt-dl.org/ip').read().decode()
        self.request_handler_class(self.request, self.client_address, self.server, socks_info=socks_info)

    def test_socks4(self):
        self.assertTrue(isinstance(self._get_ip('socks4'), str))

    def test_socks4a(self):
        self.assertTrue(isinstance(self._get_ip('socks4a'), str))
class IPv6ThreadingTCPServer(ThreadingTCPServer):
    address_family = socket.AF_INET6

    def test_socks5(self):
        self.assertTrue(isinstance(self._get_ip('socks5'), str))
class SocksHTTPTestRequestHandler(http.server.BaseHTTPRequestHandler, SocksTestRequestHandler):
    def do_GET(self):
        if self.path == '/socks_info':
            payload = json.dumps(self.socks_info.copy())
            self.send_response(200)
            self.send_header('Content-Type', 'application/json; charset=utf-8')
            self.send_header('Content-Length', str(len(payload)))
            self.end_headers()
            self.wfile.write(payload.encode())


@contextlib.contextmanager
def socks_server(socks_server_class, request_handler, bind_ip=None, **socks_server_kwargs):
    server = server_thread = None
    try:
        bind_address = bind_ip or '127.0.0.1'
        server_type = ThreadingTCPServer if '.' in bind_address else IPv6ThreadingTCPServer
        server = server_type(
            (bind_address, 0), functools.partial(socks_server_class, request_handler, socks_server_kwargs))
        server_port = http_server_port(server)
        server_thread = threading.Thread(target=server.serve_forever)
        server_thread.daemon = True
        server_thread.start()
        if '.' not in bind_address:
            yield f'[{bind_address}]:{server_port}'
        else:
            yield f'{bind_address}:{server_port}'
    finally:
        server.shutdown()
        server.server_close()
        server_thread.join(2.0)


class SocksProxyTestContext(abc.ABC):
    REQUEST_HANDLER_CLASS = None

    def socks_server(self, server_class, *args, **kwargs):
        return socks_server(server_class, self.REQUEST_HANDLER_CLASS, *args, **kwargs)

    @abc.abstractmethod
    def socks_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs) -> dict:
        """return a dict of socks_info"""


class HTTPSocksTestProxyContext(SocksProxyTestContext):
    REQUEST_HANDLER_CLASS = SocksHTTPTestRequestHandler

    def socks_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
        request = Request(f'http://{target_domain or "127.0.0.1"}:{target_port or "40000"}/socks_info', **req_kwargs)
        handler.validate(request)
        return json.loads(handler.send(request).read().decode())


CTX_MAP = {
    'http': HTTPSocksTestProxyContext,
}


@pytest.fixture(scope='module')
def ctx(request):
    return CTX_MAP[request.param]()
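
A usage sketch tying the pieces above together (assumes the `handler` fixture from test_networking.py):

ctx = HTTPSocksTestProxyContext()
with ctx.socks_server(Socks5ProxyHandler) as server_address:
    with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
        info = ctx.socks_info_request(rh)
        assert info['version'] == 5
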
class TestSocks4Proxy:
    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
    def test_socks4_no_auth(self, handler, ctx):
        with handler() as rh:
            with ctx.socks_server(Socks4ProxyHandler) as server_address:
                response = ctx.socks_info_request(
                    rh, proxies={'all': f'socks4://{server_address}'})
            assert response['version'] == 4

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
    def test_socks4_auth(self, handler, ctx):
        with handler() as rh:
            with ctx.socks_server(Socks4ProxyHandler, user_id='user') as server_address:
                with pytest.raises(ProxyError):
                    ctx.socks_info_request(rh, proxies={'all': f'socks4://{server_address}'})
                response = ctx.socks_info_request(
                    rh, proxies={'all': f'socks4://user:@{server_address}'})
                assert response['version'] == 4

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
    def test_socks4a_ipv4_target(self, handler, ctx):
        with ctx.socks_server(Socks4ProxyHandler) as server_address:
            with handler(proxies={'all': f'socks4a://{server_address}'}) as rh:
                response = ctx.socks_info_request(rh, target_domain='127.0.0.1')
                assert response['version'] == 4
                assert (response['ipv4_address'] == '127.0.0.1') != (response['domain_address'] == '127.0.0.1')

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
    def test_socks4a_domain_target(self, handler, ctx):
        with ctx.socks_server(Socks4ProxyHandler) as server_address:
            with handler(proxies={'all': f'socks4a://{server_address}'}) as rh:
                response = ctx.socks_info_request(rh, target_domain='localhost')
                assert response['version'] == 4
                assert response['ipv4_address'] is None
                assert response['domain_address'] == 'localhost'

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
    def test_ipv4_client_source_address(self, handler, ctx):
        with ctx.socks_server(Socks4ProxyHandler) as server_address:
            source_address = f'127.0.0.{random.randint(5, 255)}'
            with handler(proxies={'all': f'socks4://{server_address}'},
                         source_address=source_address) as rh:
                response = ctx.socks_info_request(rh)
                assert response['client_address'][0] == source_address
                assert response['version'] == 4

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
    @pytest.mark.parametrize('reply_code', [
        Socks4CD.REQUEST_REJECTED_OR_FAILED,
        Socks4CD.REQUEST_REJECTED_CANNOT_CONNECT_TO_IDENTD,
        Socks4CD.REQUEST_REJECTED_DIFFERENT_USERID,
    ])
    def test_socks4_errors(self, handler, ctx, reply_code):
        with ctx.socks_server(Socks4ProxyHandler, cd_reply=reply_code) as server_address:
            with handler(proxies={'all': f'socks4://{server_address}'}) as rh:
                with pytest.raises(ProxyError):
                    ctx.socks_info_request(rh)

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
    def test_ipv6_socks4_proxy(self, handler, ctx):
        with ctx.socks_server(Socks4ProxyHandler, bind_ip='::1') as server_address:
            with handler(proxies={'all': f'socks4://{server_address}'}) as rh:
                response = ctx.socks_info_request(rh, target_domain='127.0.0.1')
                assert response['client_address'][0] == '::1'
                assert response['ipv4_address'] == '127.0.0.1'
                assert response['version'] == 4

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
    def test_timeout(self, handler, ctx):
        with ctx.socks_server(Socks4ProxyHandler, sleep=2) as server_address:
            with handler(proxies={'all': f'socks4://{server_address}'}, timeout=0.5) as rh:
                with pytest.raises(TransportError):
                    ctx.socks_info_request(rh)
class TestSocks5Proxy:

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
    def test_socks5_no_auth(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler) as server_address:
            with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
                response = ctx.socks_info_request(rh)
                assert response['auth_methods'] == [0x0]
                assert response['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
    def test_socks5_user_pass(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler, auth=('test', 'testpass')) as server_address:
            with handler() as rh:
                with pytest.raises(ProxyError):
                    ctx.socks_info_request(rh, proxies={'all': f'socks5://{server_address}'})

                response = ctx.socks_info_request(
                    rh, proxies={'all': f'socks5://test:testpass@{server_address}'})

                assert response['auth_methods'] == [Socks5Auth.AUTH_NONE, Socks5Auth.AUTH_USER_PASS]
                assert response['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
    def test_socks5_ipv4_target(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler) as server_address:
            with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
                response = ctx.socks_info_request(rh, target_domain='127.0.0.1')
                assert response['ipv4_address'] == '127.0.0.1'
                assert response['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
    def test_socks5_domain_target(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler) as server_address:
            with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
                response = ctx.socks_info_request(rh, target_domain='localhost')
                assert (response['ipv4_address'] == '127.0.0.1') != (response['ipv6_address'] == '::1')
                assert response['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
    def test_socks5h_domain_target(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler) as server_address:
            with handler(proxies={'all': f'socks5h://{server_address}'}) as rh:
                response = ctx.socks_info_request(rh, target_domain='localhost')
                assert response['ipv4_address'] is None
                assert response['domain_address'] == 'localhost'
                assert response['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
    def test_socks5h_ip_target(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler) as server_address:
            with handler(proxies={'all': f'socks5h://{server_address}'}) as rh:
                response = ctx.socks_info_request(rh, target_domain='127.0.0.1')
                assert response['ipv4_address'] == '127.0.0.1'
                assert response['domain_address'] is None
                assert response['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
    def test_socks5_ipv6_destination(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler) as server_address:
            with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
                response = ctx.socks_info_request(rh, target_domain='[::1]')
                assert response['ipv6_address'] == '::1'
                assert response['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
    def test_ipv6_socks5_proxy(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler, bind_ip='::1') as server_address:
            with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
                response = ctx.socks_info_request(rh, target_domain='127.0.0.1')
                assert response['client_address'][0] == '::1'
                assert response['ipv4_address'] == '127.0.0.1'
                assert response['version'] == 5

    # XXX: is there any feasible way of testing IPv6 source addresses?
    # Same would go for non-proxy source_address test...
    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
    def test_ipv4_client_source_address(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler) as server_address:
            source_address = f'127.0.0.{random.randint(5, 255)}'
            with handler(proxies={'all': f'socks5://{server_address}'}, source_address=source_address) as rh:
                response = ctx.socks_info_request(rh)
                assert response['client_address'][0] == source_address
                assert response['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
    @pytest.mark.parametrize('reply_code', [
        Socks5Reply.GENERAL_FAILURE,
        Socks5Reply.CONNECTION_NOT_ALLOWED,
        Socks5Reply.NETWORK_UNREACHABLE,
        Socks5Reply.HOST_UNREACHABLE,
        Socks5Reply.CONNECTION_REFUSED,
        Socks5Reply.TTL_EXPIRED,
        Socks5Reply.COMMAND_NOT_SUPPORTED,
        Socks5Reply.ADDRESS_TYPE_NOT_SUPPORTED,
    ])
    def test_socks5_errors(self, handler, ctx, reply_code):
        with ctx.socks_server(Socks5ProxyHandler, reply=reply_code) as server_address:
            with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
                with pytest.raises(ProxyError):
                    ctx.socks_info_request(rh)

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_timeout(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler, sleep=2) as server_address:
            with handler(proxies={'all': f'socks5://{server_address}'}, timeout=1) as rh:
                with pytest.raises(TransportError):
                    ctx.socks_info_request(rh)


if __name__ == '__main__':
@@ -14,6 +14,7 @@
import io
import itertools
import json
import subprocess
import xml.etree.ElementTree

from yt_dlp.compat import (

@@ -28,6 +29,7 @@
    InAdvancePagedList,
    LazyList,
    OnDemandPagedList,
    Popen,
    age_restricted,
    args_to_str,
    base_url,

@@ -47,8 +49,6 @@
    encode_base_n,
    encode_compat_str,
    encodeFilename,
    escape_rfc3986,
    escape_url,
    expand_path,
    extract_attributes,
    extract_basic_auth,

@@ -132,7 +132,12 @@
    xpath_text,
    xpath_with_ns,
)
from yt_dlp.utils.networking import HTTPHeaderDict
from yt_dlp.utils.networking import (
    HTTPHeaderDict,
    escape_rfc3986,
    normalize_url,
    remove_dot_segments,
)


class TestUtil(unittest.TestCase):
@@ -933,24 +938,45 @@ def test_escape_rfc3986(self):
        self.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar')
        self.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar')

    def test_escape_url(self):
    def test_normalize_url(self):
        self.assertEqual(
            escape_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'),
            normalize_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'),
            'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4'
        )
        self.assertEqual(
            escape_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'),
            normalize_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'),
            'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290'
        )
        self.assertEqual(
            escape_url('http://тест.рф/фрагмент'),
            normalize_url('http://тест.рф/фрагмент'),
            'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
        )
        self.assertEqual(
            escape_url('http://тест.рф/абв?абв=абв#абв'),
            normalize_url('http://тест.рф/абв?абв=абв#абв'),
            'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
        )
        self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
        self.assertEqual(normalize_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')

        self.assertEqual(normalize_url('http://www.example.com/../a/b/../c/./d.html'), 'http://www.example.com/a/c/d.html')

    def test_remove_dot_segments(self):
        self.assertEqual(remove_dot_segments('/a/b/c/./../../g'), '/a/g')
        self.assertEqual(remove_dot_segments('mid/content=5/../6'), 'mid/6')
        self.assertEqual(remove_dot_segments('/ad/../cd'), '/cd')
        self.assertEqual(remove_dot_segments('/ad/../cd/'), '/cd/')
        self.assertEqual(remove_dot_segments('/..'), '/')
        self.assertEqual(remove_dot_segments('/./'), '/')
        self.assertEqual(remove_dot_segments('/./a'), '/a')
        self.assertEqual(remove_dot_segments('/abc/./.././d/././e/.././f/./../../ghi'), '/ghi')
        self.assertEqual(remove_dot_segments('/'), '/')
        self.assertEqual(remove_dot_segments('/t'), '/t')
        self.assertEqual(remove_dot_segments('t'), 't')
        self.assertEqual(remove_dot_segments(''), '')
        self.assertEqual(remove_dot_segments('/../a/b/c'), '/a/b/c')
        self.assertEqual(remove_dot_segments('../a'), 'a')
        self.assertEqual(remove_dot_segments('./a'), 'a')
        self.assertEqual(remove_dot_segments('.'), '')
        self.assertEqual(remove_dot_segments('////'), '////')
def test_js_to_json_vars_strings(self):
|
||||
self.assertDictEqual(
|
||||
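
The `test_normalize_url` cases above exercise two operations: IDNA-encoding the host and percent-encoding non-ASCII bytes in the path, query and fragment. A minimal sketch of just that behaviour, for illustration only (`normalize_url_sketch` is a hypothetical name; the real `normalize_url` in `yt_dlp.utils.networking` also applies `remove_dot_segments` to the path, as the last assertion shows, and handles edge cases this sketch does not):

import urllib.parse

def normalize_url_sketch(url):
    # IDNA-encode the host; percent-encode non-ASCII characters elsewhere,
    # leaving already-escaped sequences (the '%' in safe=) untouched.
    parts = urllib.parse.urlsplit(url)
    return urllib.parse.urlunsplit((
        parts.scheme,
        parts.netloc.encode('idna').decode('ascii'),
        urllib.parse.quote(parts.path, safe='/%'),
        urllib.parse.quote(parts.query, safe='=&%'),
        urllib.parse.quote(parts.fragment, safe='%'),
    ))

assert normalize_url_sketch('http://тест.рф/фрагмент') == \
    'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
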
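The dot-segment assertions pin down the algorithm of RFC 3986 section 5.2.4. A minimal sketch that satisfies every case above, assuming only what the tests show (the actual implementation lives in `yt_dlp.utils.networking` and may differ in detail):

def remove_dot_segments_sketch(path):
    # Walk the segments, dropping '.' and letting '..' pop the previous one.
    output = []
    segments = path.split('/')
    for s in segments:
        if s == '.':
            continue
        elif s == '..':
            if output:
                output.pop()
        else:
            output.append(s)
    # Keep a leading '/' for absolute inputs and a trailing '/' when the
    # input ended in '.' or '..'.
    if not segments[0] and (not output or output[0]):
        output.insert(0, '')
    if segments[-1] in ('.', '..'):
        output.append('')
    return '/'.join(output)

assert remove_dot_segments_sketch('/a/b/c/./../../g') == '/a/g'
assert remove_dot_segments_sketch('mid/content=5/../6') == 'mid/6'
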
@@ -1183,6 +1209,9 @@ def test_js_to_json_edgecases(self):
         on = js_to_json('\'"\\""\'')
         self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped')

+        on = js_to_json('[new Date("spam"), \'("eggs")\']')
+        self.assertEqual(json.loads(on), ['spam', '("eggs")'], msg='Date regex should match a single string')
+
     def test_js_to_json_malformed(self):
         self.assertEqual(js_to_json('42a1'), '42"a1"')
         self.assertEqual(js_to_json('42a-1'), '42"a"-1')
@@ -1194,6 +1223,14 @@ def test_js_to_json_template_literal(self):
         self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
         self.assertEqual(js_to_json('`${name}`', {}), '"name"')

+    def test_js_to_json_common_constructors(self):
+        self.assertEqual(json.loads(js_to_json('new Map([["a", 5]])')), {'a': 5})
+        self.assertEqual(json.loads(js_to_json('Array(5, 10)')), [5, 10])
+        self.assertEqual(json.loads(js_to_json('new Array(15,5)')), [15, 5])
+        self.assertEqual(json.loads(js_to_json('new Map([Array(5, 10),new Array(15,5)])')), {'5': 10, '15': 5})
+        self.assertEqual(json.loads(js_to_json('new Date("123")')), "123")
+        self.assertEqual(json.loads(js_to_json('new Date(\'2023-10-19\')')), "2023-10-19")
+
     def test_extract_attributes(self):
         self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
         self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
@@ -2320,6 +2357,8 @@ def test_traverse_obj(self):

     def test_http_header_dict(self):
         headers = HTTPHeaderDict()
+        headers['ytdl-test'] = b'0'
+        self.assertEqual(list(headers.items()), [('Ytdl-Test', '0')])
         headers['ytdl-test'] = 1
         self.assertEqual(list(headers.items()), [('Ytdl-Test', '1')])
         headers['Ytdl-test'] = '2'
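The assertions above show the two properties the header mapping guarantees: keys are title-cased on insertion and values are coerced to `str` (including `bytes`). A minimal sketch that would satisfy just these assertions (`CaseInsensitiveHeaders` is a hypothetical class for illustration; the real `HTTPHeaderDict` does considerably more):

class CaseInsensitiveHeaders(dict):
    def __setitem__(self, key, value):
        # Normalize the key casing and coerce the value to str,
        # decoding bytes values first.
        if isinstance(value, bytes):
            value = value.decode('latin-1')
        super().__setitem__(key.title(), str(value))

    def __getitem__(self, key):
        return super().__getitem__(key.title())
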
@@ -2356,6 +2395,21 @@ def test_extract_basic_auth(self):
         assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=')
         assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')

+    @unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows')
+    def test_Popen_windows_escaping(self):
+        def run_shell(args):
+            stdout, stderr, error = Popen.run(
+                args, text=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            assert not stderr
+            assert not error
+            return stdout
+
+        # Test escaping
+        assert run_shell(['echo', 'test"&']) == '"test""&"\n'
+        # Test if delayed expansion is disabled
+        assert run_shell(['echo', '^!']) == '"^!"\n'
+        assert run_shell('echo "^!"') == '"^!"\n'
+

 if __name__ == '__main__':
     unittest.main()

@@ -34,7 +34,7 @@
 from .extractor.openload import PhantomJSwrapper
 from .minicurses import format_text
 from .networking import HEADRequest, Request, RequestDirector
-from .networking.common import _REQUEST_HANDLERS
+from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
 from .networking.exceptions import (
     HTTPError,
     NoSupportingHandlers,
@@ -60,7 +60,7 @@
     get_postprocessor,
 )
 from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
-from .update import REPOSITORY, current_git_head, detect_variant
+from .update import REPOSITORY, _get_system_deprecation, current_git_head, detect_variant
 from .utils import (
     DEFAULT_OUTTMPL,
     IDENTITY,
@@ -239,9 +239,9 @@ class YoutubeDL:
                        'selected' (check selected formats),
                        or None (check only if requested by extractor)
     paths:             Dictionary of output paths. The allowed keys are 'home'
-                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
+                       'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
     outtmpl:           Dictionary of templates for output names. Allowed keys
-                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
+                       are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
                        For compatibility with youtube-dl, a single string can also be used
     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
     restrictfilenames: Do not allow "&" and spaces in file names
@@ -256,8 +256,6 @@ class YoutubeDL:
     overwrites:        Overwrite all video and metadata files if True,
                        overwrite only non-video files if None
                        and don't overwrite any file if False
-                       For compatibility with youtube-dl,
-                       "nooverwrites" may also be used instead
     playlist_items:    Specific indices of playlist to download.
     playlistrandom:    Download playlist items in random order.
     lazy_playlist:     Process playlist entries as they are received.
@@ -424,7 +422,7 @@ class YoutubeDL:
                        asked whether to download the video.
                        - Raise utils.DownloadCancelled(msg) to abort remaining
                          downloads when a video is rejected.
-                       match_filter_func in utils.py is one example for this.
+                       match_filter_func in utils/_utils.py is one example for this.
     color:             A Dictionary with output stream names as keys
                        and their respective color policy as values.
                        Can also just be a single color policy,
@@ -553,6 +551,7 @@ class YoutubeDL:
                        You can reduce network I/O by disabling it if you don't
                        care about HLS. (only for youtube)
+    no_color:          Same as `color='no_color'`
     no_overwrites:     Same as `overwrites=False`
     """

     _NUMERIC_FIELDS = {
@@ -572,7 +571,7 @@ class YoutubeDL:
         'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
         'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
         'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
-        'preference', 'language', 'language_preference', 'quality', 'source_preference',
+        'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
         'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
         'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
     }
@@ -604,6 +603,7 @@ def __init__(self, params=None, auto_init=True):
         self._playlist_level = 0
         self._playlist_urls = set()
         self.cache = Cache(self)
+        self.__header_cookies = []

         stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
         self._out_files = Namespace(
@@ -621,7 +621,8 @@ def __init__(self, params=None, auto_init=True):

         if self.params.get('no_color'):
             if self.params.get('color') is not None:
-                self.report_warning('Overwriting params from "color" with "no_color"')
+                self.params.setdefault('_warnings', []).append(
+                    'Overwriting params from "color" with "no_color"')
             self.params['color'] = 'no_color'

         term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'
@@ -631,7 +632,7 @@ def process_color_policy(stream):
             policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
             if policy in ('auto', None):
                 return term_allow_color and supports_terminal_sequences(stream)
-            assert policy in ('always', 'never', 'no_color')
+            assert policy in ('always', 'never', 'no_color'), policy
             return {'always': True, 'never': False}.get(policy, policy)

         self._allow_colors = Namespace(**{
@@ -639,17 +640,9 @@ def process_color_policy(stream):
             for name, stream in self._out_files.items_ if name != 'console'
         })

-        # The code is left like this to be reused for future deprecations
-        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
-        current_version = sys.version_info[:2]
-        if current_version < MIN_RECOMMENDED:
-            msg = ('Support for Python version %d.%d has been deprecated. '
-                   'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
-                   '\n                    You will no longer receive updates on this version')
-            if current_version < MIN_SUPPORTED:
-                msg = 'Python version %d.%d is no longer supported'
-            self.deprecated_feature(
-                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))
+        system_deprecation = _get_system_deprecation()
+        if system_deprecation:
+            self.deprecated_feature(system_deprecation.replace('\n', '\n                    '))

         if self.params.get('allow_unplayable_formats'):
             self.report_warning(
@@ -680,12 +673,10 @@ def process_color_policy(stream):

         self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
         self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
-        self.__header_cookies = []
         self._load_cookies(self.params['http_headers'].get('Cookie'))  # compat
         self.params['http_headers'].pop('Cookie', None)
+        self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)

-        self._request_director = self.build_request_director(
-            sorted(_REQUEST_HANDLERS.values(), key=lambda rh: rh.RH_NAME.lower()))
         if auto_init and auto_init != 'no_verbose_header':
             self.print_debug_header()
@@ -949,7 +940,7 @@ def __enter__(self):

     def save_cookies(self):
         if self.params.get('cookiefile') is not None:
-            self.cookiejar.save(ignore_discard=True, ignore_expires=True)
+            self.cookiejar.save()

     def __exit__(self, *args):
         self.restore_console_title()
@@ -1300,16 +1291,16 @@ def create_key(outer_mobj):
                 else:
                     break

-            fmt = outer_mobj.group('format')
-            if fmt == 's' and value is not None and last_field in field_size_compat_map.keys():
-                fmt = f'0{field_size_compat_map[last_field]:d}d'
-
             if None not in (value, replacement):
                 try:
                     value = replacement_formatter.format(replacement, value)
                 except ValueError:
                     value, default = None, na

+            fmt = outer_mobj.group('format')
+            if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
+                fmt = f'0{field_size_compat_map[last_field]:d}d'
+
             flags = outer_mobj.group('conversion') or ''
             str_fmt = f'{fmt[:-1]}s'
             if value is None:
@@ -2338,13 +2329,13 @@ def _merge(formats_pair):
             return new_dict

         def _check_formats(formats):
-            if (self.params.get('check_formats') is not None
+            if self.params.get('check_formats') == 'selected':
+                yield from self._check_formats(formats)
+                return
+            elif (self.params.get('check_formats') is not None
                     or self.params.get('allow_unplayable_formats')):
                 yield from formats
                 return
-            elif self.params.get('check_formats') == 'selected':
-                yield from self._check_formats(formats)
-                return

             for f in formats:
                 if f.get('has_drm'):
@@ -2592,7 +2583,7 @@ def _fill_common_fields(self, info_dict, final=True):
                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                 # see http://bugs.python.org/issue1646728)
                 with contextlib.suppress(ValueError, OverflowError, OSError):
-                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
+                    upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
                     info_dict[date_key] = upload_date.strftime('%Y%m%d')

         live_keys = ('is_live', 'was_live')
@@ -3290,7 +3281,7 @@ def existing_video_file(*filepaths):
         fd, success = None, True
         if info_dict.get('protocol') or info_dict.get('url'):
             fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
-            if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
+            if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
                     info_dict.get('section_start') or info_dict.get('section_end')):
                 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
                        else 'You have requested downloading the video partially, but ffmpeg is not installed')
@@ -3454,7 +3445,8 @@ def ffmpeg_fixup(cndn, msg, cls):
             ) for pp in self._pps['post_process'])

             if not postprocessed_by_ffmpeg:
-                ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
+                ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a'
+                             and info_dict.get('container') == 'm4a_dash',
                              'writing DASH m4a. Only some players support this container',
                              FFmpegFixupM4aPP)
             ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
@@ -3976,7 +3968,7 @@ def get_encoding(stream):
         })) or 'none'))

         write_debug(f'Proxy map: {self.proxies}')
-        # write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers)}')
+        write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
         for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
             display_list = ['%s%s' % (
                 klass.__name__, '' if klass.__name__ == name else f' as {name}')
@@ -4031,7 +4023,7 @@ def _opener(self):
         """
         Get a urllib OpenerDirector from the Urllib handler (deprecated).
         """
-        self.deprecation_warning('YoutubeDL._opener() is deprecated, use YoutubeDL.urlopen()')
+        self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
         handler = self._request_director.handlers['Urllib']
         return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)

@@ -4065,6 +4057,9 @@ def urlopen(self, req):
                     raise RequestError(
                         'file:// URLs are disabled by default in yt-dlp for security reasons. '
                         'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
+                if 'unsupported proxy type: "https"' in ue.msg.lower():
+                    raise RequestError(
+                        'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests')
                 raise
         except SSLError as e:
             if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
@@ -4077,9 +4072,9 @@ def urlopen(self, req):
         except HTTPError as e:  # TODO: Remove in a future release
             raise _CompatHTTPError(e) from e

-    def build_request_director(self, handlers):
+    def build_request_director(self, handlers, preferences=None):
         logger = _YDLLogger(self)
-        headers = self.params.get('http_headers').copy()
+        headers = self.params['http_headers'].copy()
         proxies = self.proxies.copy()
         clean_headers(headers)
         clean_proxies(proxies, headers)
@@ -4106,6 +4101,9 @@ def build_request_director(self, handlers):
                 },
             }),
         ))
+        director.preferences.update(preferences or [])
+        if 'prefer-legacy-http-handler' in self.params['compat_opts']:
+            director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
         return director

     def encode(self, s):
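As the `prefer-legacy-http-handler` branch above shows, a preference is a callable taking `(rh, request)` and returning an integer bias; `build_request_director()` now also accepts a `preferences` iterable and merges it into the director. A hypothetical preference under the same assumed signature (`prefer_requests` is illustrative, not part of the codebase):

# Bias the director towards the Requests-based handler when it is available;
# mirrors the lambda added for the compat option above.
def prefer_requests(rh, request):
    return 100 if rh.RH_KEY == 'Requests' else 0

# director.preferences.add(prefer_requests)  # assumed usage
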
@@ -4228,7 +4226,7 @@ def _write_subtitles(self, info_dict, filename):
             return ret

     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
-        ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
+        ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error '''
         write_all = self.params.get('write_all_thumbnails', False)
         thumbnails, ret = [], []
         if write_all or self.params.get('writethumbnail', False):
@@ -4244,6 +4242,9 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None
             self.write_debug(f'Skipping writing {label} thumbnail')
             return ret

+        if thumbnails and not self._ensure_dir_exists(filename):
+            return None
+
         for idx, t in list(enumerate(thumbnails))[::-1]:
             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
             thumb_display_id = f'{label} thumbnail {t["id"]}'

@@ -21,7 +21,9 @@ def get_hidden_imports():
     yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated')
     yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated')
     yield pycryptodome_module()
-    yield from collect_submodules('websockets')
+    # Only `websockets` is required, others are collected just in case
+    for module in ('websockets', 'requests', 'urllib3'):
+        yield from collect_submodules(module)
     # These are auto-detected, but explicitly add them just in case
     yield from ('mutagen', 'brotli', 'certifi')

@@ -1,14 +1,11 @@
 import os
 import sys
-import warnings
 import xml.etree.ElementTree as etree

 from ._deprecated import *  # noqa: F401, F403
 from .compat_utils import passthrough_module

-# XXX: Implement this the same way as other DeprecationWarnings without circular import
-passthrough_module(__name__, '._legacy', callback=lambda attr: warnings.warn(
-    DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=5))
+passthrough_module(__name__, '._deprecated')
 del passthrough_module


 # HTMLParseError has been deprecated in Python 3.3 and removed in
@@ -33,7 +30,7 @@ def compat_etree_fromstring(text):
 if compat_os_name == 'nt':
     def compat_shlex_quote(s):
         import re
-        return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
+        return s if re.match(r'^[-_\w./]+$', s) else s.replace('"', '""').join('""')
 else:
     from shlex import quote as compat_shlex_quote  # noqa: F401

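The Windows branch above switches from backslash-escaping embedded quotes to cmd.exe-style quote doubling, which is what the `test_Popen_windows_escaping` assertions earlier in this diff expect. A standalone sketch of the new behaviour for illustration (`cmd_quote` is a hypothetical name; the real function is `compat_shlex_quote`):

import re

def cmd_quote(s):
    # Plain tokens pass through; anything else gets embedded quotes doubled
    # and the whole argument wrapped in quotes (.join('""') adds one '"' on
    # each side).
    return s if re.match(r'^[-_\w./]+$', s) else s.replace('"', '""').join('""')

print(cmd_quote('simple.txt'))  # simple.txt
print(cmd_quote('te"st &'))     # "te""st &"
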
@@ -1,4 +1,12 @@
 """Deprecated - New code should avoid these"""
+import warnings
+
+from .compat_utils import passthrough_module
+
+# XXX: Implement this the same way as other DeprecationWarnings without circular import
+passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn(
+    DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6))
+del passthrough_module

 import base64
 import urllib.error

@@ -16,12 +16,12 @@
 import shutil
 import socket
 import struct
+import subprocess
 import tokenize
 import urllib.error
 import urllib.parse
 import urllib.request
 import xml.etree.ElementTree as etree
-from subprocess import DEVNULL

 # isort: split
 import asyncio  # noqa: F401
@@ -85,7 +85,7 @@ def compat_setenv(key, value, env=os.environ):
 compat_Struct = struct.Struct
 compat_struct_pack = struct.pack
 compat_struct_unpack = struct.unpack
-compat_subprocess_get_DEVNULL = lambda: DEVNULL
+compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL
 compat_tokenize_tokenize = tokenize.tokenize
 compat_urllib_error = urllib.error
 compat_urllib_HTTPError = urllib.error.HTTPError

@@ -15,7 +15,7 @@ def get_package_info(module):
         name=getattr(module, '_yt_dlp__identifier', module.__name__),
         version=str(next(filter(None, (
             getattr(module, attr, None)
-            for attr in ('__version__', 'version_string', 'version')
+            for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version')
         )), None)))

13 yt_dlp/compat/types.py Normal file

@@ -0,0 +1,13 @@
+# flake8: noqa: F405
+from types import *  # noqa: F403
+
+from .compat_utils import passthrough_module
+
+passthrough_module(__name__, 'types')
+del passthrough_module
+
+try:
+    # NB: pypy has builtin NoneType, so checking NameError won't work
+    from types import NoneType  # >= 3.10
+except ImportError:
+    NoneType = type(None)
@@ -1,6 +1,9 @@
 # flake8: noqa: F405
 from urllib import *  # noqa: F403

+del request  # noqa: F821
+from . import request  # noqa: F401
+
 from ..compat_utils import passthrough_module

 passthrough_module(__name__, 'urllib')

@@ -33,7 +33,6 @@
 from .utils import (
     Popen,
     error_to_str,
-    escape_url,
     expand_path,
     is_path_like,
     sanitize_url,
@@ -42,6 +41,7 @@
     write_string,
 )
 from .utils._utils import _YDLLogger
+from .utils.networking import normalize_url

 CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
 SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
@@ -97,7 +97,7 @@ def load_cookies(cookie_file, browser_specification, ydl):

     jar = YoutubeDLCookieJar(cookie_file)
     if not is_filename or os.access(cookie_file, os.R_OK):
-        jar.load(ignore_discard=True, ignore_expires=True)
+        jar.load()
     cookie_jars.append(jar)

     return _merge_cookie_jars(cookie_jars)
@@ -138,7 +138,7 @@ def _extract_firefox_cookies(profile, container, logger):
     containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
     if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
         raise FileNotFoundError(f'could not read containers.json in {search_root}')
-    with open(containers_path) as containers:
+    with open(containers_path, encoding='utf8') as containers:
         identities = json.load(containers).get('identities', [])
         container_id = next((context.get('userContextId') for context in identities if container in (
             context.get('name'),
@@ -1213,7 +1213,7 @@ def open(self, file, *, write=False):
                 file.truncate(0)
             yield file

-    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
+    def _really_save(self, f, ignore_discard, ignore_expires):
         now = time.time()
         for cookie in self:
             if (not ignore_discard and cookie.discard
@@ -1234,7 +1234,7 @@ def _really_save(self, f, ignore_discard=False, ignore_expires=False):
                 name, value
             )))

-    def save(self, filename=None, *args, **kwargs):
+    def save(self, filename=None, ignore_discard=True, ignore_expires=True):
         """
         Save cookies to a file.
         Code is taken from CPython 3.6
@@ -1253,9 +1253,9 @@ def save(self, filename=None, *args, **kwargs):

         with self.open(filename, write=True) as f:
             f.write(self._HEADER)
-            self._really_save(f, *args, **kwargs)
+            self._really_save(f, ignore_discard, ignore_expires)

-    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
+    def load(self, filename=None, ignore_discard=True, ignore_expires=True):
         """Load cookies from a file."""
         if filename is None:
             if self.filename is not None:
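With the new defaults above, `save()` and `load()` keep session cookies (and expired ones) unless the caller opts out, instead of requiring `ignore_discard=True, ignore_expires=True` at every call site. A sketch of the resulting call pattern, assuming a jar whose filename is set:

jar = YoutubeDLCookieJar('cookies.txt')
jar.load()   # now equivalent to the old jar.load(ignore_discard=True, ignore_expires=True)
jar.save()   # likewise keeps session cookies by default
jar.save(ignore_discard=False)  # opt back into dropping session cookies explicitly
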
@@ -1308,7 +1308,7 @@ def prepare_line(line):

     def get_cookie_header(self, url):
         """Generate a Cookie HTTP header for a given url"""
-        cookie_req = urllib.request.Request(escape_url(sanitize_url(url)))
+        cookie_req = urllib.request.Request(normalize_url(sanitize_url(url)))
         self.add_cookie_header(cookie_req)
         return cookie_req.get_header('Cookie')

@@ -1317,7 +1317,7 @@ def get_cookies_for_url(self, url):
         # Policy `_now` attribute must be set before calling `_cookies_for_request`
         # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
         self._policy._now = self._now = int(time.time())
-        return self._cookies_for_request(urllib.request.Request(escape_url(sanitize_url(url))))
+        return self._cookies_for_request(urllib.request.Request(normalize_url(sanitize_url(url))))

     def clear(self, *args, **kwargs):
         with contextlib.suppress(KeyError):

@@ -43,6 +43,8 @@

 try:
     import sqlite3
+    # We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152
+    sqlite3._yt_dlp__version = sqlite3.sqlite_version
 except ImportError:
     # although sqlite3 is part of the standard library, it is possible to compile python without
     # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
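This pairs with the `get_package_info()` change earlier in the diff: `_yt_dlp__version` is now checked before `__version__`, so the dependency report shows the version of the linked SQLite library rather than the `sqlite3` module's own version. Roughly, and only as an illustration of the distinction:

import sqlite3

# The two version strings differ: sqlite3.version describes the Python
# module, sqlite3.sqlite_version describes the underlying SQLite library
# (the subject of issue #8152).
sqlite3._yt_dlp__version = sqlite3.sqlite_version
print(sqlite3.version)         # e.g. '2.6.0'
print(sqlite3.sqlite_version)  # e.g. '3.42.0'
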
@@ -56,6 +58,15 @@
     # See https://github.com/yt-dlp/yt-dlp/issues/2633
     websockets = None

+try:
+    import urllib3
+except ImportError:
+    urllib3 = None
+
+try:
+    import requests
+except ImportError:
+    requests = None

 try:
     import xattr  # xattr or pyxattr

@@ -137,7 +137,7 @@ def _write_cookies(self):
             self._cookies_tempfile = tmp_cookies.name
             self.to_screen(f'[download] Writing temporary cookies file to "{self._cookies_tempfile}"')
         # real_download resets _cookies_tempfile; if it's None then save() will write to cookiejar.filename
-        self.ydl.cookiejar.save(self._cookies_tempfile, ignore_discard=True, ignore_expires=True)
+        self.ydl.cookiejar.save(self._cookies_tempfile)
         return self.ydl.cookiejar.filename or self._cookies_tempfile

     def _call_downloader(self, tmpfilename, info_dict):
@@ -559,12 +559,13 @@ def _call_downloader(self, tmpfilename, info_dict):

         selected_formats = info_dict.get('requested_formats') or [info_dict]
         for i, fmt in enumerate(selected_formats):
-            cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url'])
+            is_http = re.match(r'^https?://', fmt['url'])
+            cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
             if cookies:
                 args.extend(['-cookies', ''.join(
                     f'{cookie.name}={cookie.value}; path={cookie.path}; domain={cookie.domain};\r\n'
                     for cookie in cookies)])
-            if fmt.get('http_headers') and re.match(r'^https?://', fmt['url']):
+            if fmt.get('http_headers') and is_http:
                 # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
                 # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
                 args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in fmt['http_headers'].items())])

@@ -14,6 +14,7 @@
 from ..networking.exceptions import HTTPError, IncompleteRead
 from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj
 from ..utils.networking import HTTPHeaderDict
+from ..utils.progress import ProgressCalculator


 class HttpQuietDownloader(HttpFD):
@@ -226,8 +227,7 @@ def _start_frag_download(self, ctx, info_dict):
         resume_len = ctx['complete_frags_downloaded_bytes']
         total_frags = ctx['total_frags']
         ctx_id = ctx.get('ctx_id')
-        # This dict stores the download progress, it's updated by the progress
-        # hook
+        # Stores the download progress, updated by the progress hook
         state = {
             'status': 'downloading',
             'downloaded_bytes': resume_len,
@@ -237,14 +237,8 @@ def _start_frag_download(self, ctx, info_dict):
             'tmpfilename': ctx['tmpfilename'],
         }

-        start = time.time()
-        ctx.update({
-            'started': start,
-            'fragment_started': start,
-            # Amount of fragment's bytes downloaded by the time of the previous
-            # frag progress hook invocation
-            'prev_frag_downloaded_bytes': 0,
-        })
+        ctx['started'] = time.time()
+        progress = ProgressCalculator(resume_len)

         def frag_progress_hook(s):
             if s['status'] not in ('downloading', 'finished'):
@@ -259,38 +253,35 @@ def frag_progress_hook(s):
             state['max_progress'] = ctx.get('max_progress')
             state['progress_idx'] = ctx.get('progress_idx')

-            time_now = time.time()
-            state['elapsed'] = time_now - start
+            state['elapsed'] = progress.elapsed
             frag_total_bytes = s.get('total_bytes') or 0
             s['fragment_info_dict'] = s.pop('info_dict', {})
+
+            # XXX: Fragment resume is not accounted for here
             if not ctx['live']:
                 estimated_size = (
                     (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
                     / (state['fragment_index'] + 1) * total_frags)
-                state['total_bytes_estimate'] = estimated_size
+                progress.total = estimated_size
+                progress.update(s.get('downloaded_bytes'))
+                state['total_bytes_estimate'] = progress.total
+            else:
+                progress.update(s.get('downloaded_bytes'))

             if s['status'] == 'finished':
                 state['fragment_index'] += 1
                 ctx['fragment_index'] = state['fragment_index']
-                state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
-                ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
-                ctx['speed'] = state['speed'] = self.calc_speed(
-                    ctx['fragment_started'], time_now, frag_total_bytes)
-                ctx['fragment_started'] = time.time()
-                ctx['prev_frag_downloaded_bytes'] = 0
-            else:
-                frag_downloaded_bytes = s['downloaded_bytes']
-                state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
-                ctx['speed'] = state['speed'] = self.calc_speed(
-                    ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx.get('frag_resume_len', 0))
-                if not ctx['live']:
-                    state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
-                ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
+                progress.thread_reset()
+
+            state['downloaded_bytes'] = ctx['complete_frags_downloaded_bytes'] = progress.downloaded
+            state['speed'] = ctx['speed'] = progress.speed.smooth
+            state['eta'] = progress.eta.smooth

             self._hook_progress(state, info_dict)

         ctx['dl'].add_progress_hook(frag_progress_hook)

-        return start
+        return ctx['started']

     def _finish_frag_download(self, ctx, info_dict):
         ctx['dest_stream'].close()
@@ -500,7 +491,6 @@ def _download_fragment(fragment):
                 download_fragment(fragment, ctx_copy)
                 return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')

-        self.report_warning('The download speed shown is only of one thread. This is a known issue')
         with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
             try:
                 for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):

@@ -122,7 +122,6 @@
 from .archiveorg import (
     ArchiveOrgIE,
     YoutubeWebArchiveIE,
-    VLiveWebArchiveIE,
 )
 from .arcpublishing import ArcPublishingIE
 from .arkena import ArkenaIE
@@ -138,10 +137,6 @@
     ArteTVCategoryIE,
 )
 from .arnes import ArnesIE
-from .asiancrush import (
-    AsianCrushIE,
-    AsianCrushPlaylistIE,
-)
 from .atresplayer import AtresPlayerIE
 from .atscaleconf import AtScaleConfEventIE
 from .atttechchannel import ATTTechChannelIE
@@ -165,6 +160,7 @@
     AWAANLiveIE,
     AWAANSeasonIE,
 )
+from .axs import AxsIE
 from .azmedien import AZMedienIE
 from .baidu import BaiduVideoIE
 from .banbye import (
@@ -223,7 +219,11 @@
     BiliBiliPlayerIE,
     BilibiliSpaceVideoIE,
     BilibiliSpaceAudioIE,
-    BilibiliSpacePlaylistIE,
+    BilibiliCollectionListIE,
+    BilibiliSeriesListIE,
+    BilibiliFavoritesListIE,
+    BilibiliWatchlaterIE,
+    BilibiliPlaylistIE,
     BiliIntlIE,
     BiliIntlSeriesIE,
     BiliLiveIE,
@@ -271,6 +271,10 @@
     BrightcoveLegacyIE,
     BrightcoveNewIE,
 )
+from .brilliantpala import (
+    BrilliantpalaElearnIE,
+    BrilliantpalaClassesIE,
+)
 from .businessinsider import BusinessInsiderIE
 from .bundesliga import BundesligaIE
 from .buzzfeed import BuzzFeedIE
@@ -292,9 +296,11 @@
 from .camsoda import CamsodaIE
 from .camtasia import CamtasiaEmbedIE
 from .camwithher import CamWithHerIE
+from .canal1 import Canal1IE
 from .canalalpha import CanalAlphaIE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
+from .caracoltv import CaracolTvPlayIE
 from .carambatv import (
     CarambaTVIE,
     CarambaTVPageIE,
@@ -303,6 +309,7 @@
 from .cbc import (
     CBCIE,
     CBCPlayerIE,
+    CBCPlayerPlaylistIE,
     CBCGemIE,
     CBCGemPlaylistIE,
     CBCGemLiveIE,
@@ -351,6 +358,10 @@
 from .cinchcast import CinchcastIE
 from .cinemax import CinemaxIE
 from .cinetecamilano import CinetecaMilanoIE
+from .cineverse import (
+    CineverseIE,
+    CineverseDetailsIE,
+)
 from .ciscolive import (
     CiscoLiveSessionIE,
     CiscoLiveSearchIE,
@@ -560,8 +571,10 @@
     EpiconIE,
     EpiconSeriesIE,
 )
+from .eplus import EplusIbIE
 from .epoch import EpochIE
 from .eporner import EpornerIE
+from .erocast import ErocastIE
 from .eroprofile import (
     EroProfileIE,
     EroProfileAlbumIE,
@@ -884,6 +897,10 @@
 from .jove import JoveIE
 from .joj import JojIE
 from .jstream import JStreamIE
+from .jtbc import (
+    JTBCIE,
+    JTBCProgramIE,
+)
 from .jwplatform import JWPlatformIE
 from .kakao import KakaoIE
 from .kaltura import KalturaIE
@@ -940,6 +957,7 @@
 from .lbry import (
     LBRYIE,
     LBRYChannelIE,
+    LBRYPlaylistIE,
 )
 from .lci import LCIIE
 from .lcp import (
@@ -1040,6 +1058,7 @@
 from .massengeschmacktv import MassengeschmackTVIE
 from .masters import MastersIE
 from .matchtv import MatchTVIE
+from .mbn import MBNIE
 from .mdr import MDRIE
 from .medaltv import MedalTVIE
 from .mediaite import MediaiteIE
@@ -1119,6 +1138,7 @@
     MofosexEmbedIE,
 )
 from .mojvideo import MojvideoIE
+from .monstercat import MonstercatIE
 from .morningstar import MorningstarIE
 from .motherless import (
     MotherlessIE,
@@ -1291,6 +1311,11 @@
     NineCNineMediaIE,
     CPTwentyFourIE,
 )
+from .niconicochannelplus import (
+    NiconicoChannelPlusIE,
+    NiconicoChannelPlusChannelVideosIE,
+    NiconicoChannelPlusChannelLivesIE,
+)
 from .ninegag import NineGagIE
 from .ninenow import NineNowIE
 from .nintendo import NintendoIE
@@ -1421,7 +1446,7 @@
     PatreonIE,
     PatreonCampaignIE
 )
-from .pbs import PBSIE
+from .pbs import PBSIE, PBSKidsIE
 from .pearvideo import PearVideoIE
 from .peekvids import PeekVidsIE, PlayVidsIE
 from .peertube import (
@@ -1444,6 +1469,7 @@
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
 from .piapro import PiaproIE
+from .piaulizaportal import PIAULIZAPortalIE
 from .picarto import (
     PicartoIE,
     PicartoVodIE,
@@ -1501,6 +1527,7 @@
 from .popcorntimes import PopcorntimesIE
 from .popcorntv import PopcornTVIE
 from .porn91 import Porn91IE
+from .pornbox import PornboxIE
 from .porncom import PornComIE
 from .pornflip import PornFlipIE
 from .pornhd import PornHdIE
@@ -1519,7 +1546,7 @@
     PuhuTVIE,
     PuhuTVSerieIE,
 )
-from .pr0gramm import Pr0grammStaticIE, Pr0grammIE
+from .pr0gramm import Pr0grammIE
 from .prankcast import PrankCastIE
 from .premiershiprugby import PremiershipRugbyIE
 from .presstv import PressTVIE
@@ -1555,7 +1582,14 @@
 from .radiode import RadioDeIE
 from .radiojavan import RadioJavanIE
 from .radiobremen import RadioBremenIE
-from .radiofrance import FranceCultureIE, RadioFranceIE
+from .radiofrance import (
+    FranceCultureIE,
+    RadioFranceIE,
+    RadioFranceLiveIE,
+    RadioFrancePodcastIE,
+    RadioFranceProfileIE,
+    RadioFranceProgramScheduleIE,
+)
 from .radiozet import RadioZetPodcastIE
 from .radiokapital import (
     RadioKapitalIE,
@@ -1586,6 +1620,7 @@
 from .rbgtum import (
     RbgTumIE,
     RbgTumCourseIE,
+    RbgTumNewCourseIE,
 )
 from .rcs import (
     RCSIE,
@@ -1699,8 +1734,8 @@
     MegaTVComIE,
     MegaTVComEmbedIE,
 )
-from .ant1newsgr import (
-    Ant1NewsGrWatchIE,
+from .antenna import (
+    AntennaGrWatchIE,
     Ant1NewsGrArticleIE,
     Ant1NewsGrEmbedIE,
 )
@@ -1710,6 +1745,10 @@
     RuvIE,
     RuvSpilaIE
 )
+from .s4c import (
+    S4CIE,
+    S4CSeriesIE
+)
 from .safari import (
     SafariIE,
     SafariApiIE,
@@ -1790,7 +1829,10 @@
 from .slutload import SlutloadIE
 from .smotrim import SmotrimIE
 from .snotr import SnotrIE
-from .sohu import SohuIE
+from .sohu import (
+    SohuIE,
+    SohuVIE,
+)
 from .sonyliv import (
     SonyLIVIE,
     SonyLIVSeriesIE,
@@ -1901,6 +1943,11 @@
 from .tagesschau import TagesschauIE
 from .tass import TassIE
 from .tbs import TBSIE
+from .tbsjp import (
+    TBSJPEpisodeIE,
+    TBSJPProgramIE,
+    TBSJPPlaylistIE,
+)
 from .tdslifeway import TDSLifewayIE
 from .teachable import (
     TeachableIE,
@@ -1951,7 +1998,10 @@
     WeTvSeriesIE,
 )
 from .tennistv import TennisTVIE
-from .tenplay import TenPlayIE
+from .tenplay import (
+    TenPlayIE,
+    TenPlaySeasonIE,
+)
 from .testurl import TestURLIE
 from .tf1 import TF1IE
 from .tfo import TFOIE
@@ -1963,10 +2013,6 @@
 )
 from .thestar import TheStarIE
 from .thesun import TheSunIE
-from .theta import (
-    ThetaVideoIE,
-    ThetaStreamIE,
-)
 from .theweatherchannel import TheWeatherChannelIE
 from .thisamericanlife import ThisAmericanLifeIE
 from .thisav import ThisAVIE
@@ -2348,7 +2394,8 @@
 )
 from .weibo import (
     WeiboIE,
-    WeiboMobileIE
+    WeiboVideoIE,
+    WeiboUserIE,
 )
 from .weiqitv import WeiqiTVIE
 from .weverse import (
@@ -2364,6 +2411,7 @@
 from .whyp import WhypIE
 from .wikimedia import WikimediaIE
 from .willow import WillowIE
+from .wimbledon import WimbledonIE
 from .wimtv import WimTVIE
 from .whowatch import WhoWatchIE
 from .wistia import (

@@ -180,20 +180,103 @@ class ABCIViewIE(InfoExtractor):
     _VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
     _GEO_COUNTRIES = ['AU']

+    # ABC iview programs are normally available for 14 days only.
     _TESTS = [{
+        'url': 'https://iview.abc.net.au/show/utopia/series/1/video/CO1211V001S00',
+        'md5': '52a942bfd7a0b79a6bfe9b4ce6c9d0ed',
+        'info_dict': {
+            'id': 'CO1211V001S00',
+            'ext': 'mp4',
+            'title': 'Series 1 Ep 1 Wood For The Trees',
+            'series': 'Utopia',
+            'description': 'md5:0cfb2c183c1b952d1548fd65c8a95c00',
+            'upload_date': '20230726',
+            'uploader_id': 'abc1',
+            'series_id': 'CO1211V',
+            'episode_id': 'CO1211V001S00',
+            'season_number': 1,
+            'season': 'Season 1',
+            'episode_number': 1,
+            'episode': 'Wood For The Trees',
+            'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/co/CO1211V001S00_5ad8353f4df09_1280.jpg',
+            'timestamp': 1690403700,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'note': 'No episode name',
         'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
         'md5': '67715ce3c78426b11ba167d875ac6abf',
         'info_dict': {
             'id': 'LE1927H001S00',
             'ext': 'mp4',
-            'title': "Series 11 Ep 1",
-            'series': "Gruen",
+            'title': 'Series 11 Ep 1',
+            'series': 'Gruen',
             'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
             'upload_date': '20190925',
             'uploader_id': 'abc1',
             'series_id': 'LE1927H',
             'episode_id': 'LE1927H001S00',
+            'season_number': 11,
+            'season': 'Season 11',
+            'episode_number': 1,
+            'episode': 'Episode 1',
             'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/le/LE1927H001S00_5d954fbd79e25_1280.jpg',
             'timestamp': 1569445289,
         },
+        'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
         'params': {
             'skip_download': True,
         },
+    }, {
+        'note': 'No episode number',
+        'url': 'https://iview.abc.net.au/show/four-corners/series/2022/video/NC2203H039S00',
+        'md5': '77cb7d8434440e3b28fbebe331c2456a',
+        'info_dict': {
+            'id': 'NC2203H039S00',
+            'ext': 'mp4',
+            'title': 'Series 2022 Locking Up Kids',
+            'series': 'Four Corners',
+            'description': 'md5:54829ca108846d1a70e1fcce2853e720',
+            'upload_date': '20221114',
+            'uploader_id': 'abc1',
+            'series_id': 'NC2203H',
+            'episode_id': 'NC2203H039S00',
+            'season_number': 2022,
+            'season': 'Season 2022',
+            'episode_number': None,
+            'episode': 'Locking Up Kids',
+            'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg',
+            'timestamp': 1668460497,
+        },
+        'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'note': 'No episode name or number',
+        'url': 'https://iview.abc.net.au/show/landline/series/2021/video/RF2004Q043S00',
+        'md5': '2e17dec06b13cc81dc119d2565289396',
+        'info_dict': {
+            'id': 'RF2004Q043S00',
+            'ext': 'mp4',
+            'title': 'Series 2021',
+            'series': 'Landline',
+            'description': 'md5:c9f30d9c0c914a7fd23842f6240be014',
+            'upload_date': '20211205',
+            'uploader_id': 'abc1',
+            'series_id': 'RF2004Q',
+            'episode_id': 'RF2004Q043S00',
+            'season_number': 2021,
+            'season': 'Season 2021',
+            'episode_number': None,
+            'episode': None,
+            'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg',
+            'timestamp': 1638710705,
+        },
+        'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
+        'params': {
+            'skip_download': True,
+        },
@@ -255,6 +338,8 @@ def tokenize_url(url, token):
             'episode_number': int_or_none(self._search_regex(
                 r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
             'episode_id': house_number,
+            'episode': self._search_regex(
+                r'^(?:Series\s+\d+)?\s*(?:Ep\s+\d+)?\s*(.*)$', title, 'episode', default='') or None,
             'uploader_id': video_params.get('channel'),
             'formats': formats,
             'subtitles': subtitles,

@@ -12,7 +12,7 @@
 import urllib.request
 import urllib.response
 import uuid

+from ..utils.networking import clean_proxies
 from .common import InfoExtractor
 from ..aes import aes_ecb_decrypt
 from ..utils import (
@@ -27,74 +27,21 @@
     update_url_query,
 )

-# NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862)
-
-
-def add_opener(ydl, handler):
-    ''' Add a handler for opening URLs, like _download_webpage '''
+def add_opener(ydl, handler):  # FIXME: Create proper API in .networking
+    """Add a handler for opening URLs, like _download_webpage"""
     # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
     # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
-    assert isinstance(ydl._opener, urllib.request.OpenerDirector)
-    ydl._opener.add_handler(handler)
-
-
-def remove_opener(ydl, handler):
-    '''
-    Remove handler(s) for opening URLs
-    @param handler Either handler object itself or handler type.
-    Specifying handler type will remove all handler which isinstance returns True.
-    '''
-    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
-    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
-    opener = ydl._opener
-    assert isinstance(ydl._opener, urllib.request.OpenerDirector)
-    if isinstance(handler, (type, tuple)):
-        find_cp = lambda x: isinstance(x, handler)
-    else:
-        find_cp = lambda x: x is handler
-
-    removed = []
-    for meth in dir(handler):
-        if meth in ["redirect_request", "do_open", "proxy_open"]:
-            # oops, coincidental match
-            continue
-
-        i = meth.find("_")
-        protocol = meth[:i]
-        condition = meth[i + 1:]
-
-        if condition.startswith("error"):
-            j = condition.find("_") + i + 1
-            kind = meth[j + 1:]
-            try:
-                kind = int(kind)
-            except ValueError:
-                pass
-            lookup = opener.handle_error.get(protocol, {})
-            opener.handle_error[protocol] = lookup
-        elif condition == "open":
-            kind = protocol
-            lookup = opener.handle_open
-        elif condition == "response":
-            kind = protocol
-            lookup = opener.process_response
-        elif condition == "request":
-            kind = protocol
-            lookup = opener.process_request
-        else:
-            continue
-
-        handlers = lookup.setdefault(kind, [])
-        if handlers:
-            handlers[:] = [x for x in handlers if not find_cp(x)]
-
-        removed.append(x for x in handlers if find_cp(x))
-
-    if removed:
-        for x in opener.handlers:
-            if find_cp(x):
-                x.add_parent(None)
-        opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]
+    rh = ydl._request_director.handlers['Urllib']
+    if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
+        return
+    headers = ydl.params['http_headers'].copy()
+    proxies = ydl.proxies.copy()
+    clean_proxies(proxies, headers)
+    opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies)
+    assert isinstance(opener, urllib.request.OpenerDirector)
+    opener.add_handler(handler)
+    rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')


 class AbemaLicenseHandler(urllib.request.BaseHandler):
@@ -140,7 +87,7 @@ def abematv_license_open(self, url):
         ticket = urllib.parse.urlparse(url).netloc
         response_data = self._get_videokey_from_ticket(ticket)
         return urllib.response.addinfourl(io.BytesIO(response_data), headers={
-            'Content-Length': len(response_data),
+            'Content-Length': str(len(response_data)),
         }, url=url, code=200)


@@ -212,10 +159,7 @@ def _get_device_token(self):
         })
         AbemaTVBaseIE._USERTOKEN = user_data['token']

-        # don't allow adding it 2 times or more, though it's guarded
-        remove_opener(self._downloader, AbemaLicenseHandler)
         add_opener(self._downloader, AbemaLicenseHandler(self))
-
         return self._USERTOKEN

     def _get_media_token(self, invalidate=False, to_show=True):

@@ -338,6 +338,7 @@ class BiographyIE(AENetworksBaseIE):
             'skip_download': True,
         },
         'add_ie': ['ThePlatform'],
+        'skip': '404 Not Found',
     }]

     def _real_extract(self, url):

@@ -22,8 +22,11 @@ def _call_api(self, asin, data=None, note=None):

         resp = self._download_json(
             f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}',
-            asin, note=note, headers={'Content-Type': 'application/json'},
-            data=json.dumps(data).encode() if data else None,
+            asin, note=note, headers={
+                'Content-Type': 'application/json',
+                'currentpageurl': '/',
+                'currentplatform': 'dWeb'
+            }, data=json.dumps(data).encode() if data else None,
             query=None if data else {
                 'deviceType': 'A1WMMUXPCUJL4N',
                 'contentId': asin,
@@ -46,7 +49,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
             'ext': 'mp4',
             'title': 'May I Kiss You?',
             'language': 'Hindi',
-            'thumbnail': r're:^https?://.*\.jpg$',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
             'description': 'md5:a549bfc747973e04feb707833474e59d',
             'release_timestamp': 1644710400,
             'release_date': '20220213',
@@ -68,7 +71,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
             'ext': 'mp4',
             'title': 'Jahaan',
             'language': 'Hindi',
-            'thumbnail': r're:^https?://.*\.jpg',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
             'description': 'md5:05eb765a77bf703f322f120ec6867339',
             'release_timestamp': 1647475200,
             'release_date': '20220317',

@@ -26,6 +26,7 @@ class AMCNetworksIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
             # m3u8 download
             'skip_download': True,
         },
+        'skip': '404 Not Found',
     }, {
         'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
         'only_matching': True,

@@ -5,22 +5,26 @@
 from ..utils import (
     ExtractorError,
     determine_ext,
+    make_archive_id,
     scale_thumbnails_to_max_format_width,
 )


-class Ant1NewsGrBaseIE(InfoExtractor):
+class AntennaBaseIE(InfoExtractor):
     def _download_and_extract_api_data(self, video_id, netloc, cid=None):
-        url = f'{self.http_scheme()}//{netloc}{self._API_PATH}'
-        info = self._download_json(url, video_id, query={'cid': cid or video_id})
-        try:
-            source = info['url']
-        except KeyError:
-            raise ExtractorError('no source found for %s' % video_id)
-        formats, subs = (self._extract_m3u8_formats_and_subtitles(source, video_id, 'mp4')
-                         if determine_ext(source) == 'm3u8' else ([{'url': source}], {}))
+        info = self._download_json(f'{self.http_scheme()}//{netloc}{self._API_PATH}',
+                                   video_id, query={'cid': cid or video_id})
+        if not info.get('url'):
+            raise ExtractorError(f'No source found for {video_id}')
+
+        ext = determine_ext(info['url'])
+        if ext == 'm3u8':
+            formats, subs = self._extract_m3u8_formats_and_subtitles(info['url'], video_id, 'mp4')
+        else:
+            formats, subs = [{'url': info['url'], 'format_id': ext}], {}
+
         thumbnails = scale_thumbnails_to_max_format_width(
-            formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+')
+            formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+') if info.get('thumb') else []
         return {
             'id': video_id,
             'title': info.get('title'),
@@ -30,21 +34,31 @@ def _download_and_extract_api_data(self, video_id, netloc, cid=None):
         }


-class Ant1NewsGrWatchIE(Ant1NewsGrBaseIE):
-    IE_NAME = 'ant1newsgr:watch'
-    IE_DESC = 'ant1news.gr videos'
-    _VALID_URL = r'https?://(?P<netloc>(?:www\.)?ant1news\.gr)/watch/(?P<id>\d+)/'
+class AntennaGrWatchIE(AntennaBaseIE):
+    IE_NAME = 'antenna:watch'
+    IE_DESC = 'antenna.gr and ant1news.gr videos'
+    _VALID_URL = r'https?://(?P<netloc>(?:www\.)?(?:antenna|ant1news)\.gr)/watch/(?P<id>\d+)/'
     _API_PATH = '/templates/data/player'

     _TESTS = [{
         'url': 'https://www.ant1news.gr/watch/1506168/ant1-news-09112021-stis-18-45',
-        'md5': '95925e6b32106754235f2417e0d2dfab',
+        'md5': 'c472d9dd7cd233c63aff2ea42201cda6',
         'info_dict': {
             'id': '1506168',
             'ext': 'mp4',
             'title': 'md5:0ad00fa66ecf8aa233d26ab0dba7514a',
             'description': 'md5:18665af715a6dcfeac1d6153a44f16b0',
-            'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/26d46bf6-8158-4f02-b197-7096c714b2de.jpg',
+            'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/26d46bf6-8158-4f02-b197-7096c714b2de\.jpg',
         },
+    }, {
+        'url': 'https://www.antenna.gr/watch/1643812/oi-prodotes-epeisodio-01',
+        'md5': '8f6f7dd3b1dba4d835ba990e25f31243',
+        'info_dict': {
+            'id': '1643812',
+            'ext': 'mp4',
+            'format_id': 'mp4',
+            'title': 'ΟΙ ΠΡΟΔΟΤΕΣ – ΕΠΕΙΣΟΔΙΟ 01',
+            'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/b3d63096-e72d-43c4-87a0-00d4363d242f\.jpg',
+        },
     }]

@@ -52,11 +66,12 @@ def _real_extract(self, url):
         video_id, netloc = self._match_valid_url(url).group('id', 'netloc')
         webpage = self._download_webpage(url, video_id)
         info = self._download_and_extract_api_data(video_id, netloc)
-        info['description'] = self._og_search_description(webpage)
+        info['description'] = self._og_search_description(webpage, default=None)
+        info['_old_archive_ids'] = [make_archive_id('Ant1NewsGrWatch', video_id)],
         return info


-class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE):
+class Ant1NewsGrArticleIE(AntennaBaseIE):
     IE_NAME = 'ant1newsgr:article'
     IE_DESC = 'ant1news.gr articles'
     _VALID_URL = r'https?://(?:www\.)?ant1news\.gr/[^/]+/article/(?P<id>\d+)/'
@@ -96,7 +111,7 @@ def _real_extract(self, url):
             video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})


-class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE):
+class Ant1NewsGrEmbedIE(AntennaBaseIE):
     IE_NAME = 'ant1newsgr:embed'
     IE_DESC = 'ant1news.gr embedded videos'
     _BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'

@@ -3,7 +3,6 @@
 import urllib.parse

 from .common import InfoExtractor
-from .naver import NaverBaseIE
 from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
 from ..compat import compat_urllib_parse_unquote
 from ..networking import HEADRequest
@@ -947,237 +946,3 @@ def _real_extract(self, url):
         if not info.get('title'):
             info['title'] = video_id
         return info
-
-
-class VLiveWebArchiveIE(InfoExtractor):
-    IE_NAME = 'web.archive:vlive'
-    IE_DESC = 'web.archive.org saved vlive videos'
-    _VALID_URL = r'''(?x)
-            (?:https?://)?web\.archive\.org/
-            (?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)?  # /web and the version index is optional
-            (?:https?(?::|%3[Aa])//)?(?:
-                (?:(?:www|m)\.)?vlive\.tv(?::(?:80|443))?/(?:video|embed)/(?P<id>[0-9]+)  # VLive URL
-            )
-        '''
-    _TESTS = [{
-        'url': 'https://web.archive.org/web/20221221144331/http://www.vlive.tv/video/1326',
-        'md5': 'cc7314812855ce56de70a06a27314983',
-        'info_dict': {
-            'id': '1326',
-            'ext': 'mp4',
-            'title': "Girl's Day's Broadcast",
-            'creator': "Girl's Day",
-            'view_count': int,
-            'uploader_id': 'muploader_a',
-            'uploader_url': None,
-            'uploader': None,
-            'upload_date': '20150817',
-            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
-            'timestamp': 1439816449,
-            'like_count': int,
-            'channel': 'Girl\'s Day',
-            'channel_id': 'FDF27',
-            'comment_count': int,
-            'release_timestamp': 1439818140,
-            'release_date': '20150817',
-            'duration': 1014,
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }, {
-        'url': 'https://web.archive.org/web/20221221182103/http://www.vlive.tv/video/16937',
-        'info_dict': {
-            'id': '16937',
-            'ext': 'mp4',
-            'title': '첸백시 걍방',
-            'creator': 'EXO',
-            'view_count': int,
-            'subtitles': 'mincount:12',
-            'uploader_id': 'muploader_j',
-            'uploader_url': 'http://vlive.tv',
-            'uploader': None,
-            'upload_date': '20161112',
-            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
-            'timestamp': 1478923074,
-            'like_count': int,
-            'channel': 'EXO',
-            'channel_id': 'F94BD',
-            'comment_count': int,
-            'release_timestamp': 1478924280,
-            'release_date': '20161112',
-            'duration': 906,
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }, {
-        'url': 'https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870',
-        'info_dict': {
-            'id': '101870',
-            'ext': 'mp4',
-            'title': '[ⓓ xV] “레벨이들 매력에 반해? 안 반해?” 움직이는 HD 포토 (레드벨벳:Red Velvet)',
-            'creator': 'Dispatch',
-            'view_count': int,
-            'subtitles': 'mincount:6',
-            'uploader_id': 'V__FRA08071',
-            'uploader_url': 'http://vlive.tv',
-            'uploader': None,
-            'upload_date': '20181130',
-            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
-            'timestamp': 1543601327,
-            'like_count': int,
-            'channel': 'Dispatch',
-            'channel_id': 'C796F3',
-            'comment_count': int,
-            'release_timestamp': 1543601040,
-            'release_date': '20181130',
-            'duration': 279,
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }]
-
-    # The wayback machine has special timestamp and "mode" values:
-    # timestamp:
-    #   1 = the first capture
-    #   2 = the last capture
-    # mode:
-    #   id_ = Identity - perform no alterations of the original resource, return it as it was archived.
|
||||
_WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/'
|
||||
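# Illustrative composition (assumed capture URL): prefixing with the base above
# requests the newest capture ('2') served exactly as archived ('id_'):
#   self._WAYBACK_BASE_URL + 'http://www.vlive.tv/video/1326'
#   -> 'https://web.archive.org/web/2id_/http://www.vlive.tv/video/1326'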
|
||||
def _download_archived_page(self, url, video_id, *, timestamp='2', **kwargs):
|
||||
for retry in self.RetryManager():
|
||||
try:
|
||||
return self._download_webpage(f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
|
||||
raise ExtractorError('Page was not archived', expected=True)
|
||||
retry.error = e
|
||||
continue
|
||||
|
||||
def _download_archived_json(self, url, video_id, **kwargs):
|
||||
page = self._download_archived_page(url, video_id, **kwargs)
|
||||
if not page:
|
||||
raise ExtractorError('Page was not archived', expected=True)
|
||||
else:
|
||||
return self._parse_json(page, video_id)
|
||||
|
||||
def _extract_formats_from_m3u8(self, m3u8_url, params, video_id):
|
||||
m3u8_doc = self._download_archived_page(m3u8_url, video_id, note='Downloading m3u8', query=params, fatal=False)
|
||||
if not m3u8_doc:
|
||||
return
|
||||
|
||||
# M3U8 document should be changed to archive domain
|
||||
m3u8_doc = m3u8_doc.splitlines()
|
||||
url_base = m3u8_url.rsplit('/', 1)[0]
|
||||
first_segment = None
|
||||
for i, line in enumerate(m3u8_doc):
|
||||
if not line.startswith('#'):
|
||||
m3u8_doc[i] = f'{self._WAYBACK_BASE_URL}{url_base}/{line}?{urllib.parse.urlencode(params)}'
|
||||
first_segment = first_segment or m3u8_doc[i]
|
||||
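# Illustrative rewrite (assumed segment name): a relative line such as
# 'hls_450p_0.ts' becomes
# f'{self._WAYBACK_BASE_URL}{url_base}/hls_450p_0.ts?{urllib.parse.urlencode(params)}',
# pointing the player at the archived copy instead of the dead origin server.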
|
||||
# Segments may not have been archived. See https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870
|
||||
urlh = self._request_webpage(HEADRequest(first_segment), video_id, errnote=False,
|
||||
fatal=False, note='Check first segment availability')
|
||||
if urlh:
|
||||
formats, subtitles = self._parse_m3u8_formats_and_subtitles('\n'.join(m3u8_doc), ext='mp4', video_id=video_id)
|
||||
if subtitles:
|
||||
self._report_ignoring_subs('m3u8')
|
||||
return formats
|
||||
|
||||
# Closely follows the logic of the ArchiveTeam grab script
|
||||
# See: https://github.com/ArchiveTeam/vlive-grab/blob/master/vlive.lua
|
||||
def _real_extract(self, url):
|
||||
video_id, url_date = self._match_valid_url(url).group('id', 'date')
|
||||
|
||||
webpage = self._download_archived_page(f'https://www.vlive.tv/video/{video_id}', video_id, timestamp=url_date)
|
||||
|
||||
player_info = self._search_json(r'__PRELOADED_STATE__\s*=', webpage, 'player info', video_id)
|
||||
user_country = traverse_obj(player_info, ('common', 'userCountry'))
|
||||
|
||||
main_script_url = self._search_regex(r'<script\s+src="([^"]+/js/main\.[^"]+\.js)"', webpage, 'main script url')
|
||||
main_script = self._download_archived_page(main_script_url, video_id, note='Downloading main script')
|
||||
app_id = self._search_regex(r'appId\s*=\s*"([^"]+)"', main_script, 'app id')
|
||||
|
||||
inkey = self._download_archived_json(
|
||||
f'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/{video_id}/inkey', video_id, note='Fetching inkey', query={
|
||||
'appId': app_id,
|
||||
'platformType': 'PC',
|
||||
'gcc': user_country,
|
||||
'locale': 'en_US',
|
||||
}, fatal=False)
|
||||
|
||||
vod_id = traverse_obj(player_info, ('postDetail', 'post', 'officialVideo', 'vodId'))
|
||||
|
||||
vod_data = self._download_archived_json(
|
||||
f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{vod_id}', video_id, note='Fetching vod data', query={
|
||||
'key': inkey.get('inkey'),
|
||||
'pid': 'rmcPlayer_16692457559726800', # prefix is partly a Unix timestamp, partly random; fixed value used by the ArchiveTeam project
|
||||
'sid': '2024',
|
||||
'ver': '2.0',
|
||||
'devt': 'html5_pc',
|
||||
'doct': 'json',
|
||||
'ptc': 'https',
|
||||
'sptc': 'https',
|
||||
'cpt': 'vtt',
|
||||
'ctls': '%7B%22visible%22%3A%7B%22fullscreen%22%3Atrue%2C%22logo%22%3Afalse%2C%22playbackRate%22%3Afalse%2C%22scrap%22%3Afalse%2C%22playCount%22%3Atrue%2C%22commentCount%22%3Atrue%2C%22title%22%3Atrue%2C%22writer%22%3Atrue%2C%22expand%22%3Afalse%2C%22subtitles%22%3Atrue%2C%22thumbnails%22%3Atrue%2C%22quality%22%3Atrue%2C%22setting%22%3Atrue%2C%22script%22%3Afalse%2C%22logoDimmed%22%3Atrue%2C%22badge%22%3Atrue%2C%22seekingTime%22%3Atrue%2C%22muted%22%3Atrue%2C%22muteButton%22%3Afalse%2C%22viewerNotice%22%3Afalse%2C%22linkCount%22%3Afalse%2C%22createTime%22%3Afalse%2C%22thumbnail%22%3Atrue%7D%2C%22clicked%22%3A%7B%22expand%22%3Afalse%2C%22subtitles%22%3Afalse%7D%7D',
|
||||
'pv': '4.26.9',
|
||||
'dr': '1920x1080',
|
||||
'cpl': 'en_US',
|
||||
'lc': 'en_US',
|
||||
'adi': '%5B%7B%22type%22%3A%22pre%22%2C%22exposure%22%3Afalse%2C%22replayExposure%22%3Afalse%7D%5D',
|
||||
'adu': '%2F',
|
||||
'videoId': vod_id,
|
||||
'cc': user_country,
|
||||
})
|
||||
|
||||
formats = []
|
||||
|
||||
streams = traverse_obj(vod_data, ('streams', ...))
|
||||
if len(streams) > 1:
|
||||
self.report_warning('Multiple streams found. Only the first stream will be downloaded.')
|
||||
stream = streams[0]
|
||||
|
||||
max_stream = max(
|
||||
stream.get('videos') or [],
|
||||
key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
|
||||
if max_stream is not None:
|
||||
params = {arg.get('name'): arg.get('value') for arg in stream.get('keys', []) if arg.get('type') == 'param'}
|
||||
formats = self._extract_formats_from_m3u8(max_stream.get('source'), params, video_id) or []
|
||||
|
||||
# For parts of the project MP4 files were archived
|
||||
max_video = max(
|
||||
traverse_obj(vod_data, ('videos', 'list', ...)),
|
||||
key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
|
||||
if max_video is not None:
|
||||
video_url = self._WAYBACK_BASE_URL + max_video.get('source')
|
||||
urlh = self._request_webpage(HEADRequest(video_url), video_id, errnote=False,
|
||||
fatal=False, note='Check video availability')
|
||||
if urlh:
|
||||
formats.append({'url': video_url})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(player_info, ('postDetail', 'post', {
|
||||
'title': ('officialVideo', 'title', {str}),
|
||||
'creator': ('author', 'nickname', {str}),
|
||||
'channel': ('channel', 'channelName', {str}),
|
||||
'channel_id': ('channel', 'channelCode', {str}),
|
||||
'duration': ('officialVideo', 'playTime', {int_or_none}),
|
||||
'view_count': ('officialVideo', 'playCount', {int_or_none}),
|
||||
'like_count': ('officialVideo', 'likeCount', {int_or_none}),
|
||||
'comment_count': ('officialVideo', 'commentCount', {int_or_none}),
|
||||
'timestamp': ('officialVideo', 'createdAt', {lambda x: int_or_none(x, scale=1000)}),
|
||||
'release_timestamp': ('officialVideo', 'willStartAt', {lambda x: int_or_none(x, scale=1000)}),
|
||||
})),
|
||||
**traverse_obj(vod_data, ('meta', {
|
||||
'uploader_id': ('user', 'id', {str}),
|
||||
'uploader': ('user', 'name', {str}),
|
||||
'uploader_url': ('user', 'url', {url_or_none}),
|
||||
'thumbnail': ('cover', 'source', {url_or_none}),
|
||||
}), expected_type=lambda x: x or None),
|
||||
**NaverBaseIE.process_subtitles(vod_data, lambda x: [self._WAYBACK_BASE_URL + x]),
|
||||
}
|
||||
|
|
|
@ -48,17 +48,7 @@ class ArteTVIE(ArteTVBaseIE):
|
|||
}, {
|
||||
'note': 'No alt_title',
|
||||
'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/',
|
||||
'info_dict': {
|
||||
'id': '110371-000-A',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20220718',
|
||||
'duration': 154,
|
||||
'timestamp': 1658162460,
|
||||
'description': 'md5:5890f36fe7dccfadb8b7c0891de54786',
|
||||
'title': 'La chaleur, supplice des arbres de rue',
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/CPE2sQDtD8GLQgt8DuYHLf/940x530',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'}
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||
'only_matching': True,
|
||||
|
@ -67,19 +57,20 @@ class ArteTVIE(ArteTVBaseIE):
|
|||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'note': 'age-restricted',
|
||||
'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/',
|
||||
'info_dict': {
|
||||
'id': '110203-006-A',
|
||||
'chapters': 'count:16',
|
||||
'description': 'md5:cf592f1df52fe52007e3f8eac813c084',
|
||||
'alt_title': 'Zaz',
|
||||
'title': 'Baloise Session 2022',
|
||||
'timestamp': 1668445200,
|
||||
'duration': 4054,
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/ubQjmVCGyRx3hmBuZEK9QZ/940x530',
|
||||
'upload_date': '20221114',
|
||||
'id': '006785-000-A',
|
||||
'description': 'md5:c2f94fdfefc8a280e4dab68ab96ab0ba',
|
||||
'title': 'The Element of Crime',
|
||||
'timestamp': 1696111200,
|
||||
'duration': 5849,
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/q82dTTfyuCXupPsGxXsd7B/940x530',
|
||||
'upload_date': '20230930',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'expected_warnings': ['geo restricted']
|
||||
}
|
||||
}]
|
||||
|
||||
_GEO_BYPASS = True
|
||||
|
@ -136,7 +127,9 @@ def _real_extract(self, url):
|
|||
lang = mobj.group('lang') or mobj.group('lang_2')
|
||||
langauge_code = self._LANG_MAP.get(lang)
|
||||
|
||||
config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id)
|
||||
config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={
|
||||
'x-validated-age': '18'
|
||||
})
|
||||
|
||||
geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {}
|
||||
if geoblocking.get('restrictedArea'):
|
||||
|
@ -169,7 +162,7 @@ def _real_extract(self, url):
|
|||
)))
|
||||
|
||||
short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?')
|
||||
if stream['protocol'].startswith('HLS'):
|
||||
if 'HLS' in stream['protocol']:
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False)
|
||||
for fmt in fmts:
|
||||
|
|
|
@ -1,196 +0,0 @@
|
|||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
parse_age_limit,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class AsianCrushBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))'
|
||||
_KALTURA_KEYS = [
|
||||
'video_url', 'progressive_url', 'download_url', 'thumbnail_url',
|
||||
'widescreen_thumbnail_url', 'screencap_widescreen',
|
||||
]
|
||||
_API_SUFFIX = {'retrocrush.tv': '-ott'}
|
||||
|
||||
def _call_api(self, host, endpoint, video_id, query, resource):
|
||||
return self._download_json(
|
||||
'https://api%s.%s/%s' % (self._API_SUFFIX.get(host, ''), host, endpoint), video_id,
|
||||
'Downloading %s JSON metadata' % resource, query=query,
|
||||
headers=self.geo_verification_headers())['objects']
|
||||
|
||||
def _download_object_data(self, host, object_id, resource):
|
||||
return self._call_api(
|
||||
host, 'search', object_id, {'id': object_id}, resource)[0]
|
||||
|
||||
def _get_object_description(self, obj):
|
||||
return strip_or_none(obj.get('long_description') or obj.get('short_description'))
|
||||
|
||||
def _parse_video_data(self, video):
|
||||
title = video['name']
|
||||
|
||||
entry_id, partner_id = [None] * 2
|
||||
for k in self._KALTURA_KEYS:
|
||||
k_url = video.get(k)
|
||||
if k_url:
|
||||
mobj = re.search(r'/p/(\d+)/.+?/entryId/([^/]+)/', k_url)
|
||||
if mobj:
|
||||
partner_id, entry_id = mobj.groups()
|
||||
break
|
||||
|
||||
meta_categories = try_get(video, lambda x: x['meta']['categories'], list) or []
|
||||
categories = list(filter(None, [c.get('name') for c in meta_categories]))
|
||||
|
||||
show_info = video.get('show_info') or {}
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'kaltura:%s:%s' % (partner_id, entry_id),
|
||||
'ie_key': KalturaIE.ie_key(),
|
||||
'id': entry_id,
|
||||
'title': title,
|
||||
'description': self._get_object_description(video),
|
||||
'age_limit': parse_age_limit(video.get('mpaa_rating') or video.get('tv_rating')),
|
||||
'categories': categories,
|
||||
'series': show_info.get('show_name'),
|
||||
'season_number': int_or_none(show_info.get('season_num')),
|
||||
'season_id': show_info.get('season_id'),
|
||||
'episode_number': int_or_none(show_info.get('episode_num')),
|
||||
}
|
||||
|
||||
|
||||
class AsianCrushIE(AsianCrushBaseIE):
|
||||
_VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % AsianCrushBaseIE._VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt',
|
||||
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
|
||||
'info_dict': {
|
||||
'id': '1_y4tmjm5r',
|
||||
'ext': 'mp4',
|
||||
'title': 'Women Who Flirt',
|
||||
'description': 'md5:b65c7e0ae03a85585476a62a186f924c',
|
||||
'timestamp': 1496936429,
|
||||
'upload_date': '20170608',
|
||||
'uploader_id': 'craig@crifkin.com',
|
||||
'age_limit': 13,
|
||||
'categories': 'count:5',
|
||||
'duration': 5812,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
host, video_id = self._match_valid_url(url).groups()
|
||||
|
||||
if host == 'cocoro.tv':
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
embed_vars = self._parse_json(self._search_regex(
|
||||
r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
|
||||
default='{}'), video_id, fatal=False) or {}
|
||||
video_id = embed_vars.get('entry_id') or video_id
|
||||
|
||||
video = self._download_object_data(host, video_id, 'video')
|
||||
return self._parse_video_data(video)
|
||||
|
||||
|
||||
class AsianCrushPlaylistIE(AsianCrushBaseIE):
|
||||
_VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushBaseIE._VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/series/006447s/fruity-samurai',
|
||||
'info_dict': {
|
||||
'id': '6447',
|
||||
'title': 'Fruity Samurai',
|
||||
'description': 'md5:7535174487e4a202d3872a7fc8f2f154',
|
||||
},
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.retrocrush.tv/series/012355s/true-tears',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PAGE_SIZE = 1000000000
|
||||
|
||||
def _fetch_page(self, domain, parent_id, page):
|
||||
videos = self._call_api(
|
||||
domain, 'getreferencedobjects', parent_id, {
|
||||
'max': self._PAGE_SIZE,
|
||||
'object_type': 'video',
|
||||
'parent_id': parent_id,
|
||||
'start': page * self._PAGE_SIZE,
|
||||
}, 'page %d' % (page + 1))
|
||||
for video in videos:
|
||||
yield self._parse_video_data(video)
|
||||
|
||||
def _real_extract(self, url):
|
||||
host, playlist_id = self._match_valid_url(url).groups()
|
||||
|
||||
if host == 'cocoro.tv':
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = []
|
||||
|
||||
for mobj in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
|
||||
webpage):
|
||||
attrs = extract_attributes(mobj.group(0))
|
||||
if attrs.get('class') == 'clearfix':
|
||||
entries.append(self.url_result(
|
||||
mobj.group('url'), ie=AsianCrushIE.ie_key()))
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or self._html_extract_title(webpage)
|
||||
if title:
|
||||
title = re.sub(r'\s*\|\s*.+?$', '', title)
|
||||
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:description', webpage, 'description', fatal=False)
|
||||
else:
|
||||
show = self._download_object_data(host, playlist_id, 'show')
|
||||
title = show.get('name')
|
||||
description = self._get_object_description(show)
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, host, playlist_id),
|
||||
self._PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
|
@ -12,7 +12,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
|||
|
||||
def _aws_execute_api(self, aws_dict, video_id, query=None):
|
||||
query = query or {}
|
||||
amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
|
||||
amz_date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
|
||||
date = amz_date[:8]
|
||||
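# Illustrative values: amz_date == '20231013T120000Z' and date == '20231013'.
# datetime.now(timezone.utc) replaces the deprecated datetime.utcnow() while
# yielding the same UTC timestamp format required by AWS SigV4 request signing.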
headers = {
|
||||
'Accept': 'application/json',
|
||||
|
|
87
yt_dlp/extractor/axs.py
Normal file
|
@ -0,0 +1,87 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AxsIE(InfoExtractor):
|
||||
IE_NAME = 'axs.tv'
|
||||
_VALID_URL = r'https?://(?:www\.)?axs\.tv/(?:channel/(?:[^/?#]+/)+)?video/(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.axs.tv/video/5f4dc776b70e4f1c194f22ef/',
|
||||
'md5': '8d97736ae8e50c64df528e5e676778cf',
|
||||
'info_dict': {
|
||||
'id': '5f4dc776b70e4f1c194f22ef',
|
||||
'title': 'Small Town',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:e314d28bfaa227a4d7ec965fae19997f',
|
||||
'upload_date': '20230602',
|
||||
'timestamp': 1685729564,
|
||||
'duration': 1284.216,
|
||||
'series': 'Rock & Roll Road Trip with Sammy Hagar',
|
||||
'season': 2,
|
||||
'episode': '3',
|
||||
'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.axs.tv/channel/rock-star-interview/video/daryl-hall',
|
||||
'md5': '300ae795cd8f9984652c0949734ffbdc',
|
||||
'info_dict': {
|
||||
'id': '5f488148b70e4f392572977c',
|
||||
'display_id': 'daryl-hall',
|
||||
'title': 'Daryl Hall',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:e54ecaa0f4b5683fc9259e9e4b196628',
|
||||
'upload_date': '20230214',
|
||||
'timestamp': 1676403615,
|
||||
'duration': 2570.668,
|
||||
'series': 'The Big Interview with Dan Rather',
|
||||
'season': 3,
|
||||
'episode': '5',
|
||||
'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
webpage_json_data = self._search_json(
|
||||
r'mountObj\s*=', webpage, 'video ID data', display_id,
|
||||
transform_source=js_to_json)
|
||||
video_id = webpage_json_data['video_id']
|
||||
company_id = webpage_json_data['company_id']
|
||||
|
||||
meta = self._download_json(
|
||||
f'https://api.myspotlight.tv/dotplayer/video/{company_id}/{video_id}',
|
||||
video_id, query={'device_type': 'desktop_web'})['video']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
meta['video_m3u8'], video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
subtitles = {}
|
||||
for cc in traverse_obj(meta, ('closeCaption', lambda _, v: url_or_none(v['srtPath']))):
|
||||
subtitles.setdefault(cc.get('srtShortLang') or 'en', []).append(
|
||||
{'ext': cc.get('srtExt'), 'url': cc['srtPath']})
|
||||
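# Illustrative result (assumed API values): a closeCaption entry with
# srtShortLang 'en' and srtExt 'srt' produces
# subtitles == {'en': [{'ext': 'srt', 'url': 'https://.../caption.srt'}]}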
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(meta, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'series': ('seriestitle', {str}),
|
||||
'season': ('season', {int}),
|
||||
'episode': ('episode', {str}),
|
||||
'duration': ('duration', {float_or_none}),
|
||||
'timestamp': ('updated_at', {parse_iso8601}),
|
||||
'thumbnail': ('thumb', {url_or_none}),
|
||||
}),
|
||||
'subtitles': subtitles,
|
||||
}
|
|
@ -31,7 +31,7 @@ def _extract_playlist(self, playlist_id):
|
|||
|
||||
|
||||
class BanByeIE(BanByeBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?banbye\.com/(?:en/)?watch/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
|
||||
'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
|
||||
|
@ -59,7 +59,27 @@ class BanByeIE(BanByeBaseIE):
|
|||
'title': 'Krzysztof Karoń',
|
||||
'id': 'p_Ld82N6gBw_OJ',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
'playlist_mincount': 9,
|
||||
}, {
|
||||
'url': 'https://banbye.com/watch/v_kb6_o1Kyq-CD',
|
||||
'info_dict': {
|
||||
'id': 'v_kb6_o1Kyq-CD',
|
||||
'ext': 'mp4',
|
||||
'title': 'Co tak naprawdę dzieje się we Francji?! Czy Warszawa a potem cała Polska będzie drugim Paryżem?!🤔🇵🇱',
|
||||
'description': 'md5:82be4c0e13eae8ea1ca8b9f2e07226a8',
|
||||
'uploader': 'Marcin Rola - MOIM ZDANIEM!🇵🇱',
|
||||
'channel_id': 'ch_QgWnHvDG2fo5',
|
||||
'channel_url': 'https://banbye.com/channel/ch_QgWnHvDG2fo5',
|
||||
'duration': 597,
|
||||
'timestamp': 1688642656,
|
||||
'upload_date': '20230706',
|
||||
'thumbnail': 'https://cdn.banbye.com/video/v_kb6_o1Kyq-CD/96.webp',
|
||||
'tags': ['Paryż', 'Francja', 'Polska', 'Imigranci', 'Morawiecki', 'Tusk'],
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -100,7 +120,7 @@ def _real_extract(self, url):
|
|||
|
||||
|
||||
class BanByeChannelIE(BanByeBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?channel/(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?banbye\.com/(?:en/)?channel/(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://banbye.com/channel/ch_wrealu24',
|
||||
'info_dict': {
|
||||
|
|
|
@ -15,11 +15,13 @@
|
|||
float_or_none,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
|
@ -41,7 +43,6 @@ class BBCCoUkIE(InfoExtractor):
|
|||
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
|
||||
music/(?:clips|audiovideo/popular)[/#]|
|
||||
radio/player/|
|
||||
sounds/play/|
|
||||
events/[^/]+/play/[^/]+/
|
||||
)
|
||||
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
||||
|
@ -218,20 +219,6 @@ class BBCCoUkIE(InfoExtractor):
|
|||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
|
||||
'note': 'Audio',
|
||||
'info_dict': {
|
||||
'id': 'm0007jz9',
|
||||
'ext': 'mp4',
|
||||
'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
|
||||
'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
|
||||
'duration': 9840,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||
'only_matching': True,
|
||||
|
@ -844,6 +831,20 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'upload_date': '20190604',
|
||||
'categories': ['Psychology'],
|
||||
},
|
||||
}, {
|
||||
# BBC Sounds
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
|
||||
'info_dict': {
|
||||
'id': 'm001q789',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Night Tracks Mix - Music for the darkling hour',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
|
||||
'chapters': 'count:8',
|
||||
'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
|
||||
'uploader': 'Radio 3',
|
||||
'duration': 1800,
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
}, { # onion routes
|
||||
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
|
||||
'only_matching': True,
|
||||
|
@ -1128,6 +1129,13 @@ def _real_extract(self, url):
|
|||
'uploader_id': network.get('id'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'chapters': traverse_obj(preload_state, (
|
||||
'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
|
||||
'title': ('titles', {lambda x: join_nonempty(
|
||||
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
||||
'start_time': ('offset', 'start', {float_or_none}),
|
||||
'end_time': ('offset', 'end', {float_or_none}),
|
||||
})) or None,
|
||||
}
|
||||
|
||||
bbc3_config = self._parse_json(
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
@ -8,7 +9,8 @@
|
|||
class BildIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
|
||||
IE_DESC = 'Bild.de'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'note': 'static MP4 only',
|
||||
'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
|
||||
'md5': 'dd495cbd99f2413502a1713a1156ac8a',
|
||||
'info_dict': {
|
||||
|
@ -19,7 +21,19 @@ class BildIE(InfoExtractor):
|
|||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 196,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'note': 'static MP4 and HLS',
|
||||
'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html',
|
||||
'md5': 'fb0ed4f09c495d4ba7ce2eee0bb90de1',
|
||||
'info_dict': {
|
||||
'id': '85158620',
|
||||
'ext': 'mp4',
|
||||
'title': 'Der Sprungturm-Skandal',
|
||||
'description': 'md5:709b543c24dc31bbbffee73bccda34ad',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 69,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@ -27,11 +41,23 @@ def _real_extract(self, url):
|
|||
video_data = self._download_json(
|
||||
url.split('.bild.html')[0] + ',view=json.bild.html', video_id)
|
||||
|
||||
formats = []
|
||||
for src in traverse_obj(video_data, ('clipList', 0, 'srces', lambda _, v: v['src'])):
|
||||
src_type = src.get('type')
|
||||
if src_type == 'application/x-mpegURL':
|
||||
formats.extend(
|
||||
self._extract_m3u8_formats(
|
||||
src['src'], video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
elif src_type == 'video/mp4':
|
||||
formats.append({'url': src['src'], 'format_id': 'http-mp4'})
|
||||
else:
|
||||
self.report_warning(f'Skipping unsupported format type: "{src_type}"')
|
||||
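# Illustrative srces entries (assumed shape): {'type': 'application/x-mpegURL',
# 'src': '.../master.m3u8'} expands into HLS formats, while {'type': 'video/mp4',
# 'src': '.../clip.mp4'} maps to a single 'http-mp4' format.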
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': unescapeHTML(video_data['title']).strip(),
|
||||
'description': unescapeHTML(video_data.get('description')),
|
||||
'url': video_data['clipList'][0]['srces'][0]['src'],
|
||||
'formats': formats,
|
||||
'thumbnail': video_data.get('poster'),
|
||||
'duration': int_or_none(video_data.get('durationSec')),
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
import hashlib
|
||||
import itertools
|
||||
import math
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
|
@ -14,6 +15,7 @@
|
|||
GeoRestrictedError,
|
||||
InAdvancePagedList,
|
||||
OnDemandPagedList,
|
||||
bool_or_none,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
format_field,
|
||||
|
@ -34,27 +36,31 @@
|
|||
unsmuggle_url,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
variadic,
|
||||
)
|
||||
|
||||
|
||||
class BilibiliBaseIE(InfoExtractor):
|
||||
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
|
||||
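# Illustrative match (assumed URL): '.../video-100026.m4s?deadline=...' gives
# format_id '100026'; extract_formats falls back to the stream's numeric 'id'
# field when a video URL does not match this pattern.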
|
||||
def extract_formats(self, play_info):
|
||||
format_names = {
|
||||
r['quality']: traverse_obj(r, 'new_description', 'display_desc')
|
||||
for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
|
||||
}
|
||||
|
||||
audios = traverse_obj(play_info, ('dash', 'audio', ...))
|
||||
audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
|
||||
flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
|
||||
if flac_audio:
|
||||
audios.append(flac_audio)
|
||||
formats = [{
|
||||
'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
|
||||
'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
|
||||
'acodec': audio.get('codecs'),
|
||||
'acodec': traverse_obj(audio, ('codecs', {str.lower})),
|
||||
'vcodec': 'none',
|
||||
'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
|
||||
'filesize': int_or_none(audio.get('size'))
|
||||
'filesize': int_or_none(audio.get('size')),
|
||||
'format_id': str_or_none(audio.get('id')),
|
||||
} for audio in audios]
|
||||
|
||||
formats.extend({
|
||||
|
@ -65,9 +71,13 @@ def extract_formats(self, play_info):
|
|||
'height': int_or_none(video.get('height')),
|
||||
'vcodec': video.get('codecs'),
|
||||
'acodec': 'none' if audios else None,
|
||||
'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
|
||||
'tbr': float_or_none(video.get('bandwidth'), scale=1000),
|
||||
'filesize': int_or_none(video.get('size')),
|
||||
'quality': int_or_none(video.get('id')),
|
||||
'format_id': traverse_obj(
|
||||
video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
|
||||
('id', {str_or_none}), get_all=False),
|
||||
'format': format_names.get(video.get('id')),
|
||||
} for video in traverse_obj(play_info, ('dash', 'video', ...)))
|
||||
|
||||
|
@ -149,7 +159,7 @@ def _get_episodes_from_season(self, ss_id, url):
|
|||
|
||||
|
||||
class BiliBiliIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/video/BV13x41117TL',
|
||||
|
@ -245,7 +255,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
|||
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
|
||||
'duration': 313.557,
|
||||
'upload_date': '20220709',
|
||||
'uploader': '小夫Tech',
|
||||
'uploader': '小夫太渴',
|
||||
'timestamp': 1657347907,
|
||||
'uploader_id': '1326814124',
|
||||
'comment_count': int,
|
||||
|
@ -502,7 +512,7 @@ def _real_extract(self, url):
|
|||
|
||||
|
||||
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/bangumi/media/md24097891',
|
||||
'info_dict': {
|
||||
|
@ -521,7 +531,7 @@ def _real_extract(self, url):
|
|||
|
||||
|
||||
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/ss(?P<id>\d+)'
|
||||
_VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ss26801',
|
||||
'info_dict': {
|
||||
|
@ -672,13 +682,35 @@ def get_entries(page_data):
|
|||
return self.playlist_result(paged_list, playlist_id)
|
||||
|
||||
|
||||
class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE):
|
||||
_VALID_URL = r'https?://space.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail\?sid=(?P<sid>\d+)'
|
||||
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
|
||||
def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
|
||||
for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
|
||||
yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)
|
||||
|
||||
def _get_uploader(self, uid, playlist_id):
|
||||
webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
|
||||
return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)
|
||||
|
||||
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
|
||||
metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
|
||||
metadata.pop('page_count', None)
|
||||
metadata.pop('page_size', None)
|
||||
return metadata, page_list
|
||||
|
||||
|
||||
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
|
||||
'info_dict': {
|
||||
'id': '2142762_57445',
|
||||
'title': '《底特律 变人》'
|
||||
'title': '【完结】《底特律 变人》全结局流程解说',
|
||||
'description': '',
|
||||
'uploader': '老戴在此',
|
||||
'uploader_id': '2142762',
|
||||
'timestamp': int,
|
||||
'upload_date': str,
|
||||
'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
|
||||
},
|
||||
'playlist_mincount': 31,
|
||||
}]
|
||||
|
@ -699,22 +731,251 @@ def get_metadata(page_data):
|
|||
return {
|
||||
'page_count': math.ceil(entry_count / page_size),
|
||||
'page_size': page_size,
|
||||
'title': traverse_obj(page_data, ('meta', 'name'))
|
||||
'uploader': self._get_uploader(mid, playlist_id),
|
||||
**traverse_obj(page_data, {
|
||||
'title': ('meta', 'name', {str}),
|
||||
'description': ('meta', 'description', {str}),
|
||||
'uploader_id': ('meta', 'mid', {str_or_none}),
|
||||
'timestamp': ('meta', 'ptime', {int_or_none}),
|
||||
'thumbnail': ('meta', 'cover', {url_or_none}),
|
||||
})
|
||||
}
|
||||
|
||||
def get_entries(page_data):
|
||||
for entry in page_data.get('archives', []):
|
||||
yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}',
|
||||
BiliBiliIE, entry['bvid'])
|
||||
return self._get_entries(page_data, 'archives')
|
||||
|
||||
metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
|
||||
return self.playlist_result(paged_list, playlist_id, metadata['title'])
|
||||
return self.playlist_result(paged_list, playlist_id, **metadata)
|
||||
|
||||
|
||||
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
|
||||
'info_dict': {
|
||||
'id': '1958703906_547718',
|
||||
'title': '直播回放',
|
||||
'description': '直播回放',
|
||||
'uploader': '靡烟miya',
|
||||
'uploader_id': '1958703906',
|
||||
'timestamp': 1637985853,
|
||||
'upload_date': '20211127',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
},
|
||||
'playlist_mincount': 513,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mid, sid = self._match_valid_url(url).group('mid', 'sid')
|
||||
playlist_id = f'{mid}_{sid}'
|
||||
playlist_meta = traverse_obj(self._download_json(
|
||||
f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
|
||||
), {
|
||||
'title': ('data', 'meta', 'name', {str}),
|
||||
'description': ('data', 'meta', 'description', {str}),
|
||||
'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
|
||||
'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
|
||||
'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
|
||||
})
|
||||
|
||||
def fetch_page(page_idx):
|
||||
return self._download_json(
|
||||
'https://api.bilibili.com/x/series/archives',
|
||||
playlist_id, note=f'Downloading page {page_idx}',
|
||||
query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']
|
||||
|
||||
def get_metadata(page_data):
|
||||
page_size = page_data['page']['size']
|
||||
entry_count = page_data['page']['total']
|
||||
return {
|
||||
'page_count': math.ceil(entry_count / page_size),
|
||||
'page_size': page_size,
|
||||
'uploader': self._get_uploader(mid, playlist_id),
|
||||
**playlist_meta
|
||||
}
|
||||
|
||||
def get_entries(page_data):
|
||||
return self._get_entries(page_data, 'archives')
|
||||
|
||||
metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
|
||||
return self.playlist_result(paged_list, playlist_id, **metadata)
|
||||
|
||||
|
||||
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
|
||||
'info_dict': {
|
||||
'id': '1103407912',
|
||||
'title': '【V2】(旧)',
|
||||
'description': '',
|
||||
'uploader': '晓月春日',
|
||||
'uploader_id': '84912',
|
||||
'timestamp': 1604905176,
|
||||
'upload_date': '20201109',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
'playlist_mincount': 22,
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
fid = self._match_id(url)
|
||||
|
||||
list_info = self._download_json(
|
||||
f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
|
||||
fid, note='Downloading favlist metadata')
|
||||
if list_info['code'] == -403:
|
||||
self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')
|
||||
|
||||
entries = self._get_entries(self._download_json(
|
||||
f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
|
||||
fid, note='Download favlist entries'), 'data')
|
||||
|
||||
return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
|
||||
'title': ('title', {str}),
|
||||
'description': ('intro', {str}),
|
||||
'uploader': ('upper', 'name', {str}),
|
||||
'uploader_id': ('upper', 'mid', {str_or_none}),
|
||||
'timestamp': ('ctime', {int_or_none}),
|
||||
'modified_timestamp': ('mtime', {int_or_none}),
|
||||
'thumbnail': ('cover', {url_or_none}),
|
||||
'view_count': ('cnt_info', 'play', {int_or_none}),
|
||||
'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
|
||||
})))
|
||||
|
||||
|
||||
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/watchlater/#/list',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = getattr(self._get_cookies(url).get('DedeUserID'), 'value', 'watchlater')
|
||||
watchlater_info = self._download_json(
|
||||
'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
|
||||
if watchlater_info['code'] == -101:
|
||||
self.raise_login_required(msg='You need to login to access your watchlater list')
|
||||
entries = self._get_entries(watchlater_info, ('data', 'list'))
|
||||
return self.playlist_result(entries, id=list_id, title='稍后再看')
|
||||
|
||||
|
||||
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
|
||||
'info_dict': {
|
||||
'id': '5_547718',
|
||||
'title': '直播回放',
|
||||
'uploader': '靡烟miya',
|
||||
'uploader_id': '1958703906',
|
||||
'timestamp': 1637985853,
|
||||
'upload_date': '20211127',
|
||||
},
|
||||
'playlist_mincount': 513,
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
|
||||
'info_dict': {
|
||||
'id': '5_547718',
|
||||
},
|
||||
'playlist_mincount': 513,
|
||||
'skip': 'redirect url',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/list/ml1103407912',
|
||||
'info_dict': {
|
||||
'id': '3_1103407912',
|
||||
'title': '【V2】(旧)',
|
||||
'uploader': '晓月春日',
|
||||
'uploader_id': '84912',
|
||||
'timestamp': 1604905176,
|
||||
'upload_date': '20201109',
|
||||
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
|
||||
},
|
||||
'playlist_mincount': 22,
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
|
||||
'info_dict': {
|
||||
'id': '3_1103407912',
|
||||
},
|
||||
'playlist_mincount': 22,
|
||||
'skip': 'redirect url',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/list/watchlater',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/play/watchlater',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
}]
|
||||
|
||||
def _extract_medialist(self, query, list_id):
|
||||
for page_num in itertools.count(1):
|
||||
page_data = self._download_json(
|
||||
'https://api.bilibili.com/x/v2/medialist/resource/list',
|
||||
list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
|
||||
)['data']
|
||||
yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
|
||||
query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
|
||||
if not page_data.get('has_more', False):
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, list_id)
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
|
||||
if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
|
||||
error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
|
||||
error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
|
||||
if error_code == -400 and list_id == 'watchlater':
|
||||
self.raise_login_required('You need to login to access your watchlater playlist')
|
||||
elif error_code == -403:
|
||||
self.raise_login_required('This is a private playlist. You need to login as its owner')
|
||||
elif error_code == 11010:
|
||||
raise ExtractorError('Playlist is no longer available', expected=True)
|
||||
raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')
|
||||
|
||||
query = {
|
||||
'ps': 20,
|
||||
'with_current': False,
|
||||
**traverse_obj(initial_state, {
|
||||
'type': ('playlist', 'type', {int_or_none}),
|
||||
'biz_id': ('playlist', 'id', {int_or_none}),
|
||||
'tid': ('tid', {int_or_none}),
|
||||
'sort_field': ('sortFiled', {int_or_none}),
|
||||
'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
|
||||
})
|
||||
}
|
||||
metadata = {
|
||||
'id': f'{query["type"]}_{query["biz_id"]}',
|
||||
**traverse_obj(initial_state, ('mediaListInfo', {
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('upper', 'name', {str}),
|
||||
'uploader_id': ('upper', 'mid', {str_or_none}),
|
||||
'timestamp': ('ctime', {int_or_none}),
|
||||
'thumbnail': ('cover', {url_or_none}),
|
||||
})),
|
||||
}
|
||||
return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
|
||||
|
||||
|
||||
class BilibiliCategoryIE(InfoExtractor):
|
||||
IE_NAME = 'Bilibili category extractor'
|
||||
_MAX_RESULTS = 1000000
|
||||
_VALID_URL = r'https?://www\.bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/v/kichiku/mad',
|
||||
'info_dict': {
|
||||
|
@ -1399,7 +1660,7 @@ def _real_extract(self, url):
|
|||
|
||||
|
||||
class BiliLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://live.bilibili.com/(?:blanc/)?(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://live.bilibili.com/196',
|
||||
|
|
|
@ -1,56 +1,170 @@
|
|||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_text_and_html_by_tag,
|
||||
get_elements_by_class,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
determine_ext,
|
||||
mimetype2ext,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
def html_get_element(tag=None, cls=None):
|
||||
assert tag or cls, 'One of tag or class is required'
|
||||
|
||||
if cls:
|
||||
func = functools.partial(get_elements_by_class, cls, tag=tag)
|
||||
else:
|
||||
func = functools.partial(get_element_text_and_html_by_tag, tag)
|
||||
|
||||
def html_get_element_wrapper(html):
|
||||
return variadic(func(html))[0]
|
||||
|
||||
return html_get_element_wrapper
|
||||
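# Illustrative usage (assumed markup):
#   html_get_element(cls='opening-header__title')(
#       '<h2 class="opening-header__title">Titel</h2>') == 'Titel'
# With only tag= it wraps get_element_text_and_html_by_tag, and variadic()[0]
# yields the element's text from the returned (text, html) tuple.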
|
||||
|
||||
class BpbIE(InfoExtractor):
|
||||
IE_DESC = 'Bundeszentrale für politische Bildung'
|
||||
_VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?bpb\.de/(?:[^/?#]+/)*(?P<id>\d+)(?:[/?#]|$)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
|
||||
'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
|
||||
'info_dict': {
|
||||
'id': '297',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Kooperative Berlin',
|
||||
'description': 'md5:f4f75885ba009d3e2b156247a8941ce6',
|
||||
'release_date': '20160115',
|
||||
'series': 'Interview auf dem Geschichtsforum 1989 | 2009',
|
||||
'tags': ['Friedliche Revolution', 'Erinnerungskultur', 'Vergangenheitspolitik', 'DDR 1949 - 1990', 'Freiheitsrecht', 'BStU', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/7/297_teaser_16x9_1240.jpg?8839D',
|
||||
'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
|
||||
'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.'
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/video/522184/krieg-flucht-und-falschmeldungen-wirstattdesinformation-2/',
|
||||
'info_dict': {
|
||||
'id': '522184',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||
'description': 'md5:f83c795ff8f825a69456a9e51fc15903',
|
||||
'release_date': '20230621',
|
||||
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/4/522184_teaser_16x9_1240.png?EABFB',
|
||||
'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/lernen/bewegtbild-und-politische-bildung/webvideo/518789/krieg-flucht-und-falschmeldungen-wirstattdesinformation-1/',
|
||||
'info_dict': {
|
||||
'id': '518789',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||
'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8',
|
||||
'release_date': '20230302',
|
||||
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/9/518789_teaser_16x9_1240.jpeg?56D0D',
|
||||
'title': 'md5:3e956f264bb501f6383f10495a401da4',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/podcasts/apuz-podcast/539727/apuz-20-china/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/audio/315813/folge-1-eine-einfuehrung/',
|
||||
'info_dict': {
|
||||
'id': '315813',
|
||||
'ext': 'mp3',
|
||||
'creator': 'Axel Schröder',
|
||||
'description': 'md5:eda9d1af34e5912efef5baf54fba4427',
|
||||
'release_date': '20200921',
|
||||
'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager',
|
||||
'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/3/315813_teaser_16x9_1240.png?92A94',
|
||||
'title': 'Folge 1: Eine Einführung',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/517806/die-weltanschauung-der-neuen-rechten/',
|
||||
'info_dict': {
|
||||
'id': '517806',
|
||||
'ext': 'mp3',
|
||||
'creator': 'Bundeszentrale für politische Bildung',
|
||||
'description': 'md5:594689600e919912aade0b2871cc3fed',
|
||||
'release_date': '20230127',
|
||||
'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"',
|
||||
'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/6/517806_teaser_16x9_1240.png?7A7A0',
|
||||
'title': 'Die Weltanschauung der "Neuen Rechten"',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/reihen/zahlen-und-fakten-soziale-situation-filme/520153/zahlen-und-fakten-die-soziale-situation-in-deutschland-migration/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_TITLE_RE = re.compile('(?P<title>[^<]*)<[^>]+>(?P<series>[^<]*)')
|
||||
|
||||
def _parse_vue_attributes(self, name, string, video_id):
|
||||
attributes = extract_attributes(self._search_regex(rf'(<{name}(?:"[^"]*?"|[^>])*>)', string, name))
|
||||
|
||||
for key, value in attributes.items():
|
||||
if key.startswith(':'):
|
||||
attributes[key] = self._parse_json(value, video_id, transform_source=js_to_json, fatal=False)
|
||||
|
||||
return attributes
|
||||
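# Illustrative input (assumed markup): <bpb-player :sources='[{"src": "..."}]'
# poster="/cache/poster.jpg"> -- keys starting with ':' are Vue bindings, so
# their values are decoded from JS object notation to JSON; plain attributes
# remain strings.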
|
||||
@staticmethod
|
||||
def _process_source(source):
|
||||
url = url_or_none(source['src'])
|
||||
if not url:
|
||||
return None
|
||||
|
||||
source_type = source.get('type', '')
|
||||
extension = mimetype2ext(source_type)
|
||||
is_video = source_type.startswith('video')
|
||||
note = url.rpartition('.')[0].rpartition('_')[2] if is_video else None
|
||||
|
||||
return {
|
||||
'url': url,
|
||||
'ext': extension,
|
||||
'vcodec': None if is_video else 'none',
|
||||
'quality': 10 if note == 'high' else 0,
|
||||
'format_note': note,
|
||||
'format_id': join_nonempty(extension, note),
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
|
||||
video_info_dicts = re.findall(
|
||||
r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)
|
||||
|
||||
formats = []
|
||||
for video_info in video_info_dicts:
|
||||
video_info = self._parse_json(
|
||||
video_info, video_id, transform_source=js_to_json, fatal=False)
|
||||
if not video_info:
|
||||
continue
|
||||
video_url = video_info.get('src')
|
||||
if not video_url:
|
||||
continue
|
||||
quality = 'high' if '_high' in video_url else 'low'
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'quality': 10 if quality == 'high' else 0,
|
||||
'format_note': quality,
|
||||
'format_id': '%s-%s' % (quality, determine_ext(video_url)),
|
||||
})
|
||||
title_result = traverse_obj(webpage, ({html_get_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
|
||||
json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'title': traverse_obj(title_result, ('title', {str.strip})) or None,
|
||||
# This metadata could be interpreted otherwise, but it fits "series" the most
|
||||
'series': traverse_obj(title_result, ('series', {str.strip})) or None,
|
||||
'description': join_nonempty(*traverse_obj(webpage, [(
|
||||
{html_get_element(cls='opening-intro')},
|
||||
[{html_get_element(tag='bpb-accordion-item')}, {html_get_element(cls='text-content')}],
|
||||
), {clean_html}]), delim='\n\n') or None,
|
||||
'creator': self._html_search_meta('author', webpage),
|
||||
'uploader': self._html_search_meta('publisher', webpage),
|
||||
'release_date': unified_strdate(self._html_search_meta('date', webpage)),
|
||||
'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
|
||||
**traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
|
||||
'formats': (':sources', ..., {self._process_source}),
|
||||
'thumbnail': ('poster', {lambda x: urljoin(url, x)}),
|
||||
}),
|
||||
}
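The reworked bpb extractor above leans on Vue-style attribute bindings: any attribute whose name starts with ':' carries a JSON-like value that _parse_vue_attributes() decodes. A minimal stdlib sketch of that idea follows; the sample tag and the simplified attribute regex are assumptions for illustration, not yt-dlp code (the extractor itself uses extract_attributes() and js_to_json()).

import json
import re

# hypothetical page fragment resembling what the extractor scrapes
sample = '<bpb-player :sources=\'[{"src": "video_high.mp4", "type": "video/mp4"}]\' poster="/thumb.png">'

# crude attribute parser: name, quote character, value up to the matching quote
attributes = {m[1]: m[3] for m in re.finditer(r'([:\w-]+)=(["\'])(.*?)\2', sample)}
for key, value in list(attributes.items()):
    if key.startswith(':'):  # Vue binding: the value is a JS/JSON expression
        attributes[key] = json.loads(value)

print(attributes[':sources'][0]['src'])  # video_high.mp4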
@@ -2,7 +2,7 @@


class BreitBartIE(InfoExtractor):
    _VALID_URL = r'https?:\/\/(?:www\.)breitbart.com/videos/v/(?P<id>[^/]+)'
    _VALID_URL = r'https?://(?:www\.)?breitbart\.com/videos/v/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.breitbart.com/videos/v/5cOz1yup/?pl=Ij6NDOji',
        'md5': '0aa6d1d6e183ac5ca09207fe49f17ade',
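A quick comparison of the two patterns above (copied verbatim from the diff) shows what the tightened regex fixes: the old one made 'www.' mandatory, left the dots unescaped, and let the id group swallow query strings.

import re

old = r'https?:\/\/(?:www\.)breitbart.com/videos/v/(?P<id>[^/]+)'
new = r'https?://(?:www\.)?breitbart\.com/videos/v/(?P<id>[^/?#]+)'

url = 'https://breitbart.com/videos/v/5cOz1yup/?pl=Ij6NDOji'
print(bool(re.match(old, url)))  # False - bare domain was rejected
print(re.match(new, url)['id'])  # 5cOz1yup - and '?#' are excluded from the id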
127 yt_dlp/extractor/brilliantpala.py Normal file
@@ -0,0 +1,127 @@
import hashlib

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    traverse_obj,
    urlencode_postdata,
)


class BrilliantpalaBaseIE(InfoExtractor):
    _NETRC_MACHINE = 'brilliantpala'
    _DOMAIN = '{subdomain}.brilliantpala.org'

    def _initialize_pre_login(self):
        self._HOMEPAGE = f'https://{self._DOMAIN}'
        self._LOGIN_API = f'{self._HOMEPAGE}/login/'
        self._LOGOUT_DEVICES_API = f'{self._HOMEPAGE}/logout_devices/?next=/'
        self._CONTENT_API = f'{self._HOMEPAGE}/api/v2.4/contents/{{content_id}}/'
        self._HLS_AES_URI = f'{self._HOMEPAGE}/api/v2.5/video_contents/{{content_id}}/key/'

    def _get_logged_in_username(self, url, video_id):
        webpage, urlh = self._download_webpage_handle(url, video_id)
        if self._LOGIN_API == urlh.url:
            self.raise_login_required()
        return self._html_search_regex(
            r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'stream page info', 'username')

    def _perform_login(self, username, password):
        login_form = self._hidden_inputs(self._download_webpage(
            self._LOGIN_API, None, 'Downloading login page'))
        login_form.update({
            'username': username,
            'password': password,
        })
        self._set_cookie(self._DOMAIN, 'csrftoken', login_form['csrfmiddlewaretoken'])

        logged_page = self._download_webpage(
            self._LOGIN_API, None, note='Logging in', headers={'Referer': self._LOGIN_API},
            data=urlencode_postdata(login_form))

        if self._html_search_regex(
                r'(Your username / email and password)', logged_page, 'auth fail', default=None):
            raise ExtractorError('wrong username or password', expected=True)

        # the maximum number of logins is one
        if self._html_search_regex(
                r'(Logout Other Devices)', logged_page, 'logout devices button', default=None):
            logout_device_form = self._hidden_inputs(logged_page)
            self._download_webpage(
                self._LOGOUT_DEVICES_API, None, headers={'Referer': self._LOGIN_API},
                note='Logging out other devices', data=urlencode_postdata(logout_device_form))

    def _real_extract(self, url):
        course_id, content_id = self._match_valid_url(url).group('course_id', 'content_id')
        video_id = f'{course_id}-{content_id}'

        username = self._get_logged_in_username(url, video_id)

        content_json = self._download_json(
            self._CONTENT_API.format(content_id=content_id), video_id,
            note='Fetching content info', errnote='Unable to fetch content info')

        entries = []
        for stream in traverse_obj(content_json, ('video', 'streams', lambda _, v: v['id'] and v['url'])):
            formats = self._extract_m3u8_formats(stream['url'], video_id, fatal=False)
            if not formats:
                continue
            entries.append({
                'id': str(stream['id']),
                'title': content_json.get('title'),
                'formats': formats,
                'hls_aes': {'uri': self._HLS_AES_URI.format(content_id=content_id)},
                'http_headers': {'X-Key': hashlib.sha256(username.encode('ascii')).hexdigest()},
                'thumbnail': content_json.get('cover_image'),
            })

        return self.playlist_result(
            entries, playlist_id=video_id, playlist_title=content_json.get('title'))


class BrilliantpalaElearnIE(BrilliantpalaBaseIE):
    IE_NAME = 'Brilliantpala:Elearn'
    IE_DESC = 'VoD on elearn.brilliantpala.org'
    _VALID_URL = r'https?://elearn\.brilliantpala\.org/courses/(?P<course_id>\d+)/contents/(?P<content_id>\d+)/?'
    _TESTS = [{
        'url': 'https://elearn.brilliantpala.org/courses/42/contents/12345/',
        'only_matching': True,
    }, {
        'url': 'https://elearn.brilliantpala.org/courses/98/contents/36683/',
        'info_dict': {
            'id': '23577',
            'ext': 'mp4',
            'title': 'Physical World, Units and Measurements - 1',
            'thumbnail': 'https://d1j3vi2u94ebt0.cloudfront.net/institute/brilliantpalalms/chapter_contents/26237/e657f81b90874be19795c7ea081f8d5c.png',
            'live_status': 'not_live',
        },
        'params': {
            'skip_download': True,
        },
    }]

    _DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='elearn')


class BrilliantpalaClassesIE(BrilliantpalaBaseIE):
    IE_NAME = 'Brilliantpala:Classes'
    IE_DESC = 'VoD on classes.brilliantpala.org'
    _VALID_URL = r'https?://classes\.brilliantpala\.org/courses/(?P<course_id>\d+)/contents/(?P<content_id>\d+)/?'
    _TESTS = [{
        'url': 'https://classes.brilliantpala.org/courses/42/contents/12345/',
        'only_matching': True,
    }, {
        'url': 'https://classes.brilliantpala.org/courses/416/contents/25445/',
        'info_dict': {
            'id': '9128',
            'ext': 'mp4',
            'title': 'Motion in a Straight Line - Class 1',
            'thumbnail': 'https://d3e4y8hquds3ek.cloudfront.net/institute/brilliantpalaelearn/chapter_contents/ff5ba838d0ec43419f67387fe1a01fa8.png',
            'live_status': 'not_live',
        },
        'params': {
            'skip_download': True,
        },
    }]

    _DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='classes')
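For context on the 'X-Key' header used above: it is simply the SHA-256 hex digest of the logged-in username, attached to the HLS key requests. A standalone illustration, with a made-up account name:

import hashlib

username = 'student01'  # hypothetical account name, not a real credential
x_key = hashlib.sha256(username.encode('ascii')).hexdigest()
print({'X-Key': x_key})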
39 yt_dlp/extractor/canal1.py Normal file
@@ -0,0 +1,39 @@
from .common import InfoExtractor


class Canal1IE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.|noticias\.)?canal1\.com\.co/(?:[^?#&])+/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://canal1.com.co/noticias/napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco/',
        'info_dict': {
            'id': '63b39f6b354977084b85ab54',
            'display_id': 'napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco',
            'title': 'Ñapa I Una cadena de producción de arroz que se quedó en veremos y abandonada en el departamento del Chocó',
            'description': 'md5:bc49c6d64d20610ea1e7daf079a0d013',
            'thumbnail': r're:^https?://[^?#]+63b39f6b354977084b85ab54',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://noticias.canal1.com.co/noticias/tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter/',
        'info_dict': {
            'id': '63b39e93f5fd223aa32250fb',
            'display_id': 'tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter',
            'title': 'Tres I El triste récord que impuso Elon Musk, el dueño de Tesla y de Twitter',
            'description': 'md5:d9f691f131a21ce6767ca6c05d17d791',
            'thumbnail': r're:^https?://[^?#]+63b39e93f5fd223aa32250fb',
            'ext': 'mp4',
        },
    }, {
        # Geo-restricted to Colombia
        'url': 'https://canal1.com.co/programas/guerreros-canal-1/video-inedito-guerreros-despedida-kewin-zarate/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        return self.url_result(
            self._search_regex(r'"embedUrl"\s*:\s*"([^"]+)', webpage, 'embed url'),
            display_id=display_id, url_transparent=True)
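Canal1 pages embed their player via JSON-LD, so the extractor above only lifts the "embedUrl" value and defers to url_result(). A minimal stdlib version of that scrape, with an invented HTML snippet:

import re

webpage = '<script type="application/ld+json">{"embedUrl": "https://mdstrm.com/embed/63b39f6b"}</script>'
print(re.search(r'"embedUrl"\s*:\s*"([^"]+)', webpage)[1])  # https://mdstrm.com/embed/63b39f6b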
136 yt_dlp/extractor/caracoltv.py Normal file
@@ -0,0 +1,136 @@
import base64
import json
import uuid

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    js_to_json,
    traverse_obj,
    urljoin,
)


class CaracolTvPlayIE(InfoExtractor):
    _VALID_URL = r'https?://play\.caracoltv\.com/videoDetails/(?P<id>[^/?#]+)'
    _NETRC_MACHINE = 'caracoltv-play'

    _TESTS = [{
        'url': 'https://play.caracoltv.com/videoDetails/OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
        'info_dict': {
            'id': 'OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
            'title': 'La teoría del promedio',
            'description': 'md5:1cdd6d2c13f19ef0d9649ab81a023ac3',
        },
        'playlist_count': 6,
    }, {
        'url': 'https://play.caracoltv.com/videoDetails/OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==/ella?season=0',
        'info_dict': {
            'id': 'OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==',
            'title': 'Ella',
            'description': 'md5:a639b1feb5ddcc0cff92a489b4e544b8',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://play.caracoltv.com/videoDetails/OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==/la-vuelta-al-mundo-en-80-risas-2022?season=0',
        'info_dict': {
            'id': 'OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==',
            'title': 'La vuelta al mundo en 80 risas 2022',
            'description': 'md5:e97aac36106e5c37ebf947b3350106a4',
        },
        'playlist_count': 17,
    }, {
        'url': 'https://play.caracoltv.com/videoDetails/MzoxX3BwbjRmNjB1',
        'only_matching': True,
    }]

    _USER_TOKEN = None

    def _extract_app_token(self, webpage):
        config_js_path = self._search_regex(
            r'<script[^>]+src\s*=\s*"([^"]+coreConfig.js[^"]+)', webpage, 'config js url', fatal=False)

        mediation_config = {} if not config_js_path else self._search_json(
            r'mediation\s*:', self._download_webpage(
                urljoin('https://play.caracoltv.com/', config_js_path), None, fatal=False, note='Extracting JS config'),
            'mediation_config', None, transform_source=js_to_json, fatal=False)

        key = traverse_obj(
            mediation_config, ('live', 'key')) or '795cd9c089a1fc48094524a5eba85a3fca1331817c802f601735907c8bbb4f50'
        secret = traverse_obj(
            mediation_config, ('live', 'secret')) or '64dec00a6989ba83d087621465b5e5d38bdac22033b0613b659c442c78976fa0'

        return base64.b64encode(f'{key}:{secret}'.encode()).decode()

    def _perform_login(self, email, password):
        webpage = self._download_webpage('https://play.caracoltv.com/', None, fatal=False)
        app_token = self._extract_app_token(webpage)

        bearer_token = self._download_json(
            'https://eu-gateway.inmobly.com/applications/oauth', None, data=b'', note='Retrieving bearer token',
            headers={'Authorization': f'Basic {app_token}'})['token']

        self._USER_TOKEN = self._download_json(
            'https://eu-gateway.inmobly.com/user/login', None, note='Performing login', headers={
                'Content-Type': 'application/json',
                'Authorization': f'Bearer {bearer_token}',
            }, data=json.dumps({
                'device_data': {
                    'device_id': str(uuid.uuid4()),
                    'device_token': '',
                    'device_type': 'web'
                },
                'login_data': {
                    'enabled': True,
                    'email': email,
                    'password': password,
                }
            }).encode())['user_token']

    def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None):
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['stream_url'], series_id, 'mp4')

        return {
            'id': video_data['id'],
            'title': video_data.get('name'),
            'description': video_data.get('description'),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': traverse_obj(
                video_data, ('extra_thumbs', ..., {'url': 'thumb_url', 'height': 'height', 'width': 'width'})),
            'series_id': series_id,
            'season_id': season_id,
            'season_number': int_or_none(season_number),
            'episode_number': int_or_none(video_data.get('item_order')),
            'is_live': video_data.get('entry_type') == 3,
        }

    def _extract_series_seasons(self, seasons, series_id):
        for season in seasons:
            api_response = self._download_json(
                'https://eu-gateway.inmobly.com/feed', series_id, query={'season_id': season['id']},
                headers={'Authorization': f'Bearer {self._USER_TOKEN}'})

            season_number = season.get('order')
            for episode in api_response['items']:
                yield self._extract_video(episode, series_id, season['id'], season_number)

    def _real_extract(self, url):
        series_id = self._match_id(url)

        if self._USER_TOKEN is None:
            self._perform_login('guest@inmobly.com', 'Test@gus1')

        api_response = self._download_json(
            'https://eu-gateway.inmobly.com/feed', series_id, query={'include_ids': series_id},
            headers={'Authorization': f'Bearer {self._USER_TOKEN}'})['items'][0]

        if not api_response.get('seasons'):
            return self._extract_video(api_response)

        return self.playlist_result(
            self._extract_series_seasons(api_response['seasons'], series_id),
            series_id, **traverse_obj(api_response, {
                'title': 'name',
                'description': 'description',
            }))
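The application token built by _extract_app_token() above is plain HTTP Basic material, base64('key:secret'). A sketch with placeholder values (the real pair is scraped from coreConfig.js, with the constants in the diff as fallback):

import base64

key, secret = 'example-key', 'example-secret'  # placeholders, not real credentials
app_token = base64.b64encode(f'{key}:{secret}'.encode()).decode()
print({'Authorization': f'Basic {app_token}'})  # exchanged for a bearer token at /applications/oauth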
@@ -2,6 +2,7 @@
import json
import base64
import time
import urllib.parse

from .common import InfoExtractor
from ..compat import (

@@ -65,6 +66,7 @@ class CBCIE(InfoExtractor):
            'uploader': 'CBCC-NEW',
            'timestamp': 255977160,
        },
        'skip': '404 Not Found',
    }, {
        # multiple iframes
        'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',

@@ -96,7 +98,7 @@ class CBCIE(InfoExtractor):
        # multiple CBC.APP.Caffeine.initInstance(...)
        'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238',
        'info_dict': {
            'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',
            'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',  # FIXME
            'id': 'dog-indoor-exercise-winter-1.3928238',
            'description': 'md5:c18552e41726ee95bd75210d1ca9194c',
        },

@@ -161,7 +163,7 @@ class CBCPlayerIE(InfoExtractor):
            'upload_date': '20160210',
            'uploader': 'CBCC-NEW',
        },
        'skip': 'Geo-restricted to Canada',
        'skip': 'Geo-restricted to Canada and no longer available',
    }, {
        # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
        'url': 'http://www.cbc.ca/player/play/2657631896',

@@ -174,6 +176,9 @@ class CBCPlayerIE(InfoExtractor):
            'timestamp': 1425704400,
            'upload_date': '20150307',
            'uploader': 'CBCC-NEW',
            'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
            'chapters': [],
            'duration': 494.811,
        },
    }, {
        'url': 'http://www.cbc.ca/player/play/2164402062',

@@ -186,6 +191,28 @@ class CBCPlayerIE(InfoExtractor):
            'timestamp': 1320410746,
            'upload_date': '20111104',
            'uploader': 'CBCC-NEW',
            'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
            'chapters': [],
            'duration': 186.867,
        },
    }, {
        # Has subtitles
        # These broadcasts expire after ~1 month, can find new test URL here:
        # https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
        'url': 'http://www.cbc.ca/player/play/2249992771553',
        'md5': '2f2fb675dd4f0f8a5bb7588d1b13bacd',
        'info_dict': {
            'id': '2249992771553',
            'ext': 'mp4',
            'title': 'The National | Women’s soccer pay, Florida seawater, Swift quake',
            'description': 'md5:adba28011a56cfa47a080ff198dad27a',
            'timestamp': 1690596000,
            'duration': 2716.333,
            'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
            'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/481/326/thumbnail.jpeg',
            'uploader': 'CBCC-NEW',
            'chapters': 'count:5',
            'upload_date': '20230729',
        },
    }]

@@ -199,9 +226,42 @@ def _real_extract(self, url):
                'force_smil_url': True
            }),
            'id': video_id,
            '_format_sort_fields': ('res', 'proto')  # Prioritize direct http formats over HLS
        }


class CBCPlayerPlaylistIE(InfoExtractor):
    IE_NAME = 'cbc.ca:player:playlist'
    _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:player/)(?!play/)(?P<id>[^?#]+)'
    _TESTS = [{
        'url': 'https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast',
        'playlist_mincount': 25,
        'info_dict': {
            'id': 'news/tv shows/the national/latest broadcast',
        }
    }, {
        'url': 'https://www.cbc.ca/player/news/Canada/North',
        'playlist_mincount': 25,
        'info_dict': {
            'id': 'news/canada/north',
        }
    }]

    def _real_extract(self, url):
        playlist_id = urllib.parse.unquote(self._match_id(url)).lower()
        webpage = self._download_webpage(url, playlist_id)
        json_content = self._search_json(
            r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', playlist_id)

        def entries():
            for video_id in traverse_obj(json_content, (
                'video', 'clipsByCategory', lambda k, _: k.lower() == playlist_id, 'items', ..., 'id'
            )):
                yield self.url_result(f'https://www.cbc.ca/player/play/{video_id}', CBCPlayerIE)

        return self.playlist_result(entries(), playlist_id)


class CBCGemIE(InfoExtractor):
    IE_NAME = 'gem.cbc.ca'
    _VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)'

@@ -280,12 +340,12 @@ def _new_claims_token(self, email, password):
        data = json.dumps({'jwt': sig}).encode()
        headers = {'content-type': 'application/json', 'ott-device-type': 'web'}
        resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/token',
                                   None, data=data, headers=headers)
                                   None, data=data, headers=headers, expected_status=426)
        cbc_access_token = resp['accessToken']

        headers = {'content-type': 'application/json', 'ott-device-type': 'web', 'ott-access-token': cbc_access_token}
        resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/profile',
                                   None, headers=headers)
                                   None, headers=headers, expected_status=426)
        return resp['claimsToken']

    def _get_claims_token_expiry(self):

@@ -417,6 +477,10 @@ class CBCGemPlaylistIE(InfoExtractor):
            'id': 'schitts-creek/s06',
            'title': 'Season 6',
            'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
            'series': 'Schitt\'s Creek',
            'season_number': 6,
            'season': 'Season 6',
            'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/season/perso/cbc_schitts_creek_season_06_carousel_v03.jpg?impolicy=ott&im=Resize=(_Size_)&quality=75',
        },
    }, {
        'url': 'https://gem.cbc.ca/schitts-creek/s06',
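The CBCPlayerPlaylistIE added above matches the URL path case-insensitively against the clipsByCategory keys of window.__INITIAL_STATE__. A rough stdlib re-creation of that lookup, using an invented state object in place of the scraped JSON:

state = {'video': {'clipsByCategory': {
    'News/Canada/North': {'items': [{'id': '111'}, {'id': '222'}]},
}}}

playlist_id = 'news/canada/north'  # unquoted, lowercased URL path
for category, data in state['video']['clipsByCategory'].items():
    if category.lower() == playlist_id:  # case-insensitive key match
        print([f'https://www.cbc.ca/player/play/{item["id"]}' for item in data['items']])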
@@ -101,6 +101,7 @@ class CBSIE(CBSBaseIE):
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'Subscription required',
    }, {
        'url': 'https://www.cbs.com/shows/video/sZH1MGgomIosZgxGJ1l263MFq16oMtW1/',
        'info_dict': {

@@ -117,6 +118,7 @@ class CBSIE(CBSBaseIE):
        },
        'expected_warnings': [
            'This content expired on', 'No video formats found', 'Requested format is not available'],
        'skip': '404 Not Found',
    }, {
        'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
        'only_matching': True,
|
@ -90,10 +90,17 @@ class CCCPlaylistIE(InfoExtractor):
|
|||
'id': '30c3',
|
||||
},
|
||||
'playlist_count': 135,
|
||||
}, {
|
||||
'url': 'https://media.ccc.de/c/DS2023',
|
||||
'info_dict': {
|
||||
'title': 'Datenspuren 2023',
|
||||
'id': 'DS2023',
|
||||
},
|
||||
'playlist_count': 37
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url).lower()
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
conf = self._download_json(
|
||||
'https://media.ccc.de/public/conferences/' + playlist_id,
136 yt_dlp/extractor/cineverse.py Normal file
@@ -0,0 +1,136 @@
import re

from .common import InfoExtractor
from ..utils import (
    filter_dict,
    int_or_none,
    parse_age_limit,
    smuggle_url,
    traverse_obj,
    unsmuggle_url,
    url_or_none,
)


class CineverseBaseIE(InfoExtractor):
    _VALID_URL_BASE = r'https://www\.(?P<host>%s)' % '|'.join(map(re.escape, (
        'cineverse.com',
        'asiancrush.com',
        'dovechannel.com',
        'screambox.com',
        'midnightpulp.com',
        'fandor.com',
        'retrocrush.tv',
    )))


class CineverseIE(CineverseBaseIE):
    _VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/watch/(?P<id>[A-Z0-9]+)'
    _TESTS = [{
        'url': 'https://www.asiancrush.com/watch/DMR00018919/Women-Who-Flirt',
        'skip': 'geo-blocked',
        'info_dict': {
            'title': 'Women Who Flirt',
            'ext': 'mp4',
            'id': 'DMR00018919',
            'modified_timestamp': 1678744575289,
            'cast': ['Xun Zhou', 'Xiaoming Huang', 'Yi-Lin Sie', 'Sonia Sui', 'Quniciren'],
            'duration': 5811.597,
            'description': 'md5:892fd62a05611d394141e8394ace0bc6',
            'age_limit': 13,
        }
    }, {
        'url': 'https://www.retrocrush.tv/watch/1000000023016/Archenemy! Crystal Bowie',
        'skip': 'geo-blocked',
        'info_dict': {
            'title': 'Archenemy! Crystal Bowie',
            'ext': 'mp4',
            'id': '1000000023016',
            'episode_number': 3,
            'season_number': 1,
            'cast': ['Nachi Nozawa', 'Yoshiko Sakakibara', 'Toshiko Fujita'],
            'age_limit': 0,
            'episode': 'Episode 3',
            'season': 'Season 1',
            'duration': 1485.067,
            'description': 'Cobra meets a beautiful bounty hunter by the name of Jane Royal.',
            'series': 'Space Adventure COBRA (Original Japanese)',
        }
    }]

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, default={})
        self._initialize_geo_bypass({
            'countries': smuggled_data.get('geo_countries'),
        })
        video_id = self._match_id(url)
        html = self._download_webpage(url, video_id)
        idetails = self._search_nextjs_data(html, video_id)['props']['pageProps']['idetails']

        if idetails.get('err_code') == 1200:
            self.raise_geo_restricted(
                'This video is not available from your location due to geo restriction. '
                'You may be able to bypass it by using the /details/ page instead of the /watch/ page',
                countries=smuggled_data.get('geo_countries'))

        return {
            'subtitles': filter_dict({
                'en': traverse_obj(idetails, (('cc_url_vtt', 'subtitle_url'), {'url': {url_or_none}})) or None,
            }),
            'formats': self._extract_m3u8_formats(idetails['url'], video_id),
            **traverse_obj(idetails, {
                'title': 'title',
                'id': ('details', 'item_id'),
                'description': ('details', 'description'),
                'duration': ('duration', {lambda x: x / 1000}),
                'cast': ('details', 'cast', {lambda x: x.split(', ')}),
                'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}),
                'season_number': ('details', 'season', {int_or_none}),
                'episode_number': ('details', 'episode', {int_or_none}),
                'age_limit': ('details', 'rating_code', {parse_age_limit}),
                'series': ('details', 'series_details', 'title'),
            }),
        }


class CineverseDetailsIE(CineverseBaseIE):
    _VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/details/(?P<id>[A-Z0-9]+)'
    _TESTS = [{
        'url': 'https://www.retrocrush.tv/details/1000000023012/Space-Adventure-COBRA-(Original-Japanese)',
        'playlist_mincount': 30,
        'info_dict': {
            'title': 'Space Adventure COBRA (Original Japanese)',
            'id': '1000000023012',
        }
    }, {
        'url': 'https://www.asiancrush.com/details/NNVG4938/Hansel-and-Gretel',
        'info_dict': {
            'id': 'NNVG4938',
            'ext': 'mp4',
            'title': 'Hansel and Gretel',
            'description': 'md5:e3e4c35309c2e82aee044f972c2fb05d',
            'cast': ['Jeong-myeong Cheon', 'Eun Won-jae', 'Shim Eun-gyeong', 'Ji-hee Jin', 'Hee-soon Park', 'Lydia Park', 'Kyeong-ik Kim'],
            'duration': 7030.732,
        },
    }]

    def _real_extract(self, url):
        host, series_id = self._match_valid_url(url).group('host', 'id')
        html = self._download_webpage(url, series_id)
        pageprops = self._search_nextjs_data(html, series_id)['props']['pageProps']

        geo_countries = traverse_obj(pageprops, ('itemDetailsData', 'geo_country', {lambda x: x.split(', ')}))
        geoblocked = traverse_obj(pageprops, (
            'itemDetailsData', 'playback_err_msg')) == 'This title is not available in your location.'

        def item_result(item):
            item_url = f'https://www.{host}/watch/{item["item_id"]}/{item["title"]}'
            if geoblocked:
                item_url = smuggle_url(item_url, {'geo_countries': geo_countries})
            return self.url_result(item_url, CineverseIE)

        season = traverse_obj(pageprops, ('seasonEpisodes', ..., 'episodes', lambda _, v: v['item_id'] and v['title']))
        if season:
            return self.playlist_result([item_result(ep) for ep in season], playlist_id=series_id,
                                        playlist_title=traverse_obj(pageprops, ('itemDetailsData', 'title')))
        return item_result(pageprops['itemDetailsData'])
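The geo-bypass handoff above rides on yt-dlp's smuggle_url()/unsmuggle_url() helpers, which stash extra JSON alongside a URL. A rough stdlib equivalent of the idea (not the actual yt-dlp implementation; this sketch encodes the data in a URL fragment):

import json
import urllib.parse

def smuggle(url, data):
    # append the payload as a quoted JSON fragment
    return url + '#__data=' + urllib.parse.quote(json.dumps(data))

def unsmuggle(url):
    base, _, frag = url.partition('#__data=')
    return base, json.loads(urllib.parse.unquote(frag)) if frag else {}

smuggled = smuggle('https://www.retrocrush.tv/watch/1000000023016/x', {'geo_countries': ['US']})
print(unsmuggle(smuggled))  # ('https://www.retrocrush.tv/watch/1000000023016/x', {'geo_countries': ['US']})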
@@ -19,6 +19,7 @@ class CNBCIE(InfoExtractor):
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'Dead link',
    }

    def _real_extract(self, url):

@@ -49,6 +50,7 @@ class CNBCVideoIE(InfoExtractor):
        'params': {
            'skip_download': True,
        },
        'skip': 'Dead link',
    }

    def _real_extract(self, url):
@@ -729,7 +729,7 @@ def extract(self, url):
        except UnsupportedError:
            raise
        except ExtractorError as e:
            e.video_id = e.video_id or self.get_temp_id(url),
            e.video_id = e.video_id or self.get_temp_id(url)
            e.ie = e.ie or self.IE_NAME,
            e.traceback = e.traceback or sys.exc_info()[2]
            raise

@@ -1042,7 +1042,7 @@ def download_content(self, url_or_request, video_id, note=note, errnote=errnote,
                             fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
            if self.get_param('load_pages'):
                url_or_request = self._create_request(url_or_request, data, headers, query)
                filename = self._request_dump_filename(url_or_request.full_url, video_id)
                filename = self._request_dump_filename(url_or_request.url, video_id)
                self.to_screen(f'Loading request from {filename}')
                try:
                    with open(filename, 'rb') as dumpf:

@@ -1687,7 +1687,7 @@ def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal
    def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
        """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
        rectx = re.escape(context_name)
        FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
        FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){.*?\breturn\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
        js, arg_keys, arg_vals = self._search_regex(
            (rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
            webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),

@@ -2248,18 +2248,10 @@ def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4
        if res is False:
            assert not fatal
            return [], {}

        smil, urlh = res
        smil_url = urlh.url

        namespace = self._parse_smil_namespace(smil)

        fmts = self._parse_smil_formats(
            smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
        subs = self._parse_smil_subtitles(
            smil, namespace=namespace)

        return fmts, subs
        return self._parse_smil_formats_and_subtitles(smil, urlh.url, video_id, f4m_params=f4m_params,
                                                      namespace=self._parse_smil_namespace(smil))

    def _extract_smil_formats(self, *args, **kwargs):
        fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs)

@@ -2285,9 +2277,8 @@ def _download_smil(self, smil_url, video_id, fatal=True, transform_source=None):
    def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
        namespace = self._parse_smil_namespace(smil)

        formats = self._parse_smil_formats(
        formats, subtitles = self._parse_smil_formats_and_subtitles(
            smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
        subtitles = self._parse_smil_subtitles(smil, namespace=namespace)

        video_id = os.path.splitext(url_basename(smil_url))[0]
        title = None

@@ -2326,7 +2317,14 @@ def _parse_smil_namespace(self, smil):
        return self._search_regex(
            r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)

    def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
    def _parse_smil_formats(self, *args, **kwargs):
        fmts, subs = self._parse_smil_formats_and_subtitles(*args, **kwargs)
        if subs:
            self._report_ignoring_subs('SMIL')
        return fmts

    def _parse_smil_formats_and_subtitles(
            self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
        base = smil_url
        for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
            b = meta.get('base') or meta.get('httpBase')

@@ -2334,7 +2332,7 @@ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_para
                base = b
                break

        formats = []
        formats, subtitles = [], {}
        rtmp_count = 0
        http_count = 0
        m3u8_count = 0

@@ -2382,8 +2380,9 @@ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_para
            src_url = src_url.strip()

            if proto == 'm3u8' or src_ext == 'm3u8':
                m3u8_formats = self._extract_m3u8_formats(
                m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                    src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
                self._merge_subtitles(m3u8_subs, target=subtitles)
                if len(m3u8_formats) == 1:
                    m3u8_count += 1
                    m3u8_formats[0].update({

@@ -2404,11 +2403,15 @@ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_para
                    f4m_url += urllib.parse.urlencode(f4m_params)
                formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
            elif src_ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    src_url, video_id, mpd_id='dash', fatal=False))
                mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(
                    src_url, video_id, mpd_id='dash', fatal=False)
                formats.extend(mpd_formats)
                self._merge_subtitles(mpd_subs, target=subtitles)
            elif re.search(r'\.ism/[Mm]anifest', src_url):
                formats.extend(self._extract_ism_formats(
                    src_url, video_id, ism_id='mss', fatal=False))
                ism_formats, ism_subs = self._extract_ism_formats_and_subtitles(
                    src_url, video_id, ism_id='mss', fatal=False)
                formats.extend(ism_formats)
                self._merge_subtitles(ism_subs, target=subtitles)
            elif src_url.startswith('http') and self._is_valid_url(src, video_id):
                http_count += 1
                formats.append({

@@ -2439,7 +2442,10 @@ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_para
                'format_note': 'SMIL storyboards',
            })

        return formats
        smil_subs = self._parse_smil_subtitles(smil, namespace=namespace)
        self._merge_subtitles(smil_subs, target=subtitles)

        return formats, subtitles

    def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
        urls = []
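The SMIL refactor above follows a recurring yt-dlp shape: the worker returns a (formats, subtitles) pair and a thin legacy wrapper discards the subtitles with a warning. A generic, runnable sketch of that pattern (names and the warning text are illustrative, not the actual common.py code):

def _parse_formats_and_subtitles():
    # stand-in for the real parser that walks the manifest
    return ['format-a', 'format-b'], {'en': [{'ext': 'vtt'}]}

def _parse_formats():
    # legacy entry point: same work, subtitles dropped with a notice
    fmts, subs = _parse_formats_and_subtitles()
    if subs:
        print('[warning] Ignoring subtitle tracks found in the manifest')
    return fmts

print(_parse_formats())  # ['format-a', 'format-b']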
@@ -41,7 +41,7 @@ class CorusIE(ThePlatformFeedIE):  # XXX: Do not subclass from concrete IE
                        )
                    '''
    _TESTS = [{
        'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
        'url': 'https://www.hgtv.ca/video/bryan-inc/movie-night-popcorn-with-bryan/870923331648/',
        'info_dict': {
            'id': '870923331648',
            'ext': 'mp4',

@@ -54,6 +54,7 @@ class CorusIE(ThePlatformFeedIE):  # XXX: Do not subclass from concrete IE
            'skip_download': True,
        },
        'expected_warnings': ['Failed to parse JSON'],
        # FIXME: yt-dlp wrongly raises for geo restriction
    }, {
        'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
        'only_matching': True,
@@ -10,7 +10,7 @@


class CraftsyIE(InfoExtractor):
    _VALID_URL = r'https?://www.craftsy.com/class/(?P<id>[a-z0-9_-]+)/'
    _VALID_URL = r'https?://www\.craftsy\.com/class/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.craftsy.com/class/the-midnight-quilt-show-season-5/',
        'info_dict': {
@@ -27,11 +27,24 @@ class CrunchyrollBaseIE(InfoExtractor):
    _AUTH_HEADERS = None
    _API_ENDPOINT = None
    _BASIC_AUTH = None
    _QUERY = {}
    _CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
    _LOCALE_LOOKUP = {
        'ar': 'ar-SA',
        'de': 'de-DE',
        '': 'en-US',
        'es': 'es-419',
        'es-es': 'es-ES',
        'fr': 'fr-FR',
        'it': 'it-IT',
        'pt-br': 'pt-BR',
        'pt-pt': 'pt-PT',
        'ru': 'ru-RU',
        'hi': 'hi-IN',
    }

    @property
    def is_logged_in(self):
        return self._get_cookies(self._BASE_URL).get('etp_rt')
        return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))

    def _perform_login(self, username, password):
        if self.is_logged_in:

@@ -62,49 +75,49 @@ def _perform_login(self, username, password):
        if not self.is_logged_in:
            raise ExtractorError('Login succeeded but did not set etp_rt cookie')

    def _update_query(self, lang):
        if lang in CrunchyrollBaseIE._QUERY:
            return

        webpage = self._download_webpage(
            f'{self._BASE_URL}/{lang}', None, note=f'Retrieving main page (lang={lang or None})')

        initial_state = self._search_json(r'__INITIAL_STATE__\s*=', webpage, 'initial state', None)
        CrunchyrollBaseIE._QUERY[lang] = traverse_obj(initial_state, {
            'locale': ('localization', 'locale'),
        }) or None

        if CrunchyrollBaseIE._BASIC_AUTH:
            return

        app_config = self._search_json(r'__APP_CONFIG__\s*=', webpage, 'app config', None)
        cx_api_param = app_config['cxApiParams']['accountAuthClientId' if self.is_logged_in else 'anonClientId']
        self.write_debug(f'Using cxApiParam={cx_api_param}')
        CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()

    def _update_auth(self):
        if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
            return

        assert CrunchyrollBaseIE._BASIC_AUTH, '_update_query needs to be called at least one time beforehand'
        if not CrunchyrollBaseIE._BASIC_AUTH:
            cx_api_param = self._CLIENT_ID[self.is_logged_in]
            self.write_debug(f'Using cxApiParam={cx_api_param}')
            CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()

        grant_type = 'etp_rt_cookie' if self.is_logged_in else 'client_id'
        auth_response = self._download_json(
            f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
            headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode())
        try:
            auth_response = self._download_json(
                f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
                headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode())
        except ExtractorError as error:
            if isinstance(error.cause, HTTPError) and error.cause.status == 403:
                raise ExtractorError(
                    'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
                    'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
                    'and your browser\'s User-Agent (with --user-agent)', expected=True)
            raise

        CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
        CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)

    def _locale_from_language(self, language):
        config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
        return config_locale[0] if config_locale else self._LOCALE_LOOKUP.get(language)

    def _call_base_api(self, endpoint, internal_id, lang, note=None, query={}):
        self._update_query(lang)
        self._update_auth()

        if not endpoint.startswith('/'):
            endpoint = f'/{endpoint}'

        query = query.copy()
        locale = self._locale_from_language(lang)
        if locale:
            query['locale'] = locale

        return self._download_json(
            f'{self._BASE_URL}{endpoint}', internal_id, note or f'Calling API: {endpoint}',
            headers=CrunchyrollBaseIE._AUTH_HEADERS, query={**CrunchyrollBaseIE._QUERY[lang], **query})
            headers=CrunchyrollBaseIE._AUTH_HEADERS, query=query)

    def _call_api(self, path, internal_id, lang, note='api', query={}):
        if not path.startswith(f'/content/v2/{self._API_ENDPOINT}/'):

@@ -206,7 +219,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
    IE_NAME = 'crunchyroll'
    _VALID_URL = r'''(?x)
        https?://(?:beta\.|www\.)?crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        (?:(?P<lang>\w{2}(?:-\w{2})?)/)?
        watch/(?!concert|musicvideo)(?P<id>\w+)'''
    _TESTS = [{
        # Premium only

@@ -304,7 +317,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y',
        'url': 'https://www.crunchyroll.com/de/watch/GY2P1Q98Y',
        'only_matching': True,
    }, {
        'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
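With the change above, the locale no longer comes from a scraped __INITIAL_STATE__ query but from the <lang> URL component mapped through _LOCALE_LOOKUP. A simplified, runnable illustration (the regex below drops the concert/musicvideo exclusion for brevity; the table entries are copied from the diff):

import re

LOCALE_LOOKUP = {'': 'en-US', 'de': 'de-DE', 'pt-br': 'pt-BR', 'es': 'es-419'}

m = re.match(r'https?://(?:www\.)?crunchyroll\.com/(?:(?P<lang>\w{2}(?:-\w{2})?)/)?watch/(?P<id>\w+)',
             'https://www.crunchyroll.com/de/watch/GY2P1Q98Y')
print(LOCALE_LOOKUP.get(m['lang'] or ''))  # de-DE, sent as ?locale=... on API calls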
@@ -45,7 +45,7 @@ def _get_vimeo_id(self, activity_id):


class CybraryIE(CybraryBaseIE):
    _VALID_URL = r'https?://app.cybrary.it/immersive/(?P<enrollment>[0-9]+)/activity/(?P<id>[0-9]+)'
    _VALID_URL = r'https?://app\.cybrary\.it/immersive/(?P<enrollment>[0-9]+)/activity/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://app.cybrary.it/immersive/12487950/activity/63102',
        'md5': '9ae12d37e555cb2ed554223a71a701d0',

@@ -110,7 +110,7 @@ def _real_extract(self, url):


class CybraryCourseIE(CybraryBaseIE):
    _VALID_URL = r'https://app.cybrary.it/browse/course/(?P<id>[\w-]+)/?(?:$|[#?])'
    _VALID_URL = r'https://app\.cybrary\.it/browse/course/(?P<id>[\w-]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://app.cybrary.it/browse/course/az-500-microsoft-azure-security-technologies',
        'info_dict': {
@@ -1,31 +1,72 @@
import time
import hashlib
import re
import urllib
import uuid

from .common import InfoExtractor
from .openload import PhantomJSwrapper
from ..utils import (
    ExtractorError,
    UserNotLive,
    determine_ext,
    int_or_none,
    js_to_json,
    parse_resolution,
    str_or_none,
    traverse_obj,
    unescapeHTML,
    unified_strdate,
    url_or_none,
    urlencode_postdata,
    urljoin,
)


class DouyuTVIE(InfoExtractor):
    IE_DESC = '斗鱼'
class DouyuBaseIE(InfoExtractor):
    def _download_cryptojs_md5(self, video_id):
        for url in [
            'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
            'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
        ]:
            js_code = self._download_webpage(
                url, video_id, note='Downloading signing dependency', fatal=False)
            if js_code:
                self.cache.store('douyu', 'crypto-js-md5', js_code)
                return js_code
        raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')

    def _get_cryptojs_md5(self, video_id):
        return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id)

    def _calc_sign(self, sign_func, video_id, a):
        b = uuid.uuid4().hex
        c = round(time.time())
        js_script = f'{self._get_cryptojs_md5(video_id)};{sign_func};console.log(ub98484234("{a}","{b}","{c}"))'
        phantom = PhantomJSwrapper(self)
        result = phantom.execute(js_script, video_id,
                                 note='Executing JS signing script').strip()
        return {i: v[0] for i, v in urllib.parse.parse_qs(result).items()}

    def _search_js_sign_func(self, webpage, fatal=True):
        # The greedy look-behind ensures last possible script tag is matched
        return self._search_regex(
            r'(?:<script.*)?<script[^>]*>(.*?ub98484234.*?)</script>', webpage, 'JS sign func', fatal=fatal)


class DouyuTVIE(DouyuBaseIE):
    IE_DESC = '斗鱼直播'
    _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
    _TESTS = [{
        'url': 'http://www.douyutv.com/iseven',
        'url': 'https://www.douyu.com/pigff',
        'info_dict': {
            'id': '17732',
            'display_id': 'iseven',
            'ext': 'flv',
            'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': r're:.*m7show@163\.com.*',
            'thumbnail': r're:^https?://.*\.png',
            'uploader': '7师傅',
            'id': '24422',
            'display_id': 'pigff',
            'ext': 'mp4',
            'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群',
            'thumbnail': str,
            'uploader': 'pigff',
            'is_live': True,
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,

@@ -85,15 +126,43 @@ class DouyuTVIE(InfoExtractor):
        'only_matching': True,
    }]

    def _get_sign_func(self, room_id, video_id):
        return self._download_json(
            f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id,
            note='Getting signing script')['data'][f'room{room_id}']

    def _extract_stream_formats(self, stream_formats):
        formats = []
        for stream_info in traverse_obj(stream_formats, (..., 'data')):
            stream_url = urljoin(
                traverse_obj(stream_info, 'rtmp_url'), traverse_obj(stream_info, 'rtmp_live'))
            if stream_url:
                rate_id = traverse_obj(stream_info, ('rate', {int_or_none}))
                rate_info = traverse_obj(stream_info, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False)
                ext = determine_ext(stream_url)
                formats.append({
                    'url': stream_url,
                    'format_id': str_or_none(rate_id),
                    'ext': 'mp4' if ext == 'm3u8' else ext,
                    'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
                    'quality': rate_id % -10000 if rate_id is not None else None,
                    **traverse_obj(rate_info, {
                        'format': ('name', {str_or_none}),
                        'tbr': ('bit', {int_or_none}),
                    }),
                })
        return formats

    def _real_extract(self, url):
        video_id = self._match_id(url)

        if video_id.isdigit():
            room_id = video_id
        else:
            page = self._download_webpage(url, video_id)
            room_id = self._html_search_regex(
                r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
        webpage = self._download_webpage(url, video_id)
        room_id = self._search_regex(r'\$ROOM\.room_id\s*=\s*(\d+)', webpage, 'room id')

        if self._search_regex(r'"videoLoop"\s*:\s*(\d+)', webpage, 'loop', default='') == '1':
            raise UserNotLive('The channel is auto-playing VODs', video_id=video_id)
        if self._search_regex(r'\$ROOM\.show_status\s*=\s*(\d+)', webpage, 'status', default='') == '2':
            raise UserNotLive(video_id=video_id)

        # Grab metadata from API
        params = {

@@ -102,110 +171,136 @@ def _real_extract(self, url):
            'time': int(time.time()),
        }
        params['auth'] = hashlib.md5(
            f'room/{video_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
        room = self._download_json(
            f'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
        room = traverse_obj(self._download_json(
            f'http://www.douyutv.com/api/v1/room/{room_id}', video_id,
            note='Downloading room info', query=params)['data']
            note='Downloading room info', query=params, fatal=False), 'data')

        # 1 = live, 2 = offline
        if room.get('show_status') == '2':
            raise ExtractorError('Live stream is offline', expected=True)
        if traverse_obj(room, 'show_status') == '2':
            raise UserNotLive(video_id=video_id)

        video_url = urljoin('https://hls3-akm.douyucdn.cn/', self._search_regex(r'(live/.*)', room['hls_url'], 'URL'))
        formats, subs = self._extract_m3u8_formats_and_subtitles(video_url, room_id)
        js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id)
        form_data = {
            'rate': 0,
            **self._calc_sign(js_sign_func, video_id, room_id),
        }
        stream_formats = [self._download_json(
            f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
            video_id, note="Downloading livestream format",
            data=urlencode_postdata(form_data))]

        title = unescapeHTML(room['room_name'])
        description = room.get('show_details')
        thumbnail = room.get('room_src')
        uploader = room.get('nickname')
        for rate_id in traverse_obj(stream_formats[0], ('data', 'multirates', ..., 'rate')):
            if rate_id != traverse_obj(stream_formats[0], ('data', 'rate')):
                form_data['rate'] = rate_id
                stream_formats.append(self._download_json(
                    f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
                    video_id, note=f'Downloading livestream format {rate_id}',
                    data=urlencode_postdata(form_data)))

        return {
            'id': room_id,
            'display_id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'formats': self._extract_stream_formats(stream_formats),
            'is_live': True,
            'subtitles': subs,
            'formats': formats,
            **traverse_obj(room, {
                'display_id': ('url', {str}, {lambda i: i[1:]}),
                'title': ('room_name', {unescapeHTML}),
                'description': ('show_details', {str}),
                'uploader': ('nickname', {str}),
                'thumbnail': ('room_src', {url_or_none}),
            })
        }


class DouyuShowIE(InfoExtractor):
class DouyuShowIE(DouyuBaseIE):
    _VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'

    _TESTS = [{
        'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw',
        'md5': '0c2cfd068ee2afe657801269b2d86214',
        'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY',
        'info_dict': {
            'id': 'rjNBdvnVXNzvE2yw',
            'id': 'mPyq7oVNe5Yv1gLY',
            'ext': 'mp4',
            'title': '陈一发儿:砒霜 我有个室友系列!04-01 22点场',
            'duration': 7150.08,
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': '陈一发儿',
            'uploader_id': 'XrZwYelr5wbK',
            'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK',
            'upload_date': '20170402',
            'title': '四川人小时候的味道“蒜苗回锅肉”,传统菜不能丢,要常做来吃',
            'duration': 633,
            'thumbnail': str,
            'uploader': '美食作家王刚V',
            'uploader_id': 'OVAO4NVx1m7Q',
            'timestamp': 1661850002,
            'upload_date': '20220830',
            'view_count': int,
            'tags': ['美食', '美食综合'],
        },
    }, {
        'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
        'only_matching': True,
    }]

    _FORMATS = {
        'super': '原画',
        'high': '超清',
        'normal': '高清',
    }

    _QUALITIES = {
        'super': -1,
        'high': -2,
        'normal': -3,
    }

    _RESOLUTIONS = {
        'super': '1920x1080',
        'high': '1280x720',
        'normal': '852x480',
    }

    def _real_extract(self, url):
        url = url.replace('vmobile.', 'v.')
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        room_info = self._parse_json(self._search_regex(
            r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id)
        video_info = self._search_json(
            r'<script>\s*window\.\$DATA\s*=', webpage,
            'video info', video_id, transform_source=js_to_json)

        video_info = None
        js_sign_func = self._search_js_sign_func(webpage)
        form_data = {
            'vid': video_id,
            **self._calc_sign(js_sign_func, video_id, video_info['ROOM']['point_id']),
        }
        url_info = self._download_json(
            'https://v.douyu.com/api/stream/getStreamUrl', video_id,
            data=urlencode_postdata(form_data), note="Downloading video formats")

        for trial in range(5):
            # Sometimes Douyu rejects our request. Let's try it more times
            try:
                video_info = self._download_json(
                    'https://vmobile.douyu.com/video/getInfo', video_id,
                    query={'vid': video_id},
                    headers={
                        'Referer': url,
                        'x-requested-with': 'XMLHttpRequest',
                    })
                break
            except ExtractorError:
                self._sleep(1, video_id)

        if not video_info:
            raise ExtractorError('Can\'t fetch video info')

        formats = self._extract_m3u8_formats(
            video_info['data']['video_url'], video_id,
            entry_protocol='m3u8_native', ext='mp4')

        upload_date = unified_strdate(self._html_search_regex(
            r'<em>上传时间:</em><span>([^<]+)</span>', webpage,
            'upload date', fatal=False))

        uploader = uploader_id = uploader_url = None
        mobj = re.search(
            r'(?m)<a[^>]+href="/author/([0-9a-zA-Z]+)".+?<strong[^>]+title="([^"]+)"',
            webpage)
        if mobj:
            uploader_id, uploader = mobj.groups()
            uploader_url = urljoin(url, '/author/' + uploader_id)
        formats = []
        for name, url in traverse_obj(url_info, ('data', 'thumb_video', {dict.items}, ...)):
            video_url = traverse_obj(url, ('url', {url_or_none}))
            if video_url:
                ext = determine_ext(video_url)
                formats.append({
                    'format': self._FORMATS.get(name),
                    'format_id': name,
                    'url': video_url,
                    'quality': self._QUALITIES.get(name),
                    'ext': 'mp4' if ext == 'm3u8' else ext,
                    'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
                    **parse_resolution(self._RESOLUTIONS.get(name))
                })
            else:
                self.to_screen(
                    f'"{self._FORMATS.get(name, name)}" format may require logging in. {self._login_hint()}')

        return {
            'id': video_id,
            'title': room_info['name'],
            'formats': formats,
            'duration': room_info.get('duration'),
            'thumbnail': room_info.get('pic'),
            'upload_date': upload_date,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
            **traverse_obj(video_info, ('DATA', {
                'title': ('content', 'title', {str}),
                'uploader': ('content', 'author', {str}),
                'uploader_id': ('content', 'up_id', {str_or_none}),
                'duration': ('content', 'video_duration', {int_or_none}),
                'thumbnail': ('content', 'video_pic', {url_or_none}),
                'timestamp': ('content', 'create_time', {int_or_none}),
                'view_count': ('content', 'view_num', {int_or_none}),
                'tags': ('videoTag', ..., 'tagName', {str}),
            }))
        }
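An outline of the Douyu signing dance above, reduced to its data flow. The JS function ub98484234 normally runs inside PhantomJS together with the cached crypto-js bundle; here a hard-coded stub stands in for its output, so every value below is a placeholder:

import time
import urllib.parse
import uuid

room_id = '24422'
did = uuid.uuid4().hex   # 'b' in _calc_sign
tt = round(time.time())  # 'c' in _calc_sign

# PhantomJS would evaluate: ub98484234(room_id, did, tt) -> a query string
fake_js_result = f'v=220120250&did={did}&tt={tt}&sign=deadbeef'

# same flattening as _calc_sign: parse_qs values are lists, take the first item
form_data = {k: v[0] for k, v in urllib.parse.parse_qs(fake_js_result).items()}
form_data['rate'] = 0
print(form_data)  # POSTed to /lapi/live/getH5Play/<room_id>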
@@ -746,7 +746,7 @@ class MotorTrendIE(DiscoveryPlusBaseIE):


class MotorTrendOnDemandIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:www\.)?motortrendondemand\.com/detail' + DPlayBaseIE._PATH_REGEX
    _VALID_URL = r'https?://(?:www\.)?motortrend(?:ondemand\.com|\.com/plus)/detail' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://www.motortrendondemand.com/detail/wheelstanding-dump-truck-stubby-bobs-comeback/37699/784',
        'info_dict': {

@@ -767,6 +767,25 @@ class MotorTrendOnDemandIE(DiscoveryPlusBaseIE):
            'upload_date': '20140101',
            'tags': [],
        },
    }, {
        'url': 'https://www.motortrend.com/plus/detail/roadworthy-rescues-teaser-trailer/4922860/',
        'info_dict': {
            'id': '4922860',
            'ext': 'mp4',
            'title': 'Roadworthy Rescues | Teaser Trailer',
            'description': 'Derek Bieri helps Freiburger and Finnegan with their \'68 big-block Dart.',
            'display_id': 'roadworthy-rescues-teaser-trailer/4922860',
            'creator': 'Originals',
            'series': 'Roadworthy Rescues',
            'thumbnail': r're:^https?://.+\.jpe?g$',
            'upload_date': '20220907',
            'timestamp': 1662523200,
            'duration': 1066.356,
            'tags': [],
        },
    }, {
        'url': 'https://www.motortrend.com/plus/detail/ugly-duckling/2450033/12439',
        'only_matching': True,
    }]

    _PRODUCT = 'MTOD'
@@ -1,3 +1,4 @@
import base64
import os.path
import re

@@ -5,14 +6,13 @@
from ..compat import compat_urllib_parse_unquote
from ..utils import (
    ExtractorError,
    traverse_obj,
    try_get,
    update_url_query,
    url_basename,
)


class DropboxIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'
    _VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/fi|sh?)/(?P<id>\w+)'
    _TESTS = [
        {
            'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',

@@ -22,7 +22,16 @@ class DropboxIE(InfoExtractor):
            'title': 'youtube-dl test video \'ä"BaW_jenozKc'
        }
    }, {
        'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
        'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh',
        'only_matching': True,
    }, {
        'url': 'https://www.dropbox.com/sh/2mgpiuq7kv8nqdf/AABy-fW4dkydT4GmWi2mdOUDa?dl=0&preview=Drone+Shot.mp4',
        'only_matching': True,
    }, {
        'url': 'https://www.dropbox.com/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',
        'only_matching': True,
    }, {
        'url': 'https://www.dropbox.com/e/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',
        'only_matching': True,
    },
    ]
@@ -53,16 +62,25 @@ def _real_extract(self, url):
            else:
                raise ExtractorError('Password protected video, use --video-password <password>', expected=True)

        info_json = self._search_json(r'InitReact\.mountComponent\(.*?,', webpage, 'mountComponent', video_id,
                                      contains_pattern=r'{.+?"preview".+?}', end_pattern=r'\)')['props']
        transcode_url = traverse_obj(info_json, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False)
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id)
        formats, subtitles, has_anonymous_download = [], {}, False
        for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
            decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')
            transcode_url = self._search_regex(
                r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None)
            if not transcode_url:
                continue
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
            has_anonymous_download = self._search_regex(r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
            break

        # downloads enabled we can get the original file
        if 'anonymous' in (try_get(info_json, lambda x: x['sharePermission']['canDownloadRoles']) or []):
            video_url = re.sub(r'[?&]dl=0', '', url)
            video_url += ('?' if '?' not in video_url else '&') + 'dl=1'
            formats.append({'url': video_url, 'format_id': 'original', 'format_note': 'Original', 'quality': 1})
        if has_anonymous_download:
            formats.append({
                'url': update_url_query(url, {'dl': '1'}),
                'format_id': 'original',
                'format_note': 'Original',
                'quality': 1
            })

        return {
            'id': video_id,
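
The reworked Dropbox flow above pulls the HLS transcode URL out of base64-encoded `registerStreamedPrefetch` payloads instead of the old `InitReact` JSON. A rough standalone sketch of that decoding step; the page snippet here is fabricated for illustration:

    import base64
    import re

    # Hypothetical page content; real pages embed opaque binary-ish payloads
    webpage = 'registerStreamedPrefetch("abc=", "%s")' % base64.b64encode(
        b'\n.https://example.com/transcode/video.m3u8\nanonymous:\tanonymous').decode()

    for encoded in reversed(re.findall(
            r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
        decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')
        m = re.search(r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded)
        if m:
            print(m.group(1))  # the HLS playlist fed to the m3u8 extractor
            break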
96  yt_dlp/extractor/eplus.py  Normal file

@@ -0,0 +1,96 @@
from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    try_call,
    unified_timestamp,
)


class EplusIbIE(InfoExtractor):
    IE_NAME = 'eplus:inbound'
    IE_DESC = 'e+ (イープラス) overseas'
    _VALID_URL = r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)'
    _TESTS = [{
        'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
        'info_dict': {
            'id': '354502-0001-002',
            'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022~LIVE with a smile!~【Streaming+(配信)】',
            'live_status': 'was_live',
            'release_date': '20211231',
            'release_timestamp': 1640952000,
            'description': str,
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
        'expected_warnings': [
            'Could not find the playlist URL. This event may not be accessible',
            'No video formats found!',
            'Requested format is not available',
        ],
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        data_json = self._search_json(r'<script>\s*var app\s*=', webpage, 'data json', video_id)

        delivery_status = data_json.get('delivery_status')
        archive_mode = data_json.get('archive_mode')
        release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
        release_timestamp_str = data_json.get('event_datetime_text')  # JST

        self.write_debug(f'delivery_status = {delivery_status}, archive_mode = {archive_mode}')

        if delivery_status == 'PREPARING':
            live_status = 'is_upcoming'
        elif delivery_status == 'STARTED':
            live_status = 'is_live'
        elif delivery_status == 'STOPPED':
            if archive_mode != 'ON':
                raise ExtractorError(
                    'This event has ended and there is no archive for this event', expected=True)
            live_status = 'post_live'
        elif delivery_status == 'WAIT_CONFIRM_ARCHIVED':
            live_status = 'post_live'
        elif delivery_status == 'CONFIRMED_ARCHIVE':
            live_status = 'was_live'
        else:
            self.report_warning(f'Unknown delivery_status {delivery_status}, treat it as a live')
            live_status = 'is_live'

        formats = []

        m3u8_playlist_urls = self._search_json(
            r'var listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[])
        if not m3u8_playlist_urls:
            if live_status == 'is_upcoming':
                self.raise_no_formats(
                    f'Could not find the playlist URL. This live event will begin at {release_timestamp_str} JST', expected=True)
            else:
                self.raise_no_formats(
                    'Could not find the playlist URL. This event may not be accessible', expected=True)
        elif live_status == 'is_upcoming':
            self.raise_no_formats(f'This live event will begin at {release_timestamp_str} JST', expected=True)
        elif live_status == 'post_live':
            self.raise_no_formats('This event has ended, and the archive will be available shortly', expected=True)
        else:
            for m3u8_playlist_url in m3u8_playlist_urls:
                formats.extend(self._extract_m3u8_formats(m3u8_playlist_url, video_id))
            # FIXME: HTTP request headers need to be updated to continue download
            warning = 'Due to technical limitations, the download will be interrupted after one hour'
            if live_status == 'is_live':
                self.report_warning(warning)
            elif live_status == 'was_live':
                self.report_warning(f'{warning}. You can restart to continue the download')

        return {
            'id': data_json['app_id'],
            'title': data_json.get('app_name'),
            'formats': formats,
            'live_status': live_status,
            'description': data_json.get('content'),
            'release_timestamp': release_timestamp,
        }
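
The `- 32400` above shifts the site's JST wall-clock time (UTC+9) down to a UTC epoch. A stdlib-only sketch of the same conversion; the 'YYYY/MM/DD HH:MM' text format is assumed for illustration only:

    from datetime import datetime, timezone, timedelta

    JST = timezone(timedelta(hours=9))

    def jst_to_epoch(text: str) -> int:
        # Parse the page's local (JST) time, then express it as a UTC epoch
        return int(datetime.strptime(text, '%Y/%m/%d %H:%M').replace(tzinfo=JST).timestamp())

    print(jst_to_epoch('2021/12/31 21:00'))  # 1640952000, matching the test above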
63  yt_dlp/extractor/erocast.py  Normal file

@@ -0,0 +1,63 @@
from .common import InfoExtractor
from ..utils import (
    int_or_none,
    parse_iso8601,
    str_or_none,
    traverse_obj,
    url_or_none,
)


class ErocastIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?erocast\.me/track/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://erocast.me/track/9787/f',
        'md5': 'af63b91f5f231096aba54dd682abea3b',
        'info_dict': {
            'id': '9787',
            'title': '[F4M] Your roommate, who is definitely not possessed by an alien, suddenly wants to fuck you',
            'url': 'https://erocast.s3.us-east-2.wasabisys.com/1220419/track.m3u8',
            'ext': 'm4a',
            'age_limit': 18,
            'release_timestamp': 1696178652,
            'release_date': '20231001',
            'modified_timestamp': int,
            'modified_date': str,
            'description': 'ExtraTerrestrial Tuesday!',
            'uploader': 'clarissaisshy',
            'uploader_id': '8113',
            'uploader_url': 'https://erocast.me/clarissaisshy',
            'thumbnail': 'https://erocast.s3.us-east-2.wasabisys.com/1220418/conversions/1696179247-lg.jpg',
            'duration': 2307,
            'view_count': int,
            'comment_count': int,
            'webpage_url': 'https://erocast.me/track/9787/f4m-your-roommate-who-is-definitely-not-possessed-by-an-alien-suddenly-wants-to-fuck-you',
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        data = self._search_json(
            rf'<script>\s*var song_data_{video_id}\s*=', webpage, 'data', video_id, end_pattern=r'</script>')

        return {
            'id': video_id,
            'formats': self._extract_m3u8_formats(
                data.get('file_url') or data['stream_url'], video_id, 'm4a', m3u8_id='hls'),
            'age_limit': 18,
            **traverse_obj(data, {
                'title': ('title', {str}),
                'description': ('description', {str}),
                'release_timestamp': ('created_at', {parse_iso8601}),
                'modified_timestamp': ('updated_at', {parse_iso8601}),
                'uploader': ('user', 'name', {str}),
                'uploader_id': ('user', 'id', {str_or_none}),
                'uploader_url': ('user', 'permalink_url', {url_or_none}),
                'thumbnail': ('artwork_url', {url_or_none}),
                'duration': ('duration', {int_or_none}),
                'view_count': ('plays', {int_or_none}),
                'comment_count': ('comment_count', {int_or_none}),
                'webpage_url': ('permalink_url', {url_or_none}),
            }),
        }
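
The extractor above leans on `_search_json` to grab a `var song_data_<id> = {...}` object straight out of the page. A stdlib-only approximation of that idea, assuming (as this site happens to satisfy) that the object literal is valid JSON:

    import json
    import re

    def search_script_json(webpage: str, var_name: str) -> dict:
        # Capture the object between `var <name> =` and the closing </script>
        m = re.search(
            rf'<script>\s*var {re.escape(var_name)}\s*=\s*(\{{.+?\}})\s*;?\s*</script>',
            webpage, re.DOTALL)
        return json.loads(m.group(1)) if m else {}

    page = '<script>var song_data_9787 = {"title": "demo", "duration": 2307}</script>'
    print(search_script_json(page, 'song_data_9787'))  # {'title': 'demo', 'duration': 2307}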
@@ -11,8 +11,8 @@ class ExpressenIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?(?:expressen|di)\.se/
                        (?:(?:tvspelare/video|videoplayer/embed)/)?
                        tv/(?:[^/]+/)*
                        (?:(?:tvspelare/video|video-?player/embed)/)?
                        (?:tv|nyheter)/(?:[^/?#]+/)*
                        (?P<id>[^/?#&]+)
                    '''
    _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1']

@@ -42,6 +42,12 @@ class ExpressenIE(InfoExtractor):
    }, {
        'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
        'only_matching': True,
    }, {
        'url': 'https://www.expressen.se/video-player/embed/tv/nyheter/ekero-fodda-olof-gustafsson-forvaltar-knarkbaronen-pablo-escobars-namn',
        'only_matching': True,
    }, {
        'url': 'https://www.expressen.se/nyheter/efter-egna-telefonbluffen-escobar-stammer-klarna/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
@@ -74,6 +74,22 @@ class FacebookIE(InfoExtractor):
    _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'

    _TESTS = [{
        'url': 'https://www.facebook.com/radiokicksfm/videos/3676516585958356/',
        'info_dict': {
            'id': '3676516585958356',
            'ext': 'mp4',
            'title': 'dr Adam Przygoda',
            'description': 'md5:34675bda53336b1d16400265c2bb9b3b',
            'uploader': 'RADIO KICKS FM',
            'upload_date': '20230818',
            'timestamp': 1692346159,
            'thumbnail': r're:^https?://.*',
            'uploader_id': '100063551323670',
            'duration': 3132.184,
            'view_count': int,
            'concurrent_view_count': 0,
        },
    }, {
        'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
        'md5': '6a40d33c0eccbb1af76cf0485a052659',
        'info_dict': {

@@ -97,7 +113,7 @@ class FacebookIE(InfoExtractor):
            'upload_date': '20140506',
            'timestamp': 1399398998,
            'thumbnail': r're:^https?://.*',
            'uploader_id': 'pfbid04scW44U4P9iTyLZAGy8y8W3pR3i2VugvHCimiRudUAVbN3MPp9eXBaYFcgVworZwl',
            'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
            'duration': 131.03,
            'concurrent_view_count': int,
        },

@@ -179,7 +195,7 @@ class FacebookIE(InfoExtractor):
            'timestamp': 1486648217,
            'upload_date': '20170209',
            'uploader': 'Yaroslav Korpan',
            'uploader_id': 'pfbid029y8j22EwH3ikeqgH3SEP9G3CAi9kmWKgXJJG9s5geV7mo3J2bvURqHCdgucRgAyhl',
            'uploader_id': 'pfbid06AScABAWcW91qpiuGrLt99Ef9tvwHoXP6t8KeFYEqkSfreMtfa9nTveh8b2ZEVSWl',
            'concurrent_view_count': int,
            'thumbnail': r're:^https?://.*',
            'view_count': int,

@@ -274,7 +290,7 @@ class FacebookIE(InfoExtractor):
            'title': 'Josef',
            'thumbnail': r're:^https?://.*',
            'concurrent_view_count': int,
            'uploader_id': 'pfbid02gXHbDwxumkaKJQaTGUf3znYfYzTuidGEWawiramNx4YamSj2afwYSRkpcjtHtMRJl',
            'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
            'timestamp': 1549275572,
            'duration': 3.413,
            'uploader': 'Josef Novak',

@@ -401,9 +417,9 @@ def _extract_from_url(self, url, video_id):

        def extract_metadata(webpage):
            post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall(
                r'handleWithCustomApplyEach\(\s*ScheduledApplyEach\s*,\s*(\{.+?\})\s*\);', webpage)]
                r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
            post = traverse_obj(post_data, (
                ..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
                ..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
            media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
                k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
            title = get_first(media, ('title', 'text'))

@@ -481,25 +497,25 @@ def extract_dash_manifest(video, formats):
            dash_manifest = video.get('dash_manifest')
            if dash_manifest:
                formats.extend(self._parse_mpd_formats(
                    compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest))))
                    compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
                    mpd_url=video.get('dash_manifest_url')))

        def process_formats(info):
            # Downloads with browser's User-Agent are rate limited. Working around
            # with non-browser User-Agent.
            for f in info['formats']:
                f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
            info['_format_sort_fields'] = ('res', 'quality')

        def extract_relay_data(_filter):
            return self._parse_json(self._search_regex(
                r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
                r'data-sjs>({.*?%s.*?})</script>' % _filter,
                webpage, 'replay data', default='{}'), video_id, fatal=False) or {}

        def extract_relay_prefetched_data(_filter):
            replay_data = extract_relay_data(_filter)
            for require in (replay_data.get('require') or []):
                if require[0] == 'RelayPrefetchedStreamCache':
                    return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
            return traverse_obj(extract_relay_data(_filter), (
                'require', (None, (..., ..., ..., '__bbox', 'require')),
                lambda _, v: 'RelayPrefetchedStreamCache' in v, ..., ...,
                '__bbox', 'result', 'data', {dict}), get_all=False) or {}

        if not video_data:
            server_js_data = self._parse_json(self._search_regex([

@@ -510,7 +526,7 @@ def extract_relay_prefetched_data(_filter):

        if not video_data:
            data = extract_relay_prefetched_data(
                r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
                r'"(?:dash_manifest|playable_url(?:_quality_hd)?)')
            if data:
                entries = []

@@ -525,7 +541,8 @@ def parse_graphql_video(video):
            formats = []
            q = qualities(['sd', 'hd'])
            for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
                                   ('playable_url_dash', '')):
                                   ('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
                                   ('browser_native_sd_url', 'sd')):
                playable_url = video.get(key)
                if not playable_url:
                    continue

@@ -534,7 +551,8 @@ def parse_graphql_video(video):
                else:
                    formats.append({
                        'format_id': format_id,
                        'quality': q(format_id),
                        # sd, hd formats w/o resolution info should be deprioritized below DASH
                        'quality': q(format_id) - 3,
                        'url': playable_url,
                    })
                extract_dash_manifest(video, formats)

@@ -701,9 +719,11 @@ def parse_attachment(attachment, key='media'):
                for src_type in ('src', 'src_no_ratelimit'):
                    src = f[0].get('%s_%s' % (quality, src_type))
                    if src:
                        preference = -10 if format_id == 'progressive' else -1
                        # sd, hd formats w/o resolution info should be deprioritized below DASH
                        # TODO: investigate if progressive or src formats still exist
                        preference = -10 if format_id == 'progressive' else -3
                        if quality == 'hd':
                            preference += 5
                            preference += 1
                        formats.append({
                            'format_id': '%s_%s_%s' % (format_id, quality, src_type),
                            'url': src,
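
Both quality hunks above shuffle Facebook's relative format ranking. `qualities()` simply maps a format ID to its index in a preference list, so subtracting 3 pushes the resolution-less sd/hd progressive URLs below the DASH formats, per the comment in the diff. A small sketch, assuming yt-dlp is importable:

    from yt_dlp.utils import qualities

    q = qualities(['sd', 'hd'])      # index in the list = relative preference
    print(q('sd'), q('hd'))          # 0 1
    print(q('sd') - 3, q('hd') - 3)  # -3 -2, i.e. ranked below the untagged DASH formats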
@@ -8,7 +8,7 @@


class FifaIE(InfoExtractor):
    _VALID_URL = r'https?://www.fifa.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)'
    _VALID_URL = r'https?://www\.fifa\.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y',
        'info_dict': {

@@ -3,7 +3,7 @@


class FilmmoduIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www.)?filmmodu.org/(?P<id>[^/]+-(?:turkce-dublaj-izle|altyazili-izle))'
    _VALID_URL = r'https?://(?:www\.)?filmmodu\.org/(?P<id>[^/]+-(?:turkce-dublaj-izle|altyazili-izle))'
    _TESTS = [{
        'url': 'https://www.filmmodu.org/f9-altyazili-izle',
        'md5': 'aeefd955c2a508a5bdaa3bcec8eeb0d4',
@@ -20,7 +20,7 @@


class FOXIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
    _VALID_URL = r'https?://(?:www\.)?fox(?:sports)?\.com/(?:watch|replay)/(?P<id>[\da-fA-F]+)'
    _TESTS = [{
        # clip
        'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',

@@ -50,6 +50,10 @@ class FOXIE(InfoExtractor):
        # sports event, geo-restricted
        'url': 'https://www.fox.com/watch/b057484dade738d1f373b3e46216fa2c/',
        'only_matching': True,
    }, {
        # fox sports replay, geo-restricted
        'url': 'https://www.foxsports.com/replay/561f3e071347a24e5e877abc56b22e89',
        'only_matching': True,
    }]
    _GEO_BYPASS = False
    _HOME_PAGE_URL = 'https://www.fox.com/'
@@ -60,6 +60,7 @@ class Funker530IE(InfoExtractor):
    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        info = {}
        rumble_url = list(RumbleEmbedIE._extract_embed_urls(url, webpage))
        if rumble_url:
            info = {'url': rumble_url[0], 'ie_key': RumbleEmbedIE.ie_key()}
@@ -58,6 +58,8 @@ class GenericIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'trailer',
            'upload_date': '20100513',
            'direct': True,
            'timestamp': 1273772943.0,
        }
    },
    # Direct link to media delivered compressed (until Accept-Encoding is *)

@@ -101,6 +103,8 @@ class GenericIE(InfoExtractor):
            'ext': 'webm',
            'title': '5_Lennart_Poettering_-_Systemd',
            'upload_date': '20141120',
            'direct': True,
            'timestamp': 1416498816.0,
        },
        'expected_warnings': [
            'URL could be a direct video link, returning it as such.'

@@ -133,6 +137,7 @@ class GenericIE(InfoExtractor):
                'upload_date': '20201204',
            },
        }],
        'skip': 'Dead link',
    },
    # RSS feed with item with description and thumbnails
    {

@@ -145,12 +150,12 @@ class GenericIE(InfoExtractor):
        'playlist': [{
            'info_dict': {
                'ext': 'm4a',
                'id': 'c1c879525ce2cb640b344507e682c36d',
                'id': '818a5d38-01cd-152f-2231-ee479677fa82',
                'title': 're:Hydrogen!',
                'description': 're:.*In this episode we are going.*',
                'timestamp': 1567977776,
                'upload_date': '20190908',
                'duration': 459,
                'duration': 423,
                'thumbnail': r're:^https?://.*\.jpg$',
                'episode_number': 1,
                'season_number': 1,

@@ -267,6 +272,7 @@ class GenericIE(InfoExtractor):
        'params': {
            'skip_download': True,
        },
        'skip': '404 Not Found',
    },
    # MPD from http://dash-mse-test.appspot.com/media.html
    {

@@ -278,6 +284,7 @@ class GenericIE(InfoExtractor):
            'title': 'car-20120827-manifest',
            'formats': 'mincount:9',
            'upload_date': '20130904',
            'timestamp': 1378272859.0,
        },
    },
    # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8

@@ -318,7 +325,7 @@ class GenericIE(InfoExtractor):
            'id': 'cmQHVoWB5FY',
            'ext': 'mp4',
            'upload_date': '20130224',
            'uploader_id': 'TheVerge',
            'uploader_id': '@TheVerge',
            'description': r're:^Chris Ziegler takes a look at the\.*',
            'uploader': 'The Verge',
            'title': 'First Firefox OS phones side-by-side',

@@ -2370,7 +2377,7 @@ def _extract_kvs(self, url, webpage, video_id):
            'id': flashvars['video_id'],
            'display_id': display_id,
            'title': title,
            'thumbnail': thumbnail,
            'thumbnail': urljoin(url, thumbnail),
            'formats': formats,
        }

@@ -2562,7 +2569,7 @@ def _real_extract(self, url):
        self._downloader.write_debug('Looking for embeds')
        embeds = list(self._extract_embeds(original_url, webpage, urlh=full_response, info_dict=info_dict))
        if len(embeds) == 1:
            return {**info_dict, **embeds[0]}
            return merge_dicts(embeds[0], info_dict)
        elif embeds:
            return self.playlist_result(embeds, **info_dict)
        raise UnsupportedError(url)
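
The switch from `{**info_dict, **embeds[0]}` to `merge_dicts(embeds[0], info_dict)` changes precedence subtly: the embed's values still come first, but `None` and empty-string values no longer clobber real ones from `info_dict`. A rough stdlib approximation of that behaviour (see `yt_dlp/utils.py` for the real helper):

    def merge_dicts_sketch(*dicts):
        merged = {}
        for d in dicts:
            for k, v in d.items():
                # keep the first non-None value; allow replacing an empty string
                if v is not None and (k not in merged or merged[k] == ''):
                    merged[k] = v
        return merged

    print(merge_dicts_sketch({'title': ''}, {'title': 'from page', 'id': None}))
    # -> {'title': 'from page'}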
@@ -60,13 +60,13 @@ def _real_initialize(self):
        account_data = self._download_json(
            'https://api.gofile.io/createAccount', None, note='Getting a new guest account')
        self._TOKEN = account_data['data']['token']
        self._set_cookie('gofile.io', 'accountToken', self._TOKEN)
        self._set_cookie('.gofile.io', 'accountToken', self._TOKEN)

    def _entries(self, file_id):
        query_params = {
            'contentId': file_id,
            'token': self._TOKEN,
            'websiteToken': 12345,
            'websiteToken': '7fd94ds12fds4',  # From https://gofile.io/dist/js/alljs.js
        }
        password = self.get_param('videopassword')
        if password:
@@ -383,9 +383,9 @@ def __get_current_timestamp():
        months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

        time_now = datetime.datetime.utcnow()
        time_now = datetime.datetime.now(datetime.timezone.utc)
        format_string = "{} {} {} %H:%M:%S UTC %Y".format(days[time_now.weekday()], months[time_now.month], time_now.day)
        time_string = datetime.datetime.utcnow().strftime(format_string)
        time_string = time_now.strftime(format_string)
        return time_string

    def __str__(self):
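
The hunk above swaps `datetime.utcnow()` (naive, and deprecated since Python 3.12) for `datetime.now(timezone.utc)`, which yields the same wall-clock value as an aware datetime, and reuses the single `time_now` instead of calling the clock twice. A quick stdlib illustration:

    import datetime

    # Aware UTC timestamp; .utcnow() would return the same wall-clock time but naive
    aware = datetime.datetime.now(datetime.timezone.utc)
    print(aware.tzinfo)                         # UTC
    print(aware.strftime('%H:%M:%S UTC %Y'))    # e.g. '12:34:56 UTC 2023'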
@@ -84,7 +84,7 @@ class HotStarIE(HotStarBaseIE):
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/)
        (?:
            (?P<type>movies|sports|episode|(?P<tv>tv|shows))/
            (?P<type>movies|sports|clips|episode|(?P<tv>tv|shows))/
            (?(tv)(?:[^/?#]+/){2}|[^?#]*)
        )?
        [^/?#]+/

@@ -142,6 +142,51 @@ class HotStarIE(HotStarBaseIE):
            'duration': 1272,
            'channel_id': 3,
        },
        'skip': 'HTTP Error 504: Gateway Time-out',  # XXX: Investigate 504 errors on some episodes
    }, {
        'url': 'https://www.hotstar.com/in/shows/kana-kaanum-kaalangal/1260097087/back-to-school/1260097320',
        'info_dict': {
            'id': '1260097320',
            'ext': 'mp4',
            'title': 'Back To School',
            'season': 'Chapter 1',
            'description': 'md5:b0d6a4c8a650681491e7405496fc7e13',
            'timestamp': 1650564000,
            'channel': 'Hotstar Specials',
            'series': 'Kana Kaanum Kaalangal',
            'season_number': 1,
            'season_id': 9441,
            'upload_date': '20220421',
            'episode': 'Back To School',
            'episode_number': 1,
            'duration': 1810,
            'channel_id': 54,
        },
    }, {
        'url': 'https://www.hotstar.com/in/clips/e3-sairat-kahani-pyaar-ki/1000262286',
        'info_dict': {
            'id': '1000262286',
            'ext': 'mp4',
            'title': 'E3 - SaiRat, Kahani Pyaar Ki',
            'description': 'md5:e3b4b3203bc0c5396fe7d0e4948a6385',
            'episode': 'E3 - SaiRat, Kahani Pyaar Ki',
            'upload_date': '20210606',
            'timestamp': 1622943900,
            'duration': 5395,
        },
    }, {
        'url': 'https://www.hotstar.com/in/movies/premam/1000091195',
        'info_dict': {
            'id': '1000091195',
            'ext': 'mp4',
            'title': 'Premam',
            'release_year': 2015,
            'description': 'md5:d833c654e4187b5e34757eafb5b72d7f',
            'timestamp': 1462149000,
            'upload_date': '20160502',
            'episode': 'Premam',
            'duration': 8994,
        },
    }, {
        'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
        'only_matching': True,

@@ -160,6 +205,7 @@ class HotStarIE(HotStarBaseIE):
        'episode': 'episode',
        'tv': 'episode',
        'shows': 'episode',
        'clips': 'content',
        None: 'content',
    }

@@ -187,8 +233,10 @@ def _real_extract(self, url):
        video_type = self._TYPE.get(video_type, video_type)
        cookies = self._get_cookies(url)  # Cookies before any request

        video_data = self._call_api_v1(f'{video_type}/detail', video_id,
                                       query={'tas': 10000, 'contentId': video_id})['body']['results']['item']
        video_data = traverse_obj(
            self._call_api_v1(
                f'{video_type}/detail', video_id, fatal=False, query={'tas': 10000, 'contentId': video_id}),
            ('body', 'results', 'item', {dict})) or {}
        if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'):
            self.report_drm(video_id)

@@ -273,6 +321,7 @@ def _real_extract(self, url):
            'description': video_data.get('description'),
            'duration': int_or_none(video_data.get('duration')),
            'timestamp': int_or_none(traverse_obj(video_data, 'broadcastDate', 'startDate')),
            'release_year': int_or_none(video_data.get('year')),
            'formats': formats,
            'subtitles': subs,
            'channel': video_data.get('channelName'),
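
Note how `traverse_obj(video_data, 'broadcastDate', 'startDate')` in the timestamp line tries each path in turn and returns the first hit, which is how the value falls back between API fields. A tiny sketch, assuming yt-dlp is importable:

    from yt_dlp.utils import traverse_obj

    # 'broadcastDate' is missing, so the second path supplies the value
    print(traverse_obj({'startDate': 1462149000}, 'broadcastDate', 'startDate'))  # 1462149000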
@@ -1,19 +1,32 @@
import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    remove_end,
    traverse_obj,
    try_get,
    unified_timestamp,
    url_or_none,
    urlencode_postdata,
)


class HungamaIE(InfoExtractor):
class HungamaBaseIE(InfoExtractor):
    def _call_api(self, path, content_id, fatal=False):
        return traverse_obj(self._download_json(
            f'https://cpage.api.hungama.com/v2/page/content/{content_id}/{path}/detail',
            content_id, fatal=fatal, query={
                'device': 'web',
                'platform': 'a',
                'storeId': '1',
            }), ('data', {dict})) or {}


class HungamaIE(HungamaBaseIE):
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?hungama\.com/
                        (?:www\.|un\.)?hungama\.com/
                        (?:
                            (?:video|movie)/[^/]+/|
                            (?:video|movie|short-film)/[^/]+/|
                            tv-show/(?:[^/]+/){2}\d+/episode/[^/]+/
                        )
                        (?P<id>\d+)

@@ -25,13 +38,28 @@ class HungamaIE(InfoExtractor):
            'id': '39349649',
            'ext': 'mp4',
            'title': 'Krishna Chants',
            'description': 'Watch Krishna Chants video now. You can also watch other latest videos only at Hungama',
            'description': ' ',
            'upload_date': '20180829',
            'duration': 264,
            'timestamp': 1535500800,
            'view_count': int,
            'thumbnail': 'https://images.hungama.com/c/1/0dc/2ca/39349649/39349649_700x394.jpg',
        }
            'thumbnail': 'https://images1.hungama.com/tr:n-a_169_m/c/1/0dc/2ca/39349649/39349649_350x197.jpg?v=8',
            'tags': 'count:6',
        },
    }, {
        'url': 'https://un.hungama.com/short-film/adira/102524179/',
        'md5': '2278463f5dc9db9054d0c02602d44666',
        'info_dict': {
            'id': '102524179',
            'ext': 'mp4',
            'title': 'Adira',
            'description': 'md5:df20cd4d41eabb33634f06de1025a4b4',
            'upload_date': '20230417',
            'timestamp': 1681689600,
            'view_count': int,
            'thumbnail': 'https://images1.hungama.com/tr:n-a_23_m/c/1/197/ac9/102524179/102524179_350x525.jpg?v=1',
            'tags': 'count:7',
        },
    }, {
        'url': 'https://www.hungama.com/movie/kahaani-2/44129919/',
        'only_matching': True,

@@ -51,14 +79,19 @@ def _real_extract(self, url):
            'c': 'common',
            'm': 'get_video_mdn_url',
        })

        formats = self._extract_m3u8_formats(video_json['stream_url'], video_id, ext='mp4', m3u8_id='hls')

        json_ld = self._search_json_ld(
            self._download_webpage(url, video_id, fatal=False) or '', video_id, fatal=False)
        metadata = self._call_api('movie', video_id)

        return {
            **json_ld,
            **traverse_obj(metadata, ('head', 'data', {
                'title': ('title', {str}),
                'description': ('misc', 'description', {str}),
                'duration': ('duration', {int}),  # duration in JSON is incorrect if string
                'timestamp': ('releasedate', {unified_timestamp}),
                'view_count': ('misc', 'playcount', {int_or_none}),
                'thumbnail': ('image', {url_or_none}),
                'tags': ('misc', 'keywords', ..., {str}),
            })),
            'id': video_id,
            'formats': formats,
            'subtitles': {

@@ -71,10 +104,10 @@ def _real_extract(self, url):


class HungamaSongIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?hungama\.com/song/[^/]+/(?P<id>\d+)'
    _TEST = {
    _VALID_URL = r'https?://(?:www\.|un\.)?hungama\.com/song/[^/]+/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.hungama.com/song/kitni-haseen-zindagi/2931166/',
        'md5': 'd4a6a05a394ad0453a9bea3ca00e6024',
        'md5': '964f46828e8b250aa35e5fdcfdcac367',
        'info_dict': {
            'id': '2931166',
            'ext': 'mp3',

@@ -83,8 +116,22 @@ class HungamaSongIE(InfoExtractor):
            'artist': 'Lucky Ali',
            'album': None,
            'release_year': 2000,
        }
    }
            'thumbnail': 'https://stat2.hungama.ind.in/assets/images/default_images/da-200x200.png',
        },
    }, {
        'url': 'https://un.hungama.com/song/tum-kya-mile-from-rocky-aur-rani-kii-prem-kahaani/103553672',
        'md5': '964f46828e8b250aa35e5fdcfdcac367',
        'info_dict': {
            'id': '103553672',
            'ext': 'mp3',
            'title': 'md5:5ebeb1e10771b634ce5f700ce68ae5f4',
            'track': 'Tum Kya Mile (From "Rocky Aur Rani Kii Prem Kahaani")',
            'artist': 'Pritam Chakraborty, Arijit Singh, Shreya Ghoshal, Amitabh Bhattacharya',
            'album': 'Tum Kya Mile (From "Rocky Aur Rani Kii Prem Kahaani")',
            'release_year': 2023,
            'thumbnail': 'https://images.hungama.com/c/1/7c2/c7b/103553671/103553671_200x200.jpg',
        },
    }]

    def _real_extract(self, url):
        audio_id = self._match_id(url)

@@ -122,8 +169,8 @@ def _real_extract(self, url):
        }


class HungamaAlbumPlaylistIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?hungama\.com/(?:playlists|album)/[^/]+/(?P<id>\d+)'
class HungamaAlbumPlaylistIE(HungamaBaseIE):
    _VALID_URL = r'https?://(?:www\.|un\.)?hungama\.com/(?P<path>playlists|album)/[^/]+/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.hungama.com/album/bhuj-the-pride-of-india/69481490/',
        'playlist_mincount': 7,

@@ -132,16 +179,24 @@ class HungamaAlbumPlaylistIE(InfoExtractor):
        },
    }, {
        'url': 'https://www.hungama.com/playlists/hindi-jan-to-june-2021/123063/',
        'playlist_mincount': 50,
        'playlist_mincount': 33,
        'info_dict': {
            'id': '123063',
        },
    }, {
        'url': 'https://un.hungama.com/album/what-jhumka-%3F-from-rocky-aur-rani-kii-prem-kahaani/103891805/',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '103891805',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        ptrn = r'<meta[^>]+?property=[\"\']?music:song:url[\"\']?[^>]+?content=[\"\']?([^\"\']+)'
        items = re.findall(ptrn, webpage)
        entries = [self.url_result(item, ie=HungamaSongIE.ie_key()) for item in items]
        return self.playlist_result(entries, video_id)
        playlist_id, path = self._match_valid_url(url).group('id', 'path')
        data = self._call_api(remove_end(path, 's'), playlist_id, fatal=True)

        def entries():
            for song_url in traverse_obj(data, ('body', 'rows', ..., 'data', 'misc', 'share', {url_or_none})):
                yield self.url_result(song_url, HungamaSongIE)

        return self.playlist_result(entries(), playlist_id)
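
The new `HungamaBaseIE._call_api` above hits one content endpoint for movies, albums and playlists alike; only the path segment differs. A stdlib sketch of the request URL it builds, using the query parameters shown in the diff:

    from urllib.parse import urlencode

    def hungama_api_url(content_id: str, path: str) -> str:
        query = {'device': 'web', 'platform': 'a', 'storeId': '1'}
        return (f'https://cpage.api.hungama.com/v2/page/content/{content_id}/{path}/detail'
                f'?{urlencode(query)}')

    print(hungama_api_url('102524179', 'movie'))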
@@ -197,10 +197,6 @@ class IGNVideoIE(IGNBaseIE):
            'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg',
            'duration': 298,
            'tags': 'count:13',
            'display_id': '112203',
            'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg',
            'duration': 298,
            'tags': 'count:13',
        },
        'expected_warnings': ['HTTP Error 400: Bad Request'],
    }, {
@@ -1,9 +1,9 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    int_or_none,
    parse_age_limit,
    parse_iso8601,
    time_seconds,
    update_url_query,
)

@@ -11,15 +11,14 @@
class IndavideoEmbedIE(InfoExtractor):
    _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
    # Some example URLs covered by generic extractor:
    # http://indavideo.hu/video/Vicces_cica_1
    # http://index.indavideo.hu/video/2015_0728_beregszasz
    # http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
    # http://erotika.indavideo.hu/video/Amator_tini_punci
    # http://film.indavideo.hu/video/f_hrom_nagymamm_volt
    # http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
    _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)']
    # https://indavideo.hu/video/Vicces_cica_1
    # https://index.indavideo.hu/video/Hod_Nemetorszagban
    # https://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
    # https://film.indavideo.hu/video/f_farkaslesen
    # https://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
    _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)//embed\.indavideo\.hu/player/video/[\da-f]+)']
    _TESTS = [{
        'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
        'url': 'https://indavideo.hu/player/video/1bdc3c6d80/',
        'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
        'info_dict': {
            'id': '1837039',

@@ -36,21 +35,33 @@ class IndavideoEmbedIE(InfoExtractor):
            'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'],
        },
    }, {
        'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
        'only_matching': True,
    }, {
        'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1',
        'url': 'https://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://indavideo.hu/video/Vicces_cica_1',
        'info_dict': {
            'id': '1335611',
            'ext': 'mp4',
            'title': 'Vicces cica',
            'description': 'Játszik a tablettel. :D',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Jet_Pack',
            'uploader_id': '491217',
            'timestamp': 1390821212,
            'upload_date': '20140127',
            'duration': 7,
            'age_limit': 0,
            'tags': ['cica', 'Jet_Pack'],
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        video = self._download_json(
            'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
            video_id)['data']

        title = video['title']
            f'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/{video_id}/',
            video_id, query={'_': time_seconds()})['data']

        video_urls = []

@@ -60,33 +71,21 @@ def _real_extract(self, url):
        elif isinstance(video_files, dict):
            video_urls.extend(video_files.values())

        video_file = video.get('video_file')
        if video:
            video_urls.append(video_file)
        video_urls = list(set(video_urls))

        video_prefix = video_urls[0].rsplit('/', 1)[0]

        for flv_file in video.get('flv_files', []):
            flv_url = '%s/%s' % (video_prefix, flv_file)
            if flv_url not in video_urls:
                video_urls.append(flv_url)

        filesh = video.get('filesh')
        filesh = video.get('filesh') or {}

        formats = []
        for video_url in video_urls:
            height = int_or_none(self._search_regex(
                r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None))
            if filesh:
                if not height:
                    continue
                token = filesh.get(compat_str(height))
                if token is None:
                    continue
                video_url = update_url_query(video_url, {'token': token})
            if not height and len(filesh) == 1:
                height = int_or_none(list(filesh.keys())[0])
            token = filesh.get(str(height))
            if token is None:
                continue
            formats.append({
                'url': video_url,
                'url': update_url_query(video_url, {'token': token}),
                'height': height,
            })

@@ -103,7 +102,7 @@ def _real_extract(self, url):

        return {
            'id': video.get('id') or video_id,
            'title': title,
            'title': video.get('title'),
            'description': video.get('description'),
            'thumbnails': thumbnails,
            'uploader': video.get('user_name'),
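
The reworked Indavideo loop above resolves each quality's access token from the `filesh` map keyed by height, and now also infers the height when the map has exactly one entry. A condensed stdlib sketch of that lookup, with made-up data:

    import re

    filesh = {'720': 'tok720'}                 # height -> token, per the API response
    video_url = 'https://example.com/v.720.mp4'

    m = re.search(r'\.(\d{3,4})\.mp4(?:\?|$)', video_url)
    height = int(m.group(1)) if m else None
    if not height and len(filesh) == 1:
        height = int(list(filesh)[0])          # single-entry fallback from the diff
    token = filesh.get(str(height))
    if token:
        print(f'{video_url}?token={token}')    # tokenized format URL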
@@ -134,10 +134,17 @@ def _real_extract(self, url):
        ), webpage, 'real id', group='id', default=None)

        if not video_id:
            nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data')
            nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data', fatal=False)
            video_id = traverse_obj(
                nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False)

        if not video_id:
            nuxt_data = self._search_json(
                r'<script[^>]+\bid=["\']__NUXT_DATA__["\'][^>]*>',
                webpage, 'nuxt data', None, end_pattern=r'</script>', contains_pattern=r'\[(?s:.+)\]')

            video_id = traverse_obj(nuxt_data, lambda _, v: re.fullmatch(r'p\d+', v), get_all=False)

        if not video_id:
            self.raise_no_formats('Unable to extract video ID from webpage')
@@ -499,9 +499,10 @@ class IqIE(InfoExtractor):
            'tm': tm,
            'qdy': 'a',
            'qds': 0,
            'k_ft1': 141287244169348,
            'k_ft4': 34359746564,
            'k_ft5': 1,
            'k_ft1': '143486267424900',
            'k_ft4': '1572868',
            'k_ft7': '4',
            'k_ft5': '1',
            'bop': JSON.stringify({
                'version': '10.0',
                'dfp': dfp

@@ -529,14 +530,22 @@ def _extract_vms_player_js(self, webpage, video_id):
        webpack_js_url = self._proto_relative_url(self._search_regex(
            r'<script src="((?:https?:)?//stc\.iqiyipic\.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL'))
        webpack_js = self._download_webpage(webpack_js_url, video_id, note='Downloading webpack JS', errnote='Unable to download webpack JS')

        webpack_map = self._search_json(
            r'["\']\s*\+\s*', webpack_js, 'JS locations', video_id,
            contains_pattern=r'{\s*(?:\d+\s*:\s*["\'][\da-f]+["\']\s*,?\s*)+}',
            end_pattern=r'\[\w+\]\+["\']\.js', transform_source=js_to_json)

        replacement_map = self._search_json(
            r'["\']\s*\+\(\s*', webpack_js, 'replacement map', video_id,
            contains_pattern=r'{\s*(?:\d+\s*:\s*["\'][\w.-]+["\']\s*,?\s*)+}',
            end_pattern=r'\[\w+\]\|\|\w+\)\+["\']\.', transform_source=js_to_json,
            fatal=False) or {}

        for module_index in reversed(webpack_map):
            real_module = replacement_map.get(module_index) or module_index
            module_js = self._download_webpage(
                f'https://stc.iqiyipic.com/_next/static/chunks/{module_index}.{webpack_map[module_index]}.js',
                f'https://stc.iqiyipic.com/_next/static/chunks/{real_module}.{webpack_map[module_index]}.js',
                video_id, note=f'Downloading #{module_index} module JS', errnote='Unable to download module JS', fatal=False) or ''
            if 'vms request' in module_js:
                self.cache.store('iq', 'player_js', module_js)
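
The new `replacement_map` above handles webpack builds where a chunk's on-disk name differs from its numeric module index. A toy sketch of how the two maps combine into chunk URLs (sample values invented):

    webpack_map = {'123': '0a1b2c', '456': 'dd99ee'}  # module index -> chunk hash
    replacement_map = {'123': 'player.core'}          # optional renamed module

    for idx, chunk_hash in webpack_map.items():
        module = replacement_map.get(idx, idx)        # fall back to the index itself
        print(f'https://stc.iqiyipic.com/_next/static/chunks/{module}.{chunk_hash}.js')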
@@ -31,7 +31,7 @@ def _check_if_logged_in(self, webpage):


class ITProTVIE(ITProTVBaseIE):
    _VALID_URL = r'https://app.itpro.tv/course/(?P<course>[\w-]+)/(?P<id>[\w-]+)'
    _VALID_URL = r'https://app\.itpro\.tv/course/(?P<course>[\w-]+)/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://app.itpro.tv/course/guided-tour/introductionitprotv',
        'md5': 'bca4a28c2667fd1a63052e71a94bb88c',

@@ -102,7 +102,7 @@ def _real_extract(self, url):


class ITProTVCourseIE(ITProTVBaseIE):
    _VALID_URL = r'https?://app.itpro.tv/course/(?P<id>[\w-]+)/?(?:$|[#?])'
    _VALID_URL = r'https?://app\.itpro\.tv/course/(?P<id>[\w-]+)/?(?:$|[#?])'
    _TESTS = [
        {
            'url': 'https://app.itpro.tv/course/guided-tour',
@@ -10,7 +10,7 @@


class JableIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?jable.tv/videos/(?P<id>[\w-]+)'
    _VALID_URL = r'https?://(?:www\.)?jable\.tv/videos/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://jable.tv/videos/pppd-812/',
        'md5': 'f1537283a9bc073c31ff86ca35d9b2a6',

@@ -64,7 +64,7 @@ def _real_extract(self, url):


class JablePlaylistIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?jable.tv/(?:categories|models|tags)/(?P<id>[\w-]+)'
    _VALID_URL = r'https?://(?:www\.)?jable\.tv/(?:categories|models|tags)/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://jable.tv/models/kaede-karen/',
        'info_dict': {
156  yt_dlp/extractor/jtbc.py  Normal file

@@ -0,0 +1,156 @@
import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    parse_duration,
    url_or_none,
)
from ..utils.traversal import traverse_obj


class JTBCIE(InfoExtractor):
    IE_DESC = 'jtbc.co.kr'
    _VALID_URL = r'''(?x)
        https?://(?:
            vod\.jtbc\.co\.kr/player/(?:program|clip)
            |tv\.jtbc\.co\.kr/(?:replay|trailer|clip)/pr\d+/pm\d+
        )/(?P<id>(?:ep|vo)\d+)'''
    _GEO_COUNTRIES = ['KR']

    _TESTS = [{
        'url': 'https://tv.jtbc.co.kr/replay/pr10011629/pm10067930/ep20216321/view',
        'md5': 'e6ade71d8c8685bbfd6e6ce4167c6a6c',
        'info_dict': {
            'id': 'VO10721192',
            'display_id': 'ep20216321',
            'ext': 'mp4',
            'title': '힘쎈여자 강남순 2회 다시보기',
            'description': 'md5:043c1d9019100ce271dba09995dbd1e2',
            'duration': 3770.0,
            'release_date': '20231008',
            'age_limit': 15,
            'thumbnail': 'https://fs.jtbc.co.kr//joydata/CP00000001/prog/drama/stronggirlnamsoon/img/20231008_163541_522_1.jpg',
            'series': '힘쎈여자 강남순',
        },
    }, {
        'url': 'https://vod.jtbc.co.kr/player/program/ep20216733',
        'md5': '217a6d190f115a75e4bda0ceaa4cd7f4',
        'info_dict': {
            'id': 'VO10721429',
            'display_id': 'ep20216733',
            'ext': 'mp4',
            'title': '헬로 마이 닥터 친절한 진료실 149회 다시보기',
            'description': 'md5:1d70788a982dd5de26874a92fcffddb8',
            'duration': 2720.0,
            'release_date': '20231009',
            'age_limit': 15,
            'thumbnail': 'https://fs.jtbc.co.kr//joydata/CP00000001/prog/culture/hellomydoctor/img/20231009_095002_528_1.jpg',
            'series': '헬로 마이 닥터 친절한 진료실',
        },
    }, {
        'url': 'https://vod.jtbc.co.kr/player/clip/vo10721270',
        'md5': '05782e2dc22a9c548aebefe62ae4328a',
        'info_dict': {
            'id': 'VO10721270',
            'display_id': 'vo10721270',
            'ext': 'mp4',
            'title': '뭉쳐야 찬다3 2회 예고편 - A매치로 향하는 마지막 관문💥',
            'description': 'md5:d48b51a8655c84843b4ed8d0c39aae68',
            'duration': 46.0,
            'release_date': '20231015',
            'age_limit': 15,
            'thumbnail': 'https://fs.jtbc.co.kr//joydata/CP00000001/prog/enter/soccer3/img/20231008_210957_775_1.jpg',
            'series': '뭉쳐야 찬다3',
        },
    }, {
        'url': 'https://tv.jtbc.co.kr/trailer/pr10010392/pm10032526/vo10720912/view',
        'md5': '367d480eb3ef54a9cd7a4b4d69c4b32d',
        'info_dict': {
            'id': 'VO10720912',
            'display_id': 'vo10720912',
            'ext': 'mp4',
            'title': '아는 형님 404회 예고편 | 10월 14일(토) 저녁 8시 50분 방송!',
            'description': 'md5:2743bb1079ceb85bb00060f2ad8f0280',
            'duration': 148.0,
            'release_date': '20231014',
            'age_limit': 15,
            'thumbnail': 'https://fs.jtbc.co.kr//joydata/CP00000001/prog/enter/jtbcbros/img/20231006_230023_802_1.jpg',
            'series': '아는 형님',
        },
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)

        if display_id.startswith('vo'):
            video_id = display_id.upper()
        else:
            webpage = self._download_webpage(url, display_id)
            video_id = self._search_regex(r'data-vod="(VO\d+)"', webpage, 'vod id')

        playback_data = self._download_json(
            f'https://api.jtbc.co.kr/vod/{video_id}', video_id, note='Downloading VOD playback data')

        subtitles = {}
        for sub in traverse_obj(playback_data, ('tracks', lambda _, v: v['file'])):
            subtitles.setdefault(sub.get('label', 'und'), []).append({'url': sub['file']})

        formats = []
        for stream_url in traverse_obj(playback_data, ('sources', 'HLS', ..., 'file', {url_or_none})):
            stream_url = re.sub(r'/playlist(?:_pd\d+)?\.m3u8', '/index.m3u8', stream_url)
            formats.extend(self._extract_m3u8_formats(stream_url, video_id, fatal=False))

        metadata = self._download_json(
            'https://now-api.jtbc.co.kr/v1/vod/detail', video_id,
            note='Downloading mobile details', fatal=False, query={'vodFileId': video_id})
        return {
            'id': video_id,
            'display_id': display_id,
            **traverse_obj(metadata, ('vodDetail', {
                'title': 'vodTitleView',
                'series': 'programTitle',
                'age_limit': ('watchAge', {int_or_none}),
                'release_date': ('broadcastDate', {lambda x: re.match(r'\d{8}', x.replace('.', ''))}, 0),
                'description': 'episodeContents',
                'thumbnail': ('imgFileUrl', {url_or_none}),
            })),
            'duration': parse_duration(playback_data.get('playTime')),
            'formats': formats,
            'subtitles': subtitles,
        }


class JTBCProgramIE(InfoExtractor):
    IE_NAME = 'JTBC:program'
    _VALID_URL = r'https?://(?:vod\.jtbc\.co\.kr/program|tv\.jtbc\.co\.kr/replay)/(?P<id>pr\d+)/(?:replay|pm\d+)/?(?:$|[?#])'

    _TESTS = [{
        'url': 'https://tv.jtbc.co.kr/replay/pr10010392/pm10032710',
        'info_dict': {
            '_type': 'playlist',
            'id': 'pr10010392',
        },
        'playlist_count': 398,
    }, {
        'url': 'https://vod.jtbc.co.kr/program/pr10011491/replay',
        'info_dict': {
            '_type': 'playlist',
            'id': 'pr10011491',
        },
        'playlist_count': 59,
    }]

    def _real_extract(self, url):
        program_id = self._match_id(url)

        vod_list = self._download_json(
            'https://now-api.jtbc.co.kr/v1/vodClip/programHome/programReplayVodList', program_id,
            note='Downloading program replay list', query={
                'programId': program_id,
                'rowCount': '10000',
            })

        entries = [self.url_result(f'https://vod.jtbc.co.kr/player/program/{video_id}', JTBCIE, video_id)
                   for video_id in traverse_obj(vod_list, ('programReplayVodList', ..., 'episodeId'))]
        return self.playlist_result(entries, program_id)
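
The `re.sub` in `JTBCIE._real_extract` swaps the per-bitrate `playlist*.m3u8` name for the multi-variant `index.m3u8`, so every quality ends up in the extracted formats. A quick sketch with sample URLs:

    import re

    for stream_url in (
            'https://vod.example.com/ep1/playlist.m3u8',         # hypothetical URLs
            'https://vod.example.com/ep1/playlist_pd1000.m3u8'):
        print(re.sub(r'/playlist(?:_pd\d+)?\.m3u8', '/index.m3u8', stream_url))
    # both print .../ep1/index.m3u8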
@@ -3,7 +3,7 @@


class KommunetvIE(InfoExtractor):
    _VALID_URL = r'https://(\w+).kommunetv.no/archive/(?P<id>\w+)'
    _VALID_URL = r'https://\w+\.kommunetv\.no/archive/(?P<id>\w+)'
    _TEST = {
        'url': 'https://oslo.kommunetv.no/archive/921',
        'md5': '5f102be308ee759be1e12b63d5da4bbc',
@@ -1,5 +1,6 @@
import functools
import json
import re
import urllib.parse

from .common import InfoExtractor

@@ -21,10 +22,11 @@


class LBRYBaseIE(InfoExtractor):
    _BASE_URL_REGEX = r'(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)'
    _BASE_URL_REGEX = r'(?x)(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)'
    _CLAIM_ID_REGEX = r'[0-9a-f]{1,40}'
    _OPT_CLAIM_ID = '[^:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX
    _OPT_CLAIM_ID = '[^$@:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX
    _SUPPORTED_STREAM_TYPES = ['video', 'audio']
    _PAGE_SIZE = 50

    def _call_api_proxy(self, method, display_id, params, resource):
        headers = {'Content-Type': 'application/json-rpc'}

@@ -68,22 +70,82 @@ def _parse_stream(self, stream, url):
            'duration': ('value', stream_type, 'duration', {int_or_none}),
            'channel': ('signing_channel', 'value', 'title', {str}),
            'channel_id': ('signing_channel', 'claim_id', {str}),
            'uploader_id': ('signing_channel', 'name', {str}),
        })

        channel_name = traverse_obj(stream, ('signing_channel', 'name', {str}))
        if channel_name and info.get('channel_id'):
            info['channel_url'] = self._permanent_url(url, channel_name, info['channel_id'])
        if info.get('uploader_id') and info.get('channel_id'):
            info['channel_url'] = self._permanent_url(url, info['uploader_id'], info['channel_id'])

        return info

    def _fetch_page(self, display_id, url, params, page):
        page += 1
        page_params = {
            'no_totals': True,
            'page': page,
            'page_size': self._PAGE_SIZE,
            **params,
        }
        result = self._call_api_proxy(
            'claim_search', display_id, page_params, f'page {page}')
        for item in traverse_obj(result, ('items', lambda _, v: v['name'] and v['claim_id'])):
            yield {
                **self._parse_stream(item, url),
                '_type': 'url',
                'id': item['claim_id'],
                'url': self._permanent_url(url, item['name'], item['claim_id']),
            }

    def _playlist_entries(self, url, display_id, claim_param, metadata):
        qs = parse_qs(url)
        content = qs.get('content', [None])[0]
        params = {
            'fee_amount': qs.get('fee_amount', ['>=0'])[0],
            'order_by': {
                'new': ['release_time'],
                'top': ['effective_amount'],
                'trending': ['trending_group', 'trending_mixed'],
            }[qs.get('order', ['new'])[0]],
            'claim_type': 'stream',
            'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
            **claim_param,
        }
        duration = qs.get('duration', [None])[0]
        if duration:
            params['duration'] = {
                'long': '>=1200',
                'short': '<=240',
            }[duration]
        language = qs.get('language', ['all'])[0]
        if language != 'all':
            languages = [language]
            if language == 'en':
                languages.append('none')
            params['any_languages'] = languages

        entries = OnDemandPagedList(
            functools.partial(self._fetch_page, display_id, url, params),
            self._PAGE_SIZE)

        return self.playlist_result(
            entries, display_id, **traverse_obj(metadata, ('value', {
                'title': 'title',
                'description': 'description',
            })))
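
The shared `_fetch_page`/`OnDemandPagedList` machinery above lazily requests one page of `claim_search` results at a time, so a playlist extraction only pays for the pages actually consumed. A minimal sketch of that lazy-pagination pattern, assuming yt-dlp is importable:

    from yt_dlp.utils import OnDemandPagedList

    DATA = list(range(7))   # stand-in for claim_search results

    def fetch_page(page):   # zero-based page index, as the extractor receives it
        start = page * 3
        yield from DATA[start:start + 3]

    pages = OnDemandPagedList(fetch_page, 3)
    print(pages.getslice(0, 5))  # [0, 1, 2, 3, 4] -- only two pages were fetched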
|
||||
class LBRYIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>\$/[^/]+/[^/]+/{1}|@{0}/{0}|(?!@){0})'.format(LBRYBaseIE._OPT_CLAIM_ID, LBRYBaseIE._CLAIM_ID_REGEX)
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'''
|
||||
(?:\$/(?:download|embed)/)?
|
||||
(?P<id>
|
||||
[^$@:/?#]+/{LBRYBaseIE._CLAIM_ID_REGEX}
|
||||
|(?:@{LBRYBaseIE._OPT_CLAIM_ID}/)?{LBRYBaseIE._OPT_CLAIM_ID}
|
||||
)'''
|
||||
_TESTS = [{
|
||||
# Video
|
||||
'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
|
||||
'md5': 'fffd15d76062e9a985c22c7c7f2f4805',
|
||||
'md5': '65bd7ec1f6744ada55da8e4c48a2edf9',
|
||||
'info_dict': {
|
||||
'id': '17f983b61f53091fb8ea58a9c56804e4ff8cff4d',
|
||||
'ext': 'mp4',
|
||||
|
@ -97,6 +159,7 @@ class LBRYIE(LBRYBaseIE):
|
|||
'height': 720,
|
||||
'thumbnail': 'https://spee.ch/7/67f2d809c263288c.png',
|
||||
'license': 'None',
|
||||
'uploader_id': '@Mantega',
|
||||
'duration': 346,
|
||||
'channel': 'LBRY/Odysee rats united!!!',
|
||||
'channel_id': '1c8ad6a2ab4e889a71146ae4deeb23bb92dab627',
|
||||
|
@ -130,11 +193,11 @@ class LBRYIE(LBRYBaseIE):
|
|||
'vcodec': 'none',
|
||||
'thumbnail': 'https://spee.ch/d/0bc63b0e6bf1492d.png',
|
||||
'license': 'None',
|
||||
'uploader_id': '@LBRYFoundation',
|
||||
}
|
||||
}, {
|
||||
# HLS
|
||||
'url': 'https://odysee.com/@gardeningincanada:b/plants-i-will-never-grow-again.-the:e',
|
||||
'md5': '25049011f3c8bc2f8b60ad88a031837e',
|
||||
'md5': 'c35fac796f62a14274b4dc2addb5d0ba',
|
||||
'info_dict': {
|
||||
'id': 'e51671357333fe22ae88aad320bde2f6f96b1410',
|
||||
'ext': 'mp4',
|
||||
|
@ -149,6 +212,7 @@ class LBRYIE(LBRYBaseIE):
|
|||
'channel': 'Gardening In Canada',
|
||||
'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc',
|
||||
'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc',
|
||||
'uploader_id': '@gardeningincanada',
|
||||
'formats': 'mincount:3',
|
||||
'thumbnail': 'https://thumbnails.lbry.com/AgHSc_HzrrE',
|
||||
'license': 'Copyrighted (contact publisher)',
|
||||
|
@ -174,6 +238,7 @@ class LBRYIE(LBRYBaseIE):
|
|||
'formats': 'mincount:1',
|
||||
'thumbnail': 'startswith:https://thumb',
|
||||
'license': 'None',
|
||||
'uploader_id': '@RT',
|
||||
},
|
||||
'params': {'skip_download': True}
|
||||
}, {
|
||||
|
@ -184,12 +249,13 @@ class LBRYIE(LBRYBaseIE):
|
|||
'id': '41fbfe805eb73c8d3012c0c49faa0f563274f634',
|
||||
'ext': 'mp4',
|
||||
'title': 'Biotechnological Invasion of Skin (April 2023)',
|
||||
'description': 'md5:709a2f4c07bd8891cda3a7cc2d6fcf5c',
|
||||
'description': 'md5:fe28689db2cb7ba3436d819ac3ffc378',
|
||||
'channel': 'Wicked Truths',
|
||||
'channel_id': '23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
|
||||
'channel_url': 'https://odysee.com/@wickedtruths:23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
|
||||
'timestamp': 1685790036,
|
||||
'upload_date': '20230603',
|
||||
'uploader_id': '@wickedtruths',
|
||||
'timestamp': 1695114347,
|
||||
'upload_date': '20230919',
|
||||
'release_timestamp': 1685617473,
|
||||
'release_date': '20230601',
|
||||
'duration': 1063,
|
||||
|
@ -229,10 +295,10 @@ class LBRYIE(LBRYBaseIE):
|
|||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
if display_id.startswith('$/'):
|
||||
display_id = display_id.split('/', 2)[-1].replace('/', ':')
|
||||
else:
|
||||
if display_id.startswith('@'):
|
||||
display_id = display_id.replace(':', '#')
|
||||
else:
|
||||
display_id = display_id.replace('/', ':')
|
||||
display_id = urllib.parse.unquote(display_id)
|
||||
uri = 'lbry://' + display_id
|
||||
result = self._resolve_url(uri, display_id, 'stream')
|
||||
|
@@ -246,12 +312,13 @@ def _real_extract(self, url):
         streaming_url = self._call_api_proxy(
             'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']

-        # GET request returns original video/audio file if available
-        ext = urlhandle_detect_ext(self._request_webpage(
-            streaming_url, display_id, 'Checking for original quality', headers=headers))
-        if ext != 'm3u8':
+        # GET request to v3 API returns original video/audio file if available
+        direct_url = re.sub(r'/api/v\d+/', '/api/v3/', streaming_url)
+        urlh = self._request_webpage(
+            direct_url, display_id, 'Checking for original quality', headers=headers, fatal=False)
+        if urlh and urlhandle_detect_ext(urlh) != 'm3u8':
             formats.append({
-                'url': streaming_url,
+                'url': direct_url,
                 'format_id': 'original',
                 'quality': 1,
                 **traverse_obj(result, ('value', {
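Note: the new check rewrites the API version segment of the streaming URL to /api/v3/ and probes that URL; only when the probed response is not an HLS playlist (m3u8) does an 'original' format get added, and fatal=False keeps a failed probe from aborting extraction. A rough sketch of the rewrite alone, on a made-up URL:

    import re

    streaming_url = 'https://player.example.com/api/v4/streams/free/title/abcdef01/master.m3u8'
    direct_url = re.sub(r'/api/v\d+/', '/api/v3/', streaming_url)
    print(direct_url)
    # https://player.example.com/api/v3/streams/free/title/abcdef01/master.m3u8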
@@ -298,7 +365,7 @@ def _real_extract(self, url):

 class LBRYChannelIE(LBRYBaseIE):
     IE_NAME = 'lbry:channel'
-    _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?&]|$)' % LBRYBaseIE._OPT_CLAIM_ID
+    _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'(?P<id>@{LBRYBaseIE._OPT_CLAIM_ID})/?(?:[?&]|$)'
     _TESTS = [{
         'url': 'https://lbry.tv/@LBRYFoundation:0',
         'info_dict': {
@@ -314,65 +381,50 @@ class LBRYChannelIE(LBRYBaseIE):
         'url': 'lbry://@lbry#3f',
         'only_matching': True,
     }]
-    _PAGE_SIZE = 50
-
-    def _fetch_page(self, claim_id, url, params, page):
-        page += 1
-        page_params = {
-            'channel_ids': [claim_id],
-            'claim_type': 'stream',
-            'no_totals': True,
-            'page': page,
-            'page_size': self._PAGE_SIZE,
-        }
-        page_params.update(params)
-        result = self._call_api_proxy(
-            'claim_search', claim_id, page_params, 'page %d' % page)
-        for item in (result.get('items') or []):
-            stream_claim_name = item.get('name')
-            stream_claim_id = item.get('claim_id')
-            if not (stream_claim_name and stream_claim_id):
-                continue
-
-            yield {
-                **self._parse_stream(item, url),
-                '_type': 'url',
-                'id': stream_claim_id,
-                'url': self._permanent_url(url, stream_claim_name, stream_claim_id),
-            }

     def _real_extract(self, url):
         display_id = self._match_id(url).replace(':', '#')
-        result = self._resolve_url(
-            'lbry://' + display_id, display_id, 'channel')
+        result = self._resolve_url(f'lbry://{display_id}', display_id, 'channel')
         claim_id = result['claim_id']
-        qs = parse_qs(url)
-        content = qs.get('content', [None])[0]
-        params = {
-            'fee_amount': qs.get('fee_amount', ['>=0'])[0],
-            'order_by': {
-                'new': ['release_time'],
-                'top': ['effective_amount'],
-                'trending': ['trending_group', 'trending_mixed'],
-            }[qs.get('order', ['new'])[0]],
-            'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
-        }
-        duration = qs.get('duration', [None])[0]
-        if duration:
-            params['duration'] = {
-                'long': '>=1200',
-                'short': '<=240',
-            }[duration]
-        language = qs.get('language', ['all'])[0]
-        if language != 'all':
-            languages = [language]
-            if language == 'en':
-                languages.append('none')
-            params['any_languages'] = languages
-        entries = OnDemandPagedList(
-            functools.partial(self._fetch_page, claim_id, url, params),
-            self._PAGE_SIZE)
-        result_value = result.get('value') or {}
-        return self.playlist_result(
-            entries, claim_id, result_value.get('title'),
-            result_value.get('description'))
+
+        return self._playlist_entries(url, claim_id, {'channel_ids': [claim_id]}, result)
+
+
+class LBRYPlaylistIE(LBRYBaseIE):
+    IE_NAME = 'lbry:playlist'
+    _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'\$/(?:play)?list/(?P<id>[0-9a-f-]+)'
+    _TESTS = [{
+        'url': 'https://odysee.com/$/playlist/ffef782f27486f0ac138bde8777f72ebdd0548c2',
+        'info_dict': {
+            'id': 'ffef782f27486f0ac138bde8777f72ebdd0548c2',
+            'title': 'Théâtre Classique',
+            'description': 'Théâtre Classique',
+        },
+        'playlist_mincount': 4,
+    }, {
+        'url': 'https://odysee.com/$/list/9c6658b3dd21e4f2a0602d523a13150e2b48b770',
+        'info_dict': {
+            'id': '9c6658b3dd21e4f2a0602d523a13150e2b48b770',
+            'title': 'Social Media Exposed',
+            'description': 'md5:98af97317aacd5b85d595775ea37d80e',
+        },
+        'playlist_mincount': 34,
+    }, {
+        'url': 'https://odysee.com/$/playlist/938fb11d-215f-4d1c-ad64-723954df2184',
+        'info_dict': {
+            'id': '938fb11d-215f-4d1c-ad64-723954df2184',
+        },
+        'playlist_mincount': 1000,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        result = traverse_obj(self._call_api_proxy('claim_search', display_id, {
+            'claim_ids': [display_id],
+            'no_totals': True,
+            'page': 1,
+            'page_size': self._PAGE_SIZE,
+        }, 'playlist'), ('items', 0))
+        claim_param = {'claim_ids': traverse_obj(result, ('value', 'claims', ..., {str}))}
+
+        return self._playlist_entries(url, display_id, claim_param, result)
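Note: both LBRYChannelIE and the new LBRYPlaylistIE now hand off to _playlist_entries in the base class, which pages through claim_search results lazily. A hedged sketch of the underlying paging pattern with yt-dlp's OnDemandPagedList (fetch_page below is a stand-in, not the extractor's code):

    from yt_dlp.utils import OnDemandPagedList

    PAGE_SIZE = 50

    def fetch_page(page):
        # Called with a 0-based page index, and only when that page is
        # actually needed; the LBRY API itself counts pages from 1.
        return [f'entry {page * PAGE_SIZE + n}' for n in range(PAGE_SIZE)]

    entries = OnDemandPagedList(fetch_page, PAGE_SIZE)  # pages are fetched on demand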
yt_dlp/extractor/lecturio.py

@@ -57,8 +57,8 @@ class LecturioIE(LecturioBaseIE):
     _VALID_URL = r'''(?x)
                     https://
                         (?:
-                            app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
-                            (?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag
+                            app\.lecturio\.com/([^/?#]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
+                            (?:www\.)?lecturio\.de/(?:[^/?#]+/)+(?P<nt_de>[^/?#&]+)\.vortrag
                         )
                     '''
     _TESTS = [{
@@ -73,6 +73,9 @@ class LecturioIE(LecturioBaseIE):
     }, {
         'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
         'only_matching': True,
+    }, {
+        'url': 'https://www.lecturio.de/jura/oeffentliches-recht-at-1-staatsexamen/oeffentliches-recht-staatsexamen.vortrag',
+        'only_matching': True,
     }, {
         'url': 'https://app.lecturio.com/#/lecture/c/6434/39634',
         'only_matching': True,
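Note: loosening [^/]+/ to (?:[^/?#]+/)+ is what makes the newly added nested-path test match. A quick illustrative check of just the lecturio.de fragment:

    import re

    pattern = re.compile(r'https://(?:www\.)?lecturio\.de/(?:[^/?#]+/)+(?P<nt_de>[^/?#&]+)\.vortrag')
    m = pattern.match('https://www.lecturio.de/jura/oeffentliches-recht-at-1-staatsexamen/oeffentliches-recht-staatsexamen.vortrag')
    assert m and m.group('nt_de') == 'oeffentliches-recht-staatsexamen'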
yt_dlp/extractor/litv.py

@@ -13,7 +13,7 @@
 class LiTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)'

-    _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s'
+    _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s'

     _TESTS = [{
         'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
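Note: with the template change, generated episode URLs carry the content_id query parameter instead of id. For example (values taken from the tests below):

    _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s'
    print(_URL_TEMPLATE % ('drama', 'VOD00041610'))
    # https://www.litv.tv/vod/drama/content.do?content_id=VOD00041610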
@@ -21,16 +21,18 @@ class LiTVIE(InfoExtractor):
             'id': 'VOD00041606',
             'title': '花千骨',
         },
-        'playlist_count': 50,
+        'playlist_count': 51,  # 50 episodes + 1 trailer
     }, {
         'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
-        'md5': '969e343d9244778cb29acec608e53640',
+        'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a',
         'info_dict': {
             'id': 'VOD00041610',
             'ext': 'mp4',
             'title': '花千骨第1集',
             'thumbnail': r're:https?://.*\.jpg$',
-            'description': 'md5:c7017aa144c87467c4fb2909c4b05d6f',
+            'description': '《花千骨》陸劇線上看。十六年前，平靜的村莊內，一名女嬰隨異相出生，途徑此地的蜀山掌門清虛道長算出此女命運非同一般，她體內散發的異香易招惹妖魔。一念慈悲下，他在村莊周邊設下結界阻擋妖魔入侵，讓其年滿十六後去蜀山，並賜名花千骨。',
+            'categories': ['奇幻', '愛情', '中國', '仙俠'],
+            'episode': 'Episode 1',
+            'episode_number': 1,
         },
         'params': {
@@ -46,20 +48,17 @@ class LiTVIE(InfoExtractor):
             'title': '芈月傳第1集 霸星芈月降世楚國',
             'description': '楚威王二年，太史令唐昧夜觀星象，發現霸星即將現世。王后得知霸星的預言後，想盡辦法不讓孩子順利出生，幸得莒姬相護化解危機。沒想到眾人期待下出生的霸星卻是位公主，楚威王對此失望至極。楚王后命人將女嬰丟棄河中，居然奇蹟似的被少司命像攔下，楚威王認為此女非同凡響，為她取名芈月。',
         },
-        'skip': 'Georestricted to Taiwan',
+        'skip': 'No longer exists',
     }]

-    def _extract_playlist(self, season_list, video_id, program_info, prompt=True):
-        episode_title = program_info['title']
-        content_id = season_list['contentId']
-
+    def _extract_playlist(self, playlist_data, content_type):
         all_episodes = [
             self.url_result(smuggle_url(
-                self._URL_TEMPLATE % (program_info['contentType'], episode['contentId']),
+                self._URL_TEMPLATE % (content_type, episode['contentId']),
                 {'force_noplaylist': True}))  # To prevent infinite recursion
-            for episode in season_list['episode']]
+            for episode in traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))]

-        return self.playlist_result(all_episodes, content_id, episode_title)
+        return self.playlist_result(all_episodes, playlist_data['contentId'], playlist_data.get('title'))

     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
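Note: the comprehension now walks getSeriesTree-style data with traverse_obj, flattening every season's episode list and silently dropping entries without a contentId. A sketch against made-up data that only mirrors the key layout:

    from yt_dlp.utils import traverse_obj

    playlist_data = {'contentId': 'VOD00041606', 'seasons': [
        {'episode': [{'contentId': 'VOD00041610'}, {'title': 'no contentId, skipped'}]},
        {'episode': [{'contentId': 'VOD00041611'}]},
    ]}
    episodes = traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))
    print([ep['contentId'] for ep in episodes])
    # ['VOD00041610', 'VOD00041611']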
@@ -68,24 +67,31 @@ def _real_extract(self, url):
         webpage = self._download_webpage(url, video_id)

+        if self._search_regex(
+                r'(?i)<meta\s[^>]*http-equiv="refresh"\s[^>]*content="[0-9]+;\s*url=https://www\.litv\.tv/"',
+                webpage, 'meta refresh redirect', default=False, group=0):
+            raise ExtractorError('No such content found', expected=True)
+
         program_info = self._parse_json(self._search_regex(
             r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
             video_id)

-        season_list = list(program_info.get('seasonList', {}).values())
-        playlist_id = traverse_obj(season_list, 0, 'contentId')
-        if self._yes_playlist(playlist_id, video_id, smuggled_data):
-            return self._extract_playlist(season_list[0], video_id, program_info)
-
-        # In browsers `getMainUrl` request is always issued. Usually this
+        # In browsers `getProgramInfo` request is always issued. Usually this
         # endpoint gives the same result as the data embedded in the webpage.
-        # If georestricted, there are no embedded data, so an extra request is
-        # necessary to get the error code
+        # If, for some reason, there are no embedded data, we do an extra request.
         if 'assetId' not in program_info:
             program_info = self._download_json(
                 'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
                 query={'contentId': video_id},
                 headers={'Accept': 'application/json'})

+        series_id = program_info['seriesId']
+        if self._yes_playlist(series_id, video_id, smuggled_data):
+            playlist_data = self._download_json(
+                'https://www.litv.tv/vod/ajax/getSeriesTree', video_id,
+                query={'seriesId': series_id}, headers={'Accept': 'application/json'})
+            return self._extract_playlist(playlist_data, program_info['contentType'])
+
         video_data = self._parse_json(self._search_regex(
             r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
             webpage, 'video data', default='{}'), video_id)
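Note: the added guard turns LiTV's meta-refresh redirect to the front page (served for removed titles) into a clean 'No such content found' error instead of a confusing parse failure later on. Roughly, against a made-up snippet of such a page:

    import re

    webpage = '<html><head><meta http-equiv="refresh" content="0; url=https://www.litv.tv/"></head></html>'
    if re.search(r'(?i)<meta\s[^>]*http-equiv="refresh"\s[^>]*content="[0-9]+;\s*url=https://www\.litv\.tv/"', webpage):
        print('No such content found')  # the extractor raises ExtractorError here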
@@ -96,7 +102,7 @@ def _real_extract(self, url):
             'contentType': program_info['contentType'],
         }
         video_data = self._download_json(
-            'https://www.litv.tv/vod/getMainUrl', video_id,
+            'https://www.litv.tv/vod/ajax/getMainUrlNoAuth', video_id,
             data=json.dumps(payload).encode('utf-8'),
             headers={'Content-Type': 'application/json'})