Merge branch 'master' into mojevideo

This commit is contained in:
bashonly 2023-10-06 18:27:37 -05:00 committed by GitHub
commit a9580b08cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
72 changed files with 2802 additions and 977 deletions

View File

@ -18,7 +18,7 @@ body:
options:
- label: I'm reporting that yt-dlp is broken on a **supported** site
required: true
- label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true
@ -64,7 +64,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
[debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
@ -72,8 +72,8 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.07.06, Current version: 2023.07.06
yt-dlp is up to date (2023.07.06)
Latest version: 2023.09.24, Current version: 2023.09.24
yt-dlp is up to date (2023.09.24)
<more lines>
render: shell
validations:

View File

@ -18,7 +18,7 @@ body:
options:
- label: I'm reporting a new site support request
required: true
- label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true
@ -76,7 +76,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
[debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
@ -84,8 +84,8 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.07.06, Current version: 2023.07.06
yt-dlp is up to date (2023.07.06)
Latest version: 2023.09.24, Current version: 2023.09.24
yt-dlp is up to date (2023.09.24)
<more lines>
render: shell
validations:

View File

@ -18,7 +18,7 @@ body:
options:
- label: I'm requesting a site-specific feature
required: true
- label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true
@ -72,7 +72,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
[debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
@ -80,8 +80,8 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.07.06, Current version: 2023.07.06
yt-dlp is up to date (2023.07.06)
Latest version: 2023.09.24, Current version: 2023.09.24
yt-dlp is up to date (2023.09.24)
<more lines>
render: shell
validations:

View File

@ -18,7 +18,7 @@ body:
options:
- label: I'm reporting a bug unrelated to a specific site
required: true
- label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true
@ -57,7 +57,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
[debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
@ -65,8 +65,8 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.07.06, Current version: 2023.07.06
yt-dlp is up to date (2023.07.06)
Latest version: 2023.09.24, Current version: 2023.09.24
yt-dlp is up to date (2023.09.24)
<more lines>
render: shell
validations:

View File

@ -20,7 +20,7 @@ body:
required: true
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
required: true
- label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true
- label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
required: true
@ -53,7 +53,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
[debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
@ -61,7 +61,7 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.07.06, Current version: 2023.07.06
yt-dlp is up to date (2023.07.06)
Latest version: 2023.09.24, Current version: 2023.09.24
yt-dlp is up to date (2023.09.24)
<more lines>
render: shell

View File

@ -26,7 +26,7 @@ body:
required: true
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
required: true
- label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true
- label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates
required: true
@ -59,7 +59,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
[debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
@ -67,7 +67,7 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.07.06, Current version: 2023.07.06
yt-dlp is up to date (2023.07.06)
Latest version: 2023.09.24, Current version: 2023.09.24
yt-dlp is up to date (2023.09.24)
<more lines>
render: shell

View File

@ -217,7 +217,7 @@ After you have ensured this site is distributing its content legally, you can fo
1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`.
1. Run `python test/test_download.py TestDownload.test_YourExtractor` (note that `YourExtractor` doesn't end with `IE`). This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, the tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. You can also run all the tests in one go with `TestDownload.test_YourExtractor_all`
1. Make sure you have atleast one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running.
1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L91-L426). Add tests and code for as many as you want.
1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L119-L440). Add tests and code for as many as you want.
1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
$ flake8 yt_dlp/extractor/yourextractor.py
@ -251,7 +251,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou
### Mandatory and optional metafields
For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L91-L426) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp:
For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L119-L440) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp:
- `id` (media identifier)
- `title` (media title)
@ -696,7 +696,7 @@ formats = [
### Use convenience conversion and parsing functions
Wrap all extracted numeric data into safe functions from [`yt_dlp/utils.py`](yt_dlp/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
Wrap all extracted numeric data into safe functions from [`yt_dlp/utils/`](yt_dlp/utils/): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
Use `url_or_none` for safe URL processing.
@ -704,7 +704,7 @@ Use `traverse_obj` and `try_call` (superseeds `dict_get` and `try_get`) for safe
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
Explore [`yt_dlp/utils.py`](yt_dlp/utils.py) for more useful convenience functions.
Explore [`yt_dlp/utils/`](yt_dlp/utils/) for more useful convenience functions.
#### Examples

View File

@ -467,3 +467,39 @@ rdamas
RfadnjdExt
urectanc
nao20010128nao/Lesmiscore
04-pasha-04
aaruni96
aky-01
AmirAflak
ApoorvShah111
at-wat
davinkevin
demon071
denhotte
FinnRG
fireattack
Frankgoji
GD-Slime
hatsomatt
ifan-t
kshitiz305
kylegustavo
mabdelfattah
nathantouze
niemands
Rajeshwaran2001
RedDeffender
Rohxn16
sb0stn
SevenLives
simon300000
snixon
soundchaser128
szabyg
trainman261
trislee
wader
Yalab7
zhallgato
zhong-yiyu
Zprokkel

View File

@ -4,6 +4,202 @@
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
-->
### 2023.09.24
#### Important changes
- **The minimum *recommended* Python version has been raised to 3.8**
Since Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803)
- Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg)
- The shell escape function is now using `""` instead of `\"`.
- `utils.Popen` has been patched to properly quote commands.
#### Core changes
- [Fix HTTP headers and cookie handling](https://github.com/yt-dlp/yt-dlp/commit/6c5211cebeacfc53ad5d5ddf4a659be76039656f) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
- [Fix `--check-formats`](https://github.com/yt-dlp/yt-dlp/commit/8cb7fc44db010e965d808ee679ef0725cb6e147c) by [pukkandan](https://github.com/pukkandan)
- [Fix support for upcoming Python 3.12](https://github.com/yt-dlp/yt-dlp/commit/836e06d246512f286f30c1371b2c54b72c9ecd93) ([#8130](https://github.com/yt-dlp/yt-dlp/issues/8130)) by [Grub4K](https://github.com/Grub4K)
- [Merged with youtube-dl 66ab08](https://github.com/yt-dlp/yt-dlp/commit/9d6254069c75877bc88bc3584f4326fb1853a543) by [coletdjnz](https://github.com/coletdjnz)
- [Prevent RCE when using `--exec` with `%q` (CVE-2023-40581)](https://github.com/yt-dlp/yt-dlp/commit/de015e930747165dbb8fcd360f8775fd973b7d6e) by [Grub4K](https://github.com/Grub4K)
- [Raise minimum recommended Python version to 3.8](https://github.com/yt-dlp/yt-dlp/commit/61bdf15fc7400601c3da1aa7a43917310a5bf391) ([#8183](https://github.com/yt-dlp/yt-dlp/issues/8183)) by [Grub4K](https://github.com/Grub4K)
- [`FFmpegFixupM3u8PP` may need to run with ffmpeg](https://github.com/yt-dlp/yt-dlp/commit/f73c11803579889dc8e1c99e25dba9a22fef39d8) by [pukkandan](https://github.com/pukkandan)
- **compat**
- [Add `types.NoneType`](https://github.com/yt-dlp/yt-dlp/commit/e0c4db04dc82a699bdabd9821ddc239ebe17d30a) by [pukkandan](https://github.com/pukkandan) (With fixes in [25b6e8f](https://github.com/yt-dlp/yt-dlp/commit/25b6e8f94679b4458550702b46e61249b875a4fd))
- [Deprecate old functions](https://github.com/yt-dlp/yt-dlp/commit/3d2623a898196640f7cc0fc8b70118ff19e6925d) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
- [Ensure submodules are imported correctly](https://github.com/yt-dlp/yt-dlp/commit/a250b247334ce9f641e709cbb64974da6034a2b3) by [pukkandan](https://github.com/pukkandan)
- **cookies**: [Containers JSON should be opened as utf-8](https://github.com/yt-dlp/yt-dlp/commit/dab87ca23650fd87184ff5286b53e6985b59f71d) ([#7800](https://github.com/yt-dlp/yt-dlp/issues/7800)) by [bashonly](https://github.com/bashonly)
- **dependencies**: [Handle deprecation of `sqlite3.version`](https://github.com/yt-dlp/yt-dlp/commit/35f9a306e6934793cff100200cd03f288ec33f11) ([#8167](https://github.com/yt-dlp/yt-dlp/issues/8167)) by [bashonly](https://github.com/bashonly)
- **outtmpl**: [Fix replacement for `playlist_index`](https://github.com/yt-dlp/yt-dlp/commit/a264433c9fba147ecae2420091614186cfeeb895) by [pukkandan](https://github.com/pukkandan)
- **utils**
- [Add temporary shim for logging](https://github.com/yt-dlp/yt-dlp/commit/1b392f905d20ef1f1b300b180f867d43c9ce49b8) by [pukkandan](https://github.com/pukkandan)
- [Improve `parse_duration`](https://github.com/yt-dlp/yt-dlp/commit/af86873218c24c3859ccf575a87f2b00a73b49d0) by [bashonly](https://github.com/bashonly)
- HTTPHeaderDict: [Handle byte values](https://github.com/yt-dlp/yt-dlp/commit/3f7965105d8d2048359e67c1e8b8ebd51588143b) by [pukkandan](https://github.com/pukkandan)
- `clean_podcast_url`: [Handle more trackers](https://github.com/yt-dlp/yt-dlp/commit/2af4eeb77246b8183aae75a0a8d19f18c08115b2) ([#7556](https://github.com/yt-dlp/yt-dlp/issues/7556)) by [bashonly](https://github.com/bashonly), [mabdelfattah](https://github.com/mabdelfattah)
- `js_to_json`: [Handle `Array` objects](https://github.com/yt-dlp/yt-dlp/commit/52414d64ca7b92d3f83964cdd68247989b0c4625) by [Grub4K](https://github.com/Grub4K), [std-move](https://github.com/std-move)
#### Extractor changes
- [Extract subtitles from SMIL manifests](https://github.com/yt-dlp/yt-dlp/commit/550e65410a7a1b105923494ac44460a4dc1a15d9) ([#7667](https://github.com/yt-dlp/yt-dlp/issues/7667)) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
- [Fix `--load-pages`](https://github.com/yt-dlp/yt-dlp/commit/81b4712bca608b9015aa68a4d96661d56e9cb894) by [pukkandan](https://github.com/pukkandan)
- [Make `_search_nuxt_data` more lenient](https://github.com/yt-dlp/yt-dlp/commit/904a19ee93195ce0bd4b08bd22b186120afb5b17) by [std-move](https://github.com/std-move)
- **abematv**
- [Fix proxy handling](https://github.com/yt-dlp/yt-dlp/commit/497bbbbd7328cb705f70eced94dbd90993819a46) ([#8046](https://github.com/yt-dlp/yt-dlp/issues/8046)) by [SevenLives](https://github.com/SevenLives)
- [Temporary fix for protocol handler](https://github.com/yt-dlp/yt-dlp/commit/9f66247289b9f8ecf931833b3f5f127274dd2161) by [pukkandan](https://github.com/pukkandan)
- **amazonminitv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/538d37671a17e0782d17f08df17800e2e3bd57c8) by [bashonly](https://github.com/bashonly), [GautamMKGarg](https://github.com/GautamMKGarg)
- **antenna**: [Support antenna.gr](https://github.com/yt-dlp/yt-dlp/commit/665876034c8d3c031443f6b4958bed02ccdf4164) ([#7584](https://github.com/yt-dlp/yt-dlp/issues/7584)) by [stdedos](https://github.com/stdedos)
- **artetv**: [Fix HLS formats extraction](https://github.com/yt-dlp/yt-dlp/commit/c2da0b5ea215298135f76e3dc14b972a3c4afacb) by [bashonly](https://github.com/bashonly)
- **axs**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/aee6b9b88c0bcccf27fd23b7e00fc0b7b168928f) ([#8094](https://github.com/yt-dlp/yt-dlp/issues/8094)) by [barsnick](https://github.com/barsnick)
- **banbye**: [Support video ids containing a hyphen](https://github.com/yt-dlp/yt-dlp/commit/578a82e497502b951036ce9da6fe0dac6937ac27) ([#8059](https://github.com/yt-dlp/yt-dlp/issues/8059)) by [kshitiz305](https://github.com/kshitiz305)
- **bbc**: [Extract tracklist as chapters](https://github.com/yt-dlp/yt-dlp/commit/eda0e415d26eb084e570cf5372d38ee1f616b70f) ([#7788](https://github.com/yt-dlp/yt-dlp/issues/7788)) by [garret1317](https://github.com/garret1317)
- **bild.de**: [Extract HLS formats](https://github.com/yt-dlp/yt-dlp/commit/b4c1c408c63724339eb12b16c91b253a7ee62cfa) ([#8032](https://github.com/yt-dlp/yt-dlp/issues/8032)) by [barsnick](https://github.com/barsnick)
- **bilibili**
- [Add support for series, favorites and watch later](https://github.com/yt-dlp/yt-dlp/commit/9e68747f9607f05e92bb7d9b6e79d678b50070e1) ([#7518](https://github.com/yt-dlp/yt-dlp/issues/7518)) by [c-basalt](https://github.com/c-basalt)
- [Extract Dolby audio formats](https://github.com/yt-dlp/yt-dlp/commit/b84fda7388dd20d38921e23b469147f3957c1812) ([#8142](https://github.com/yt-dlp/yt-dlp/issues/8142)) by [ClosedPort22](https://github.com/ClosedPort22)
- [Extract `format_id`](https://github.com/yt-dlp/yt-dlp/commit/5336bf57a7061e0955a37f0542fc8ebf50d55b17) ([#7555](https://github.com/yt-dlp/yt-dlp/issues/7555)) by [c-basalt](https://github.com/c-basalt)
- **bilibilibangumi**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/bdd0b75e3f41ff35440eda6d395008beef19ef2f) ([#7337](https://github.com/yt-dlp/yt-dlp/issues/7337)) by [GD-Slime](https://github.com/GD-Slime)
- **bpb**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/f659e6439444ac64305b5c80688cd82f59d2279c) ([#8119](https://github.com/yt-dlp/yt-dlp/issues/8119)) by [Grub4K](https://github.com/Grub4K)
- **brilliantpala**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/92feb5654c5a4c81ba872904a618700fcbb3e546) ([#6680](https://github.com/yt-dlp/yt-dlp/issues/6680)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **canal1, caracoltvplay**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b3febedbeb662dfdf9b5c1d5799039ad4fc969de) ([#7151](https://github.com/yt-dlp/yt-dlp/issues/7151)) by [elyse0](https://github.com/elyse0)
- **cbc**: [Ignore any 426 from API](https://github.com/yt-dlp/yt-dlp/commit/9bf14be775289bd88cc1f5c89fd761ae51879484) ([#7689](https://github.com/yt-dlp/yt-dlp/issues/7689)) by [makew0rld](https://github.com/makew0rld)
- **cbcplayer**: [Extract HLS formats and subtitles](https://github.com/yt-dlp/yt-dlp/commit/339c339fec095ff4141b20e6aa83629117fb26df) ([#7484](https://github.com/yt-dlp/yt-dlp/issues/7484)) by [trainman261](https://github.com/trainman261)
- **cbcplayerplaylist**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ed711897814f3ee0b1822e4205e74133467e8f1c) ([#7870](https://github.com/yt-dlp/yt-dlp/issues/7870)) by [trainman261](https://github.com/trainman261)
- **cineverse**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/15591940ff102d1ae337d603a46d8f238c83a61f) ([#8146](https://github.com/yt-dlp/yt-dlp/issues/8146)) by [garret1317](https://github.com/garret1317)
- **crunchyroll**: [Remove initial state extraction](https://github.com/yt-dlp/yt-dlp/commit/9b16762f48914de9ac914601769c76668e433325) ([#7632](https://github.com/yt-dlp/yt-dlp/issues/7632)) by [Grub4K](https://github.com/Grub4K)
- **douyutv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/21f40e75dfc0055ea9cdbd7fe2c46c6f9b561afd) ([#7652](https://github.com/yt-dlp/yt-dlp/issues/7652)) by [c-basalt](https://github.com/c-basalt)
- **dropbox**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b9f2bc2dbed2323734a0d18e65e1e2e23dc833d8) ([#7926](https://github.com/yt-dlp/yt-dlp/issues/7926)) by [bashonly](https://github.com/bashonly), [denhotte](https://github.com/denhotte), [nathantouze](https://github.com/nathantouze) (With fixes in [099fb1b](https://github.com/yt-dlp/yt-dlp/commit/099fb1b35cf835303306549f5113d1802d79c9c7) by [bashonly](https://github.com/bashonly))
- **eplus**: inbound: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/295fbb3ae3a7d0dd50e286be5c487cf145ed5778) ([#5782](https://github.com/yt-dlp/yt-dlp/issues/5782)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **expressen**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/a5e264d74b4bd60c6e7ec4e38f1a23af4e420531) ([#8153](https://github.com/yt-dlp/yt-dlp/issues/8153)) by [kylegustavo](https://github.com/kylegustavo)
- **facebook**
- [Add dash manifest URL](https://github.com/yt-dlp/yt-dlp/commit/a854fbec56d5004f5147116a41d1dd050632a579) ([#7743](https://github.com/yt-dlp/yt-dlp/issues/7743)) by [ringus1](https://github.com/ringus1)
- [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/d3d81cc98f554d0adb87d24bfd6fabaaa803944d) ([#7890](https://github.com/yt-dlp/yt-dlp/issues/7890)) by [ringus1](https://github.com/ringus1)
- [Improve format sorting](https://github.com/yt-dlp/yt-dlp/commit/308936619c8a4f3a52d73c829c2006ff6c55fea2) ([#8074](https://github.com/yt-dlp/yt-dlp/issues/8074)) by [fireattack](https://github.com/fireattack)
- reel: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/bb5d84c9d2f1e978c3eddfb5ccbe138036682a36) ([#7564](https://github.com/yt-dlp/yt-dlp/issues/7564)) by [bashonly](https://github.com/bashonly), [demon071](https://github.com/demon071)
- **fox**: [Support foxsports.com](https://github.com/yt-dlp/yt-dlp/commit/30b29f37159e9226e2f2d5434c9a4096ac4efa2e) ([#7724](https://github.com/yt-dlp/yt-dlp/issues/7724)) by [ischmidt20](https://github.com/ischmidt20)
- **funker530**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/0ce1f48bf1cb78d40d734ce73ee1c90eccf92274) ([#8040](https://github.com/yt-dlp/yt-dlp/issues/8040)) by [04-pasha-04](https://github.com/04-pasha-04)
- **generic**
- [Fix KVS thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/53675852195d8dd859555d4789944a6887171ff8) by [bashonly](https://github.com/bashonly)
- [Fix generic title for embeds](https://github.com/yt-dlp/yt-dlp/commit/994f7ef8e6003f4b7b258528755d0b6adcc31714) by [pukkandan](https://github.com/pukkandan)
- **gofile**: [Update token](https://github.com/yt-dlp/yt-dlp/commit/99c99c7185f5d8e9b3699a6fc7f86ec663d7b97e) by [bashonly](https://github.com/bashonly)
- **hotstar**
- [Extract `release_year`](https://github.com/yt-dlp/yt-dlp/commit/7237c8dca0590aa7438ade93f927df88c9381ec7) ([#7869](https://github.com/yt-dlp/yt-dlp/issues/7869)) by [Rajeshwaran2001](https://github.com/Rajeshwaran2001)
- [Make metadata extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/30ea88591b728cca0896018dbf67c2298070c669) by [bashonly](https://github.com/bashonly)
- [Support `/clips/` URLs](https://github.com/yt-dlp/yt-dlp/commit/86eeb044c2342d68c6ef177577f87852e6badd85) ([#7710](https://github.com/yt-dlp/yt-dlp/issues/7710)) by [bashonly](https://github.com/bashonly)
- **hungama**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/4b3a6ef1b3e235ba9a45142830b6edb357c71696) ([#7757](https://github.com/yt-dlp/yt-dlp/issues/7757)) by [bashonly](https://github.com/bashonly), [Yalab7](https://github.com/Yalab7)
- **indavideoembed**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/63e0c5748c0eb461a2ccca4181616eb930b4b750) ([#8129](https://github.com/yt-dlp/yt-dlp/issues/8129)) by [aky-01](https://github.com/aky-01)
- **iprima**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/568f08051841aedea968258889539741e26009e9) ([#7216](https://github.com/yt-dlp/yt-dlp/issues/7216)) by [std-move](https://github.com/std-move)
- **lbry**: [Fix original format extraction](https://github.com/yt-dlp/yt-dlp/commit/127a22460658ac39cbe5c4b3fb88d578363e0dfa) ([#7711](https://github.com/yt-dlp/yt-dlp/issues/7711)) by [bashonly](https://github.com/bashonly)
- **lecturio**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/efa2339502a37cf13ae7f143bd8b2c28f452d1cd) ([#7649](https://github.com/yt-dlp/yt-dlp/issues/7649)) by [simon300000](https://github.com/simon300000)
- **magellantv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f4ea501551526ebcb54d19b84cf0ebe798583a85) ([#7616](https://github.com/yt-dlp/yt-dlp/issues/7616)) by [bashonly](https://github.com/bashonly)
- **massengeschmack.tv**: [Fix title extraction](https://github.com/yt-dlp/yt-dlp/commit/81f46ac573dc443ad48560f308582a26784d3015) ([#7813](https://github.com/yt-dlp/yt-dlp/issues/7813)) by [sb0stn](https://github.com/sb0stn)
- **media.ccc.de**: lists: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/cf11b40ac40e3d23a6352753296f3a732886efb9) ([#8144](https://github.com/yt-dlp/yt-dlp/issues/8144)) by [Rohxn16](https://github.com/Rohxn16)
- **mediaite**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/630a55df8de7747e79aa680959d785dfff2c4b76) ([#7923](https://github.com/yt-dlp/yt-dlp/issues/7923)) by [Grabien](https://github.com/Grabien)
- **mediaklikk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6e07e4bc7e59f5bdb60e93c011e57b18b009f2b5) ([#8086](https://github.com/yt-dlp/yt-dlp/issues/8086)) by [bashonly](https://github.com/bashonly), [zhallgato](https://github.com/zhallgato)
- **mediastream**: [Make embed extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/635ae31f68a3ac7f6393d59657ed711e34ee3552) by [bashonly](https://github.com/bashonly)
- **mixcloud**: [Update API URL](https://github.com/yt-dlp/yt-dlp/commit/7b71643cc986de9a3768dac4ac9b64f4d05e7f5e) ([#8114](https://github.com/yt-dlp/yt-dlp/issues/8114)) by [garret1317](https://github.com/garret1317)
- **monstercat**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/eaee21bf71889d495076037cbe590c8c0b21ef3a) ([#8133](https://github.com/yt-dlp/yt-dlp/issues/8133)) by [garret1317](https://github.com/garret1317)
- **motortrendondemand**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c03a58ec9933e4a42c2d8fa80b8a0ddb2cde64e6) ([#7683](https://github.com/yt-dlp/yt-dlp/issues/7683)) by [AmirAflak](https://github.com/AmirAflak)
- **museai**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/65cfa2b057d7946fbe322155a778fe206556d0c6) ([#7614](https://github.com/yt-dlp/yt-dlp/issues/7614)) by [bashonly](https://github.com/bashonly)
- **mzaalo**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/d7aee8e310b2c4f21d50aac0b420e1b3abde21a4) by [bashonly](https://github.com/bashonly)
- **n1info**: article: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/8ac5b6d96ae5c60cd5ae2495949e0068a6754c45) ([#7373](https://github.com/yt-dlp/yt-dlp/issues/7373)) by [u-spec-png](https://github.com/u-spec-png)
- **nfl.com**: plus, replay: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/1eaca74bc2ca0f5b1ec532f24c61de44f2e8cb2d) ([#7838](https://github.com/yt-dlp/yt-dlp/issues/7838)) by [bashonly](https://github.com/bashonly)
- **niconicochannelplus**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/698beb9a497f51693e64d167e572ff9efa4bc25f) ([#5686](https://github.com/yt-dlp/yt-dlp/issues/5686)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **nitter**: [Fix title extraction fallback](https://github.com/yt-dlp/yt-dlp/commit/a83da3717d30697102e76f63a6f29d77f9373c2a) ([#8102](https://github.com/yt-dlp/yt-dlp/issues/8102)) by [ApoorvShah111](https://github.com/ApoorvShah111)
- **noodlemagazine**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/bae4834245a708fff97219849ec880c319c88bc6) ([#7830](https://github.com/yt-dlp/yt-dlp/issues/7830)) by [RedDeffender](https://github.com/RedDeffender) (With fixes in [69dbfe0](https://github.com/yt-dlp/yt-dlp/commit/69dbfe01c47cd078682a87f179f5846e2679e927) by [bashonly](https://github.com/bashonly))
- **novaembed**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2269065ad60cb0ab62408ae6a7b20283e5252232) ([#7910](https://github.com/yt-dlp/yt-dlp/issues/7910)) by [std-move](https://github.com/std-move)
- **patreoncampaign**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/11de6fec9c9b8d34d1f90c8e6218ec58a3471b58) ([#7664](https://github.com/yt-dlp/yt-dlp/issues/7664)) by [bashonly](https://github.com/bashonly)
- **pbs**: [Add extractor `PBSKidsIE`](https://github.com/yt-dlp/yt-dlp/commit/6d6081dda1290a85bdab6717f239289e3aa74c8e) ([#7602](https://github.com/yt-dlp/yt-dlp/issues/7602)) by [snixon](https://github.com/snixon)
- **piapro**: [Support `/content` URL](https://github.com/yt-dlp/yt-dlp/commit/1bcb9fe8715b1f288efc322be3de409ee0597080) ([#7592](https://github.com/yt-dlp/yt-dlp/issues/7592)) by [FinnRG](https://github.com/FinnRG)
- **piaulizaportal**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6636021206dad17c7745ae6bce6cb73d6f2ef319) ([#7903](https://github.com/yt-dlp/yt-dlp/issues/7903)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **picartovod**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/db9743894071760f994f640a4c24358f749a78c0) ([#7727](https://github.com/yt-dlp/yt-dlp/issues/7727)) by [Frankgoji](https://github.com/Frankgoji)
- **pornbox**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/40999467f72db074a3f13057da9bf82a857530fe) ([#7386](https://github.com/yt-dlp/yt-dlp/issues/7386)) by [niemands](https://github.com/niemands)
- **pornhub**: [Update access cookies for UK](https://github.com/yt-dlp/yt-dlp/commit/1d3d579c2142f69831b6ae140e1d8e824e07fa0e) ([#7591](https://github.com/yt-dlp/yt-dlp/issues/7591)) by [zhong-yiyu](https://github.com/zhong-yiyu)
- **pr0gramm**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/b532556d0a85e7d76f8f0880861232fb706ddbc5) ([#8151](https://github.com/yt-dlp/yt-dlp/issues/8151)) by [Grub4K](https://github.com/Grub4K)
- **radiofrance**: [Add support for livestreams, podcasts, playlists](https://github.com/yt-dlp/yt-dlp/commit/ba8e9eb2c8bbb699f314169fab8e544437ad731e) ([#7006](https://github.com/yt-dlp/yt-dlp/issues/7006)) by [elyse0](https://github.com/elyse0)
- **rbgtum**: [Fix extraction and support new URL format](https://github.com/yt-dlp/yt-dlp/commit/5fccabac27ca3c1165ade1b0df6fbadc24258dc2) ([#7690](https://github.com/yt-dlp/yt-dlp/issues/7690)) by [simon300000](https://github.com/simon300000)
- **reddit**
- [Extract subtitles](https://github.com/yt-dlp/yt-dlp/commit/20c3c9b433dd47faf0dbde6b46e4e34eb76109a5) by [bashonly](https://github.com/bashonly)
- [Fix thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/9a04113dfbb69b904e4e2bea736da293505786b8) by [bashonly](https://github.com/bashonly)
- **rtvslo**: [Fix format extraction](https://github.com/yt-dlp/yt-dlp/commit/94389b225d9bcf29aa7ba8afaf1bbd7c62204eae) ([#8131](https://github.com/yt-dlp/yt-dlp/issues/8131)) by [bashonly](https://github.com/bashonly)
- **rule34video**: [Extract tags](https://github.com/yt-dlp/yt-dlp/commit/58493923e9b6f774947a2131e5258e9f3cf816be) ([#7117](https://github.com/yt-dlp/yt-dlp/issues/7117)) by [soundchaser128](https://github.com/soundchaser128)
- **rumble**: [Fix embed extraction](https://github.com/yt-dlp/yt-dlp/commit/23d829a3420450bcfb0788e6fb2cf4f6acdbe596) ([#8035](https://github.com/yt-dlp/yt-dlp/issues/8035)) by [trislee](https://github.com/trislee)
- **s4c**
- [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b9de629d78ce31699f2de886071dc257830f9676) ([#7730](https://github.com/yt-dlp/yt-dlp/issues/7730)) by [ifan-t](https://github.com/ifan-t)
- [Add series support and extract subs/thumbs](https://github.com/yt-dlp/yt-dlp/commit/fe371dcf0ba5ce8d42480eade54eeeac99ab3cb0) ([#7776](https://github.com/yt-dlp/yt-dlp/issues/7776)) by [ifan-t](https://github.com/ifan-t)
- **sohu**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5be7e978867b5f66ad6786c674d79d40e950ae16) ([#7628](https://github.com/yt-dlp/yt-dlp/issues/7628)) by [bashonly](https://github.com/bashonly), [c-basalt](https://github.com/c-basalt)
- **stageplus**: [Fix m3u8 extraction](https://github.com/yt-dlp/yt-dlp/commit/56b3dc03354b75be995759d8441d2754c0442b9a) ([#7929](https://github.com/yt-dlp/yt-dlp/issues/7929)) by [bashonly](https://github.com/bashonly)
- **streamanity**: [Remove](https://github.com/yt-dlp/yt-dlp/commit/2cfe221fbbe46faa3f46552c08d947a51f424903) ([#7571](https://github.com/yt-dlp/yt-dlp/issues/7571)) by [alerikaisattera](https://github.com/alerikaisattera)
- **svtplay**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/2301b5c1b77a65abbb46b72f91e1e4666fd5d985) ([#7789](https://github.com/yt-dlp/yt-dlp/issues/7789)) by [dirkf](https://github.com/dirkf), [wader](https://github.com/wader)
- **tbsjp**: [Add episode, program, playlist extractors](https://github.com/yt-dlp/yt-dlp/commit/876b70c8edf4c0147f180bd981fbc4d625cbfb9c) ([#7765](https://github.com/yt-dlp/yt-dlp/issues/7765)) by [garret1317](https://github.com/garret1317)
- **tiktok**
- [Fix audio-only format extraction](https://github.com/yt-dlp/yt-dlp/commit/b09bd0c19648f60c59fb980cd454cb0069959fb9) ([#7712](https://github.com/yt-dlp/yt-dlp/issues/7712)) by [bashonly](https://github.com/bashonly)
- [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/069cbece9dba6384f1cc5fcfc7ce562a31af42fc) by [bashonly](https://github.com/bashonly)
- **triller**: [Fix unlisted video extraction](https://github.com/yt-dlp/yt-dlp/commit/39837ae3199aa934299badbd0d63243ed639e6c8) ([#7670](https://github.com/yt-dlp/yt-dlp/issues/7670)) by [bashonly](https://github.com/bashonly)
- **tv5mondeplus**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7d3d658f4c558ee7d72b1c01b46f2126948681cd) ([#7952](https://github.com/yt-dlp/yt-dlp/issues/7952)) by [dirkf](https://github.com/dirkf), [korli](https://github.com/korli)
- **twitcasting**
- [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/cebbd33b1c678149fc8f0e254db6fc0da317ea80) ([#8120](https://github.com/yt-dlp/yt-dlp/issues/8120)) by [c-basalt](https://github.com/c-basalt)
- [Support `--wait-for-video`](https://github.com/yt-dlp/yt-dlp/commit/c1d71d0d9f41db5e4306c86af232f5f6220a130b) ([#7975](https://github.com/yt-dlp/yt-dlp/issues/7975)) by [at-wat](https://github.com/at-wat)
- **twitter**
- [Add fallback, improve error handling](https://github.com/yt-dlp/yt-dlp/commit/6014355c6142f68e20c8374e3787e5b5820f19e2) ([#7621](https://github.com/yt-dlp/yt-dlp/issues/7621)) by [bashonly](https://github.com/bashonly)
- [Fix GraphQL and legacy API](https://github.com/yt-dlp/yt-dlp/commit/92315c03774cfabb3a921884326beb4b981f786b) ([#7516](https://github.com/yt-dlp/yt-dlp/issues/7516)) by [bashonly](https://github.com/bashonly)
- [Fix retweet extraction and syndication API](https://github.com/yt-dlp/yt-dlp/commit/a006ce2b27357c15792eb5c18f06765e640b801c) ([#8016](https://github.com/yt-dlp/yt-dlp/issues/8016)) by [bashonly](https://github.com/bashonly)
- [Revert 92315c03774cfabb3a921884326beb4b981f786b](https://github.com/yt-dlp/yt-dlp/commit/b03fa7834579a01cc5fba48c0e73488a16683d48) by [pukkandan](https://github.com/pukkandan)
- spaces
- [Fix format protocol](https://github.com/yt-dlp/yt-dlp/commit/613dbce177d34ffc31053e8e01acf4bb107bcd1e) ([#7550](https://github.com/yt-dlp/yt-dlp/issues/7550)) by [bashonly](https://github.com/bashonly)
- [Pass referer header to downloader](https://github.com/yt-dlp/yt-dlp/commit/c6ef553792ed48462f9fd0e78143bef6b1a71c2e) by [bashonly](https://github.com/bashonly)
- **unsupported**: [List more sites with DRM](https://github.com/yt-dlp/yt-dlp/commit/e7057383380d7d53815f8feaf90ca3dcbde88983) by [pukkandan](https://github.com/pukkandan)
- **videa**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/98eac0e6ba0e510ae7dfdfd249d42ee71fb272b1) ([#8003](https://github.com/yt-dlp/yt-dlp/issues/8003)) by [aky-01](https://github.com/aky-01), [hatsomatt](https://github.com/hatsomatt)
- **vrt**: [Update token signing key](https://github.com/yt-dlp/yt-dlp/commit/325191d0c9bf3fe257b8a7c2eb95080f44f6ddfc) ([#7519](https://github.com/yt-dlp/yt-dlp/issues/7519)) by [Zprokkel](https://github.com/Zprokkel)
- **wat.tv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/7cccab79e7d00ed965b48b8cefce1da8a0513409) ([#7898](https://github.com/yt-dlp/yt-dlp/issues/7898)) by [davinkevin](https://github.com/davinkevin)
- **wdr**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/5d0395498d7065aa5e55bac85fa9354b4b0d48eb) ([#7979](https://github.com/yt-dlp/yt-dlp/issues/7979)) by [szabyg](https://github.com/szabyg)
- **web.archive**: vlive: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/9652bca1bd02f6bc1b8cb1e186f2ccbf32225561) ([#8132](https://github.com/yt-dlp/yt-dlp/issues/8132)) by [bashonly](https://github.com/bashonly)
- **weibo**: [Fix extractor and support user extraction](https://github.com/yt-dlp/yt-dlp/commit/69b03f84f8378b0b5a2fbae56f9b7d860b2f529e) ([#7657](https://github.com/yt-dlp/yt-dlp/issues/7657)) by [c-basalt](https://github.com/c-basalt)
- **weverse**: [Support extraction without auth](https://github.com/yt-dlp/yt-dlp/commit/c2d8ee0000302aba63476b7d5bd8793e57b6c8c6) ([#7924](https://github.com/yt-dlp/yt-dlp/issues/7924)) by [seproDev](https://github.com/seproDev)
- **wimbledon**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a15fcd299e767a510debd8dc1646fe863b96ce0e) ([#7551](https://github.com/yt-dlp/yt-dlp/issues/7551)) by [nnoboa](https://github.com/nnoboa)
- **wrestleuniverseppv**: [Fix HLS AES key extraction](https://github.com/yt-dlp/yt-dlp/commit/dae349da97cafe7357106a8f3187fd48a2ad1210) by [bashonly](https://github.com/bashonly)
- **youtube**
- [Add `player_params` extractor arg](https://github.com/yt-dlp/yt-dlp/commit/ba06d77a316650ff057347d224b5afa8b203ad65) ([#7719](https://github.com/yt-dlp/yt-dlp/issues/7719)) by [coletdjnz](https://github.com/coletdjnz)
- [Fix `player_params` arg being converted to lowercase](https://github.com/yt-dlp/yt-dlp/commit/546b2c28a106cf8101d481b215b676d1b091d276) by [coletdjnz](https://github.com/coletdjnz)
- [Fix consent cookie](https://github.com/yt-dlp/yt-dlp/commit/378ae9f9fb8e8c86e6ac89c4c5b815b48ce93620) ([#7774](https://github.com/yt-dlp/yt-dlp/issues/7774)) by [coletdjnz](https://github.com/coletdjnz)
- tab: [Detect looping feeds](https://github.com/yt-dlp/yt-dlp/commit/1ba6fe9db5f660d5538588315c23ad6cf0371c5f) ([#6621](https://github.com/yt-dlp/yt-dlp/issues/6621)) by [coletdjnz](https://github.com/coletdjnz)
- **zaiko**: [Improve thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/ecef42c3adbcb6a84405139047923c4967316f28) ([#8054](https://github.com/yt-dlp/yt-dlp/issues/8054)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **zee5**: [Update access token endpoint](https://github.com/yt-dlp/yt-dlp/commit/a0de8bb8601146b8f87bf7cd562eef8bfb4690be) ([#7914](https://github.com/yt-dlp/yt-dlp/issues/7914)) by [bashonly](https://github.com/bashonly)
- **zoom**: [Extract duration](https://github.com/yt-dlp/yt-dlp/commit/66cc64ff6696f9921ff112a278542f8d999ffea4) by [bashonly](https://github.com/bashonly)
#### Downloader changes
- **external**
- [Fix ffmpeg input from stdin](https://github.com/yt-dlp/yt-dlp/commit/e57eb98222d29cc4c09ee975d3c492274a6e5be3) ([#7655](https://github.com/yt-dlp/yt-dlp/issues/7655)) by [bashonly](https://github.com/bashonly)
- [Fixes to cookie handling](https://github.com/yt-dlp/yt-dlp/commit/42ded0a429c20ec13dc006825e1508d9a02f0ad4) by [bashonly](https://github.com/bashonly)
#### Postprocessor changes
- **embedthumbnail**: [Support `m4v`](https://github.com/yt-dlp/yt-dlp/commit/8a4cd12c8f8e93292e3e95200b9d17a3af39624c) ([#7583](https://github.com/yt-dlp/yt-dlp/issues/7583)) by [Neurognostic](https://github.com/Neurognostic)
#### Networking changes
- [Add module](https://github.com/yt-dlp/yt-dlp/commit/c365dba8430ee33abda85d31f95128605bf240eb) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [pukkandan](https://github.com/pukkandan)
- [Add request handler preference framework](https://github.com/yt-dlp/yt-dlp/commit/db7b054a6111ca387220d0eb87bf342f9c130eb8) ([#7603](https://github.com/yt-dlp/yt-dlp/issues/7603)) by [coletdjnz](https://github.com/coletdjnz)
- [Add strict Request extension checking](https://github.com/yt-dlp/yt-dlp/commit/86aea0d3a213da3be1da638b9b828e6f0ee1d59f) ([#7604](https://github.com/yt-dlp/yt-dlp/issues/7604)) by [coletdjnz](https://github.com/coletdjnz)
- [Fix POST requests with zero-length payloads](https://github.com/yt-dlp/yt-dlp/commit/71baa490ebd3655746430f208a9b605d120cd315) ([#7648](https://github.com/yt-dlp/yt-dlp/issues/7648)) by [bashonly](https://github.com/bashonly)
- [Fix `--legacy-server-connect`](https://github.com/yt-dlp/yt-dlp/commit/75dc8e673b481a82d0688aeec30f6c65d82bb359) ([#7645](https://github.com/yt-dlp/yt-dlp/issues/7645)) by [bashonly](https://github.com/bashonly)
- [Fix various socks proxy bugs](https://github.com/yt-dlp/yt-dlp/commit/20fbbd9249a2f26c7ae579bde5ba5d69aa8fac69) ([#8065](https://github.com/yt-dlp/yt-dlp/issues/8065)) by [coletdjnz](https://github.com/coletdjnz)
- [Ignore invalid proxies in env](https://github.com/yt-dlp/yt-dlp/commit/bbeacff7fcaa3b521066088a5ccbf34ef5070d1d) ([#7704](https://github.com/yt-dlp/yt-dlp/issues/7704)) by [coletdjnz](https://github.com/coletdjnz)
- [Rewrite architecture](https://github.com/yt-dlp/yt-dlp/commit/227bf1a33be7b89cd7d44ad046844c4ccba104f4) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [coletdjnz](https://github.com/coletdjnz)
- **Request Handler**
- urllib
- [Remove dot segments during URL normalization](https://github.com/yt-dlp/yt-dlp/commit/4bf912282a34b58b6b35d8f7e6be535770c89c76) ([#7662](https://github.com/yt-dlp/yt-dlp/issues/7662)) by [coletdjnz](https://github.com/coletdjnz)
- [Simplify gzip decoding](https://github.com/yt-dlp/yt-dlp/commit/59e92b1f1833440bb2190f847eb735cf0f90bc85) ([#7611](https://github.com/yt-dlp/yt-dlp/issues/7611)) by [Grub4K](https://github.com/Grub4K) (With fixes in [77bff23](https://github.com/yt-dlp/yt-dlp/commit/77bff23ee97565bab2e0d75b893a21bf7983219a))
#### Misc. changes
- **build**: [Make sure deprecated modules are added](https://github.com/yt-dlp/yt-dlp/commit/131d132da5c98c6c78bd7eed4b37f4458561b3d9) by [pukkandan](https://github.com/pukkandan)
- **cleanup**
- [Add color to `download-archive` message](https://github.com/yt-dlp/yt-dlp/commit/2b029ca0a9f9105c4f7626993fa60e54c9782749) ([#5138](https://github.com/yt-dlp/yt-dlp/issues/5138)) by [aaruni96](https://github.com/aaruni96), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
- Miscellaneous
- [6148833](https://github.com/yt-dlp/yt-dlp/commit/6148833f5ceb7674142ddb8d761ffe03cee7df69), [62b5c94](https://github.com/yt-dlp/yt-dlp/commit/62b5c94cadaa5f596dc1a7083db9db12efe357be) by [pukkandan](https://github.com/pukkandan)
- [5ca095c](https://github.com/yt-dlp/yt-dlp/commit/5ca095cbcde3e32642a4fe5b2d69e8e3c785a021) by [barsnick](https://github.com/barsnick), [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K), [sqrtNOT](https://github.com/sqrtNOT)
- [088add9](https://github.com/yt-dlp/yt-dlp/commit/088add9567d39b758737e4299a0e619fd89d2e8f) by [Grub4K](https://github.com/Grub4K)
- **devscripts**: `make_changelog`: [Fix changelog grouping and add networking group](https://github.com/yt-dlp/yt-dlp/commit/30ba233d4cee945756ed7344e7ddb3a90d2ae608) ([#8124](https://github.com/yt-dlp/yt-dlp/issues/8124)) by [Grub4K](https://github.com/Grub4K)
- **docs**: [Update collaborators](https://github.com/yt-dlp/yt-dlp/commit/1be0a96a4d14f629097509fcc89d15f69a8243c7) by [Grub4K](https://github.com/Grub4K)
- **test**
- [Add tests for socks proxies](https://github.com/yt-dlp/yt-dlp/commit/fcd6a76adc49d5cd8783985c7ce35384b72e545f) ([#7908](https://github.com/yt-dlp/yt-dlp/issues/7908)) by [coletdjnz](https://github.com/coletdjnz)
- [Fix `httplib_validation_errors` test for old Python versions](https://github.com/yt-dlp/yt-dlp/commit/95abea9a03289da1384e5bda3d590223ccc0a238) ([#7677](https://github.com/yt-dlp/yt-dlp/issues/7677)) by [coletdjnz](https://github.com/coletdjnz)
- [Fix `test_load_certifi`](https://github.com/yt-dlp/yt-dlp/commit/de20687ee6b742646128a7629b57096631a20619) by [pukkandan](https://github.com/pukkandan)
- download: [Test for `expected_exception`](https://github.com/yt-dlp/yt-dlp/commit/661c9a1d029296b28e0b2f8be8a72a43abaf6536) by [at-wat](https://github.com/at-wat)
### 2023.07.06
#### Important changes

View File

@ -76,7 +76,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
# NEW FEATURES
* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@42f2d4**](https://github.com/ytdl-org/youtube-dl/commit/07af47960f3bb262ead02490ce65c8c45c01741e) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@66ab08**](https://github.com/ytdl-org/youtube-dl/commit/66ab0814c4baa2dc79c2dd5287bc0ad61a37c5b9) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
@ -1800,7 +1800,7 @@ The following extractors use this feature:
#### youtube
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
@ -1809,6 +1809,7 @@ The following extractors use this feature:
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8)
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
* `innertube_key`: Innertube API key to use for all API requests
* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
#### youtubetab (YouTube playlists, channels, feeds, etc.)
* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
@ -1845,6 +1846,9 @@ The following extractors use this feature:
* `vcodec`: vcodec to ignore - one or more of `h264`, `h265`, `dvh265`
* `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv`
#### niconicochannelplus
* `max_comments`: Maximum number of comments to extract - default is `120`
#### tiktok
* `api_hostname`: Hostname to use for mobile API requests, e.g. `api-h2.tiktokv.com`
* `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`, e.g. `20.2.1`
@ -1865,6 +1869,9 @@ The following extractors use this feature:
#### nhkradirulive (NHK らじる★らじる LIVE)
* `area`: Which regional variation to extract. Valid areas are: `sapporo`, `sendai`, `tokyo`, `nagoya`, `osaka`, `hiroshima`, `matsuyama`, `fukuoka`. Defaults to `tokyo`
#### nflplusreplay
* `type`: Type(s) of game replays to extract. Valid types are: `full_game`, `full_game_spanish`, `condensed_game` and `all_22`. You can use `all` to extract all available replay types, which is the default
**Note**: These options may be changed/removed in the future without concern for backward compatibility
<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->

View File

@ -88,5 +88,15 @@
"when": "59e92b1f1833440bb2190f847eb735cf0f90bc85",
"short": "[rh:urllib] Simplify gzip decoding (#7611)",
"authors": ["Grub4K"]
},
{
"action": "add",
"when": "c1d71d0d9f41db5e4306c86af232f5f6220a130b",
"short": "[priority] **The minimum *recommended* Python version has been raised to 3.8**\nSince Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803)"
},
{
"action": "add",
"when": "61bdf15fc7400601c3da1aa7a43917310a5bf391",
"short": "[priority] Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg)\n - The shell escape function is now using `\"\"` instead of `\\\"`.\n - `utils.Popen` has been patched to properly quote commands."
}
]

View File

@ -260,7 +260,7 @@ class CommitRange:
AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
MESSAGE_RE = re.compile(r'''
(?:\[(?P<prefix>[^\]]+)\]\ )?
(?:(?P<sub_details>`?[^:`]+`?): )?
(?:(?P<sub_details>`?[\w.-]+`?): )?
(?P<message>.+?)
(?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
''', re.VERBOSE | re.DOTALL)

View File

@ -77,7 +77,7 @@
- **AnimalPlanet**
- **ant1newsgr:article**: ant1news.gr articles
- **ant1newsgr:embed**: ant1news.gr embedded videos
- **ant1newsgr:watch**: ant1news.gr videos
- **antenna:watch**: antenna.gr and ant1news.gr videos
- **Anvato**
- **aol.com**: Yahoo screen and movies
- **APA**
@ -98,8 +98,6 @@
- **ArteTVCategory**
- **ArteTVEmbed**
- **ArteTVPlaylist**
- **AsianCrush**
- **AsianCrushPlaylist**
- **AtresPlayer**: [*atresplayer*](## "netrc machine")
- **AtScaleConfEvent**
- **ATTTechChannel**
@ -118,6 +116,7 @@
- **awaan:live**
- **awaan:season**
- **awaan:video**
- **axs.tv**
- **AZMedien**: AZ Medien videos
- **BaiduVideo**: 百度视频
- **BanBye**
@ -162,11 +161,16 @@
- **BilibiliAudioAlbum**
- **BiliBiliBangumi**
- **BiliBiliBangumiMedia**
- **BiliBiliBangumiSeason**
- **BilibiliCollectionList**
- **BilibiliFavoritesList**
- **BiliBiliPlayer**
- **BilibiliPlaylist**
- **BiliBiliSearch**: Bilibili video search; "bilisearch:" prefix
- **BilibiliSeriesList**
- **BilibiliSpaceAudio**
- **BilibiliSpacePlaylist**
- **BilibiliSpaceVideo**
- **BilibiliWatchlater**
- **BiliIntl**: [*biliintl*](## "netrc machine")
- **biliIntl:series**: [*biliintl*](## "netrc machine")
- **BiliLive**
@ -201,6 +205,8 @@
- **BreitBart**
- **brightcove:legacy**
- **brightcove:new**
- **Brilliantpala:Classes**: [*brilliantpala*](## "netrc machine") VoD on classes.brilliantpala.org
- **Brilliantpala:Elearn**: [*brilliantpala*](## "netrc machine") VoD on elearn.brilliantpala.org
- **BRMediathek**: Bayerischer Rundfunk Mediathek
- **bt:article**: Bergens Tidende Articles
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
@ -220,14 +226,17 @@
- **Camsoda**
- **CamtasiaEmbed**
- **CamWithHer**
- **Canal1**
- **CanalAlpha**
- **canalc2.tv**
- **Canalplus**: mycanal.fr and piwiplus.fr
- **CaracolTvPlay**: [*caracoltv-play*](## "netrc machine")
- **CarambaTV**
- **CarambaTVPage**
- **CartoonNetwork**
- **cbc.ca**
- **cbc.ca:player**
- **cbc.ca:player:playlist**
- **CBS**
- **CBSInteractive**
- **CBSLocal**
@ -257,6 +266,8 @@
- **Cinchcast**
- **Cinemax**
- **CinetecaMilano**
- **Cineverse**
- **CineverseDetails**
- **CiscoLiveSearch**
- **CiscoLiveSession**
- **ciscowebex**: Cisco Webex
@ -365,7 +376,7 @@
- **Dotsub**
- **Douyin**
- **DouyuShow**
- **DouyuTV**: 斗鱼
- **DouyuTV**: 斗鱼直播
- **DPlay**
- **DRBonanza**
- **Drooble**
@ -408,6 +419,7 @@
- **Engadget**
- **Epicon**
- **EpiconSeries**
- **eplus:inbound**: e+ (イープラス) overseas
- **Epoch**
- **Eporner**
- **EroProfile**: [*eroprofile*](## "netrc machine")
@ -732,6 +744,7 @@
- **lynda**: [*lynda*](## "netrc machine") lynda.com videos
- **lynda:course**: [*lynda*](## "netrc machine") lynda.com online courses
- **m6**
- **MagellanTV**
- **MagentaMusik360**
- **mailru**: Видео@Mail.Ru
- **mailru:music**: Музыка@Mail.Ru
@ -812,6 +825,7 @@
- **Mofosex**
- **MofosexEmbed**
- **Mojvideo**
- **Monstercat**
- **MonsterSirenHypergryphMusic**
- **Morningstar**: morningstar.com
- **Motherless**
@ -840,6 +854,7 @@
- **MujRozhlas**
- **Murrtube**
- **MurrtubeUser**: Murrtube user profile
- **MuseAI**
- **MuseScore**
- **MusicdexAlbum**
- **MusicdexArtist**
@ -944,6 +959,9 @@
- **niconico:playlist**
- **niconico:series**
- **niconico:tag**: NicoNico video tag URLs
- **NiconicoChannelPlus**: ニコニコチャンネルプラス
- **NiconicoChannelPlus:channel:lives**: ニコニコチャンネルプラス - チャンネル - ライブリスト. nicochannel.jp/channel/lives
- **NiconicoChannelPlus:channel:videos**: ニコニコチャンネルプラス - チャンネル - 動画リスト. nicochannel.jp/channel/videos
- **NiconicoUser**
- **nicovideo:search**: Nico video search; "nicosearch:" prefix
- **nicovideo:search:date**: Nico video search, newest first; "nicosearchdate:" prefix
@ -1046,6 +1064,7 @@
- **Patreon**
- **PatreonCampaign**
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
- **PBSKids**
- **PearVideo**
- **PeekVids**
- **peer.tv**
@ -1062,6 +1081,7 @@
- **phoenix.de**
- **Photobucket**
- **Piapro**: [*piapro*](## "netrc machine")
- **PIAULIZAPortal**: ulizaportal.jp - PIA LIVE STREAM
- **Picarto**
- **PicartoVod**
- **Piksel**
@ -1105,6 +1125,7 @@
- **polskieradio:podcast:list**
- **Popcorntimes**
- **PopcornTV**
- **Pornbox**
- **PornCom**
- **PornerBros**
- **Pornez**
@ -1121,7 +1142,6 @@
- **PornTop**
- **PornTube**
- **Pr0gramm**
- **Pr0grammStatic**
- **PrankCast**
- **PremiershipRugby**
- **PressTV**
@ -1156,6 +1176,10 @@
- **radiocanada**
- **radiocanada:audiovideo**
- **radiofrance**
- **RadioFranceLive**
- **RadioFrancePodcast**
- **RadioFranceProfile**
- **RadioFranceProgramSchedule**
- **RadioJavan**
- **radiokapital**
- **radiokapital:show**
@ -1177,6 +1201,7 @@
- **RayWenderlichCourse**
- **RbgTum**
- **RbgTumCourse**
- **RbgTumNewCourse**
- **RBMARadio**
- **RCS**
- **RCSEmbeds**
@ -1259,6 +1284,8 @@
- **Ruutu**
- **Ruv**
- **ruv.is:spila**
- **S4C**
- **S4CSeries**
- **safari**: [*safari*](## "netrc machine") safaribooksonline.com online video
- **safari:api**: [*safari*](## "netrc machine")
- **safari:course**: [*safari*](## "netrc machine") safaribooksonline.com online courses
@ -1325,6 +1352,7 @@
- **Smotrim**
- **Snotr**
- **Sohu**
- **SohuV**
- **SonyLIV**: [*sonyliv*](## "netrc machine")
- **SonyLIVSeries**
- **soundcloud**: [*soundcloud*](## "netrc machine")
@ -1378,7 +1406,6 @@
- **StoryFireSeries**
- **StoryFireUser**
- **Streamable**
- **Streamanity**
- **streamcloud.eu**
- **StreamCZ**
- **StreamFF**
@ -1403,6 +1430,9 @@
- **Tagesschau**
- **Tass**
- **TBS**
- **TBSJPEpisode**
- **TBSJPPlaylist**
- **TBSJPProgram**
- **TDSLifeway**
- **Teachable**: [*teachable*](## "netrc machine")
- **TeachableCourse**: [*teachable*](## "netrc machine")
@ -1702,7 +1732,6 @@
- **wdr:mobile**: (**Currently broken**)
- **WDRElefant**
- **WDRPage**
- **web.archive:vlive**: web.archive.org saved vlive videos
- **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix
- **Webcamerapl**
- **Webcaster**
@ -1710,7 +1739,8 @@
- **WebOfStories**
- **WebOfStoriesPlaylist**
- **Weibo**
- **WeiboMobile**
- **WeiboUser**
- **WeiboVideo**
- **WeiqiTV**: WQTV
- **wetv:episode**
- **WeTvSeries**
@ -1726,6 +1756,7 @@
- **Whyp**
- **wikimedia.org**
- **Willow**
- **Wimbledon**
- **WimTV**
- **WinSportsVideo**
- **Wistia**

View File

@ -631,7 +631,6 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(test_dict['playlist'], 'funny videos')
outtmpl_info = {
'id': '1234',
'id': '1234',
'ext': 'mp4',
'width': None,
@ -785,9 +784,9 @@ class TestYoutubeDL(unittest.TestCase):
test('%(title4)#S', 'foo_bar_test')
test('%(title4).10S', ('foo bar ', 'foo bar' + ('#' if compat_os_name == 'nt' else ' ')))
if compat_os_name == 'nt':
test('%(title4)q', ('"foo \\"bar\\" test"', "foo bar test"))
test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', 'id 1 id 2 id 3'))
test('%(formats.0.id)#q', ('"id 1"', 'id 1'))
test('%(title4)q', ('"foo ""bar"" test"', None))
test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', None))
test('%(formats.0.id)#q', ('"id 1"', None))
else:
test('%(title4)q', ('\'foo "bar" test\'', '\'foo bar test\''))
test('%(formats.:.id)#q', "'id 1' 'id 2' 'id 3'")

View File

@ -31,6 +31,7 @@ from yt_dlp.utils import (
DownloadError,
ExtractorError,
UnavailableVideoError,
YoutubeDLError,
format_bytes,
join_nonempty,
)
@ -100,6 +101,8 @@ def generator(test_case, tname):
print_skipping('IE marked as not _WORKING')
for tc in test_cases:
if tc.get('expected_exception'):
continue
info_dict = tc.get('info_dict', {})
params = tc.get('params', {})
if not info_dict.get('id'):
@ -139,6 +142,17 @@ def generator(test_case, tname):
res_dict = None
def match_exception(err):
expected_exception = test_case.get('expected_exception')
if not expected_exception:
return False
if err.__class__.__name__ == expected_exception:
return True
for exc in err.exc_info:
if exc.__class__.__name__ == expected_exception:
return True
return False
def try_rm_tcs_files(tcs=None):
if tcs is None:
tcs = test_cases
@ -161,6 +175,8 @@ def generator(test_case, tname):
except (DownloadError, ExtractorError) as err:
# Check if the exception is not a network related one
if not isinstance(err.exc_info[1], (TransportError, UnavailableVideoError)) or (isinstance(err.exc_info[1], HTTPError) and err.exc_info[1].status == 503):
if match_exception(err):
return
err.msg = f'{getattr(err, "msg", err)} ({tname})'
raise
@ -171,6 +187,10 @@ def generator(test_case, tname):
print(f'Retrying: {try_num} failed tries\n\n##########\n\n')
try_num += 1
except YoutubeDLError as err:
if match_exception(err):
return
raise
else:
break

View File

@ -45,6 +45,9 @@ class TestExecution(unittest.TestCase):
self.assertTrue(os.path.exists(LAZY_EXTRACTORS))
_, stderr = self.run_yt_dlp(opts=('-s', 'test:'))
# `MIN_RECOMMENDED` emits a deprecated feature warning for deprecated python versions
if stderr and stderr.startswith('Deprecated Feature: Support for Python'):
stderr = ''
self.assertFalse(stderr)
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=subprocess.DEVNULL)

View File

@ -269,14 +269,14 @@ class TestNetworkingExceptions:
assert not response.closed
def test_incomplete_read_error(self):
error = IncompleteRead(b'test', 3, cause='test')
error = IncompleteRead(4, 3, cause='test')
assert isinstance(error, IncompleteRead)
assert repr(error) == '<IncompleteRead: 4 bytes read, 3 more expected>'
assert str(error) == error.msg == '4 bytes read, 3 more expected'
assert error.partial == b'test'
assert error.partial == 4
assert error.expected == 3
assert error.cause == 'test'
error = IncompleteRead(b'aaa')
error = IncompleteRead(3)
assert repr(error) == '<IncompleteRead: 3 bytes read>'
assert str(error) == '3 bytes read'

View File

@ -14,6 +14,7 @@ import contextlib
import io
import itertools
import json
import subprocess
import xml.etree.ElementTree
from yt_dlp.compat import (
@ -28,6 +29,7 @@ from yt_dlp.utils import (
InAdvancePagedList,
LazyList,
OnDemandPagedList,
Popen,
age_restricted,
args_to_str,
base_url,
@ -1218,6 +1220,12 @@ class TestUtil(unittest.TestCase):
self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
self.assertEqual(js_to_json('`${name}`', {}), '"name"')
def test_js_to_json_map_array_constructors(self):
self.assertEqual(json.loads(js_to_json('new Map([["a", 5]])')), {'a': 5})
self.assertEqual(json.loads(js_to_json('Array(5, 10)')), [5, 10])
self.assertEqual(json.loads(js_to_json('new Array(15,5)')), [15, 5])
self.assertEqual(json.loads(js_to_json('new Map([Array(5, 10),new Array(15,5)])')), {'5': 10, '15': 5})
def test_extract_attributes(self):
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
@ -2382,6 +2390,21 @@ Line 1
assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=')
assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')
@unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows')
def test_Popen_windows_escaping(self):
def run_shell(args):
stdout, stderr, error = Popen.run(
args, text=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
assert not stderr
assert not error
return stdout
# Test escaping
assert run_shell(['echo', 'test"&']) == '"test""&"\n'
# Test if delayed expansion is disabled
assert run_shell(['echo', '^!']) == '"^!"\n'
assert run_shell('echo "^!"') == '"^!"\n'
if __name__ == '__main__':
unittest.main()

View File

@ -60,7 +60,7 @@ from .postprocessor import (
get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import REPOSITORY, current_git_head, detect_variant
from .update import REPOSITORY, _get_system_deprecation, current_git_head, detect_variant
from .utils import (
DEFAULT_OUTTMPL,
IDENTITY,
@ -239,9 +239,9 @@ class YoutubeDL:
'selected' (check selected formats),
or None (check only if requested by extractor)
paths: Dictionary of output paths. The allowed keys are 'home'
'temp' and the keys of OUTTMPL_TYPES (in utils.py)
'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
outtmpl: Dictionary of templates for output names. Allowed keys
are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
For compatibility with youtube-dl, a single string can also be used
outtmpl_na_placeholder: Placeholder for unavailable meta fields.
restrictfilenames: Do not allow "&" and spaces in file names
@ -422,7 +422,7 @@ class YoutubeDL:
asked whether to download the video.
- Raise utils.DownloadCancelled(msg) to abort remaining
downloads when a video is rejected.
match_filter_func in utils.py is one example for this.
match_filter_func in utils/_utils.py is one example for this.
color: A Dictionary with output stream names as keys
and their respective color policy as values.
Can also just be a single color policy,
@ -640,17 +640,9 @@ class YoutubeDL:
for name, stream in self._out_files.items_ if name != 'console'
})
# The code is left like this to be reused for future deprecations
MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
current_version = sys.version_info[:2]
if current_version < MIN_RECOMMENDED:
msg = ('Support for Python version %d.%d has been deprecated. '
'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
'\n You will no longer receive updates on this version')
if current_version < MIN_SUPPORTED:
msg = 'Python version %d.%d is no longer supported'
self.deprecated_feature(
f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))
system_deprecation = _get_system_deprecation()
if system_deprecation:
self.deprecated_feature(system_deprecation.replace('\n', '\n '))
if self.params.get('allow_unplayable_formats'):
self.report_warning(

View File

@ -30,7 +30,7 @@ compat_os_name = os._name if os.name == 'java' else os.name
if compat_os_name == 'nt':
def compat_shlex_quote(s):
import re
return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
return s if re.match(r'^[-_\w./]+$', s) else s.replace('"', '""').join('""')
else:
from shlex import quote as compat_shlex_quote # noqa: F401

View File

@ -15,7 +15,7 @@ def get_package_info(module):
name=getattr(module, '_yt_dlp__identifier', module.__name__),
version=str(next(filter(None, (
getattr(module, attr, None)
for attr in ('__version__', 'version_string', 'version')
for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version')
)), None)))

View File

@ -1,7 +1,7 @@
# flake8: noqa: F405
from urllib import * # noqa: F403
del request
del request # noqa: F821
from . import request # noqa: F401
from ..compat_utils import passthrough_module

View File

@ -43,6 +43,8 @@ except Exception as _err:
try:
import sqlite3
# We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152
sqlite3._yt_dlp__version = sqlite3.sqlite_version
except ImportError:
# although sqlite3 is part of the standard library, it is possible to compile python without
# sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544

View File

@ -137,10 +137,6 @@ from .arte import (
ArteTVCategoryIE,
)
from .arnes import ArnesIE
from .asiancrush import (
AsianCrushIE,
AsianCrushPlaylistIE,
)
from .atresplayer import AtresPlayerIE
from .atscaleconf import AtScaleConfEventIE
from .atttechchannel import ATTTechChannelIE
@ -275,6 +271,10 @@ from .brightcove import (
BrightcoveLegacyIE,
BrightcoveNewIE,
)
from .brilliantpala import (
BrilliantpalaElearnIE,
BrilliantpalaClassesIE,
)
from .businessinsider import BusinessInsiderIE
from .bundesliga import BundesligaIE
from .buzzfeed import BuzzFeedIE
@ -296,9 +296,11 @@ from .cammodels import CamModelsIE
from .camsoda import CamsodaIE
from .camtasia import CamtasiaEmbedIE
from .camwithher import CamWithHerIE
from .canal1 import Canal1IE
from .canalalpha import CanalAlphaIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .caracoltv import CaracolTvPlayIE
from .carambatv import (
CarambaTVIE,
CarambaTVPageIE,
@ -356,6 +358,10 @@ from .chirbit import (
from .cinchcast import CinchcastIE
from .cinemax import CinemaxIE
from .cinetecamilano import CinetecaMilanoIE
from .cineverse import (
CineverseIE,
CineverseDetailsIE,
)
from .ciscolive import (
CiscoLiveSessionIE,
CiscoLiveSearchIE,
@ -565,8 +571,10 @@ from .epicon import (
EpiconIE,
EpiconSeriesIE,
)
from .eplus import EplusIbIE
from .epoch import EpochIE
from .eporner import EpornerIE
from .erocast import ErocastIE
from .eroprofile import (
EroProfileIE,
EroProfileAlbumIE,
@ -944,6 +952,7 @@ from .lastfm import (
from .lbry import (
LBRYIE,
LBRYChannelIE,
LBRYPlaylistIE,
)
from .lci import LCIIE
from .lcp import (
@ -1124,6 +1133,7 @@ from .mofosex import (
)
from .mojvideo import MojvideoIE
from .mojevideo import MojevideoIE
from .monstercat import MonstercatIE
from .morningstar import MorningstarIE
from .motherless import (
MotherlessIE,
@ -1296,6 +1306,11 @@ from .ninecninemedia import (
NineCNineMediaIE,
CPTwentyFourIE,
)
from .niconicochannelplus import (
NiconicoChannelPlusIE,
NiconicoChannelPlusChannelVideosIE,
NiconicoChannelPlusChannelLivesIE,
)
from .ninegag import NineGagIE
from .ninenow import NineNowIE
from .nintendo import NintendoIE
@ -1449,6 +1464,7 @@ from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
from .piapro import PiaproIE
from .piaulizaportal import PIAULIZAPortalIE
from .picarto import (
PicartoIE,
PicartoVodIE,
@ -1599,6 +1615,7 @@ from .rbmaradio import RBMARadioIE
from .rbgtum import (
RbgTumIE,
RbgTumCourseIE,
RbgTumNewCourseIE,
)
from .rcs import (
RCSIE,

View File

@ -180,20 +180,103 @@ class ABCIViewIE(InfoExtractor):
_VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
_GEO_COUNTRIES = ['AU']
# ABC iview programs are normally available for 14 days only.
_TESTS = [{
'url': 'https://iview.abc.net.au/show/utopia/series/1/video/CO1211V001S00',
'md5': '52a942bfd7a0b79a6bfe9b4ce6c9d0ed',
'info_dict': {
'id': 'CO1211V001S00',
'ext': 'mp4',
'title': 'Series 1 Ep 1 Wood For The Trees',
'series': 'Utopia',
'description': 'md5:0cfb2c183c1b952d1548fd65c8a95c00',
'upload_date': '20230726',
'uploader_id': 'abc1',
'series_id': 'CO1211V',
'episode_id': 'CO1211V001S00',
'season_number': 1,
'season': 'Season 1',
'episode_number': 1,
'episode': 'Wood For The Trees',
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/co/CO1211V001S00_5ad8353f4df09_1280.jpg',
'timestamp': 1690403700,
},
'params': {
'skip_download': True,
},
}, {
'note': 'No episode name',
'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
'md5': '67715ce3c78426b11ba167d875ac6abf',
'info_dict': {
'id': 'LE1927H001S00',
'ext': 'mp4',
'title': "Series 11 Ep 1",
'series': "Gruen",
'title': 'Series 11 Ep 1',
'series': 'Gruen',
'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
'upload_date': '20190925',
'uploader_id': 'abc1',
'series_id': 'LE1927H',
'episode_id': 'LE1927H001S00',
'season_number': 11,
'season': 'Season 11',
'episode_number': 1,
'episode': 'Episode 1',
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/le/LE1927H001S00_5d954fbd79e25_1280.jpg',
'timestamp': 1569445289,
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
'params': {
'skip_download': True,
},
}, {
'note': 'No episode number',
'url': 'https://iview.abc.net.au/show/four-corners/series/2022/video/NC2203H039S00',
'md5': '77cb7d8434440e3b28fbebe331c2456a',
'info_dict': {
'id': 'NC2203H039S00',
'ext': 'mp4',
'title': 'Series 2022 Locking Up Kids',
'series': 'Four Corners',
'description': 'md5:54829ca108846d1a70e1fcce2853e720',
'upload_date': '20221114',
'uploader_id': 'abc1',
'series_id': 'NC2203H',
'episode_id': 'NC2203H039S00',
'season_number': 2022,
'season': 'Season 2022',
'episode_number': None,
'episode': 'Locking Up Kids',
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg',
'timestamp': 1668460497,
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
'params': {
'skip_download': True,
},
}, {
'note': 'No episode name or number',
'url': 'https://iview.abc.net.au/show/landline/series/2021/video/RF2004Q043S00',
'md5': '2e17dec06b13cc81dc119d2565289396',
'info_dict': {
'id': 'RF2004Q043S00',
'ext': 'mp4',
'title': 'Series 2021',
'series': 'Landline',
'description': 'md5:c9f30d9c0c914a7fd23842f6240be014',
'upload_date': '20211205',
'uploader_id': 'abc1',
'series_id': 'RF2004Q',
'episode_id': 'RF2004Q043S00',
'season_number': 2021,
'season': 'Season 2021',
'episode_number': None,
'episode': None,
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg',
'timestamp': 1638710705,
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
'params': {
'skip_download': True,
},
@ -255,6 +338,8 @@ class ABCIViewIE(InfoExtractor):
'episode_number': int_or_none(self._search_regex(
r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
'episode_id': house_number,
'episode': self._search_regex(
r'^(?:Series\s+\d+)?\s*(?:Ep\s+\d+)?\s*(.*)$', title, 'episode', default='') or None,
'uploader_id': video_params.get('channel'),
'formats': formats,
'subtitles': subtitles,

View File

@ -169,7 +169,7 @@ class ArteTVIE(ArteTVBaseIE):
)))
short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?')
if stream['protocol'].startswith('HLS'):
if 'HLS' in stream['protocol']:
fmts, subs = self._extract_m3u8_formats_and_subtitles(
stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False)
for fmt in fmts:

View File

@ -1,196 +0,0 @@
import functools
import re
from .common import InfoExtractor
from .kaltura import KalturaIE
from ..utils import (
extract_attributes,
int_or_none,
OnDemandPagedList,
parse_age_limit,
strip_or_none,
try_get,
)
class AsianCrushBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))'
_KALTURA_KEYS = [
'video_url', 'progressive_url', 'download_url', 'thumbnail_url',
'widescreen_thumbnail_url', 'screencap_widescreen',
]
_API_SUFFIX = {'retrocrush.tv': '-ott'}
def _call_api(self, host, endpoint, video_id, query, resource):
return self._download_json(
'https://api%s.%s/%s' % (self._API_SUFFIX.get(host, ''), host, endpoint), video_id,
'Downloading %s JSON metadata' % resource, query=query,
headers=self.geo_verification_headers())['objects']
def _download_object_data(self, host, object_id, resource):
return self._call_api(
host, 'search', object_id, {'id': object_id}, resource)[0]
def _get_object_description(self, obj):
return strip_or_none(obj.get('long_description') or obj.get('short_description'))
def _parse_video_data(self, video):
title = video['name']
entry_id, partner_id = [None] * 2
for k in self._KALTURA_KEYS:
k_url = video.get(k)
if k_url:
mobj = re.search(r'/p/(\d+)/.+?/entryId/([^/]+)/', k_url)
if mobj:
partner_id, entry_id = mobj.groups()
break
meta_categories = try_get(video, lambda x: x['meta']['categories'], list) or []
categories = list(filter(None, [c.get('name') for c in meta_categories]))
show_info = video.get('show_info') or {}
return {
'_type': 'url_transparent',
'url': 'kaltura:%s:%s' % (partner_id, entry_id),
'ie_key': KalturaIE.ie_key(),
'id': entry_id,
'title': title,
'description': self._get_object_description(video),
'age_limit': parse_age_limit(video.get('mpaa_rating') or video.get('tv_rating')),
'categories': categories,
'series': show_info.get('show_name'),
'season_number': int_or_none(show_info.get('season_num')),
'season_id': show_info.get('season_id'),
'episode_number': int_or_none(show_info.get('episode_num')),
}
class AsianCrushIE(AsianCrushBaseIE):
_VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % AsianCrushBaseIE._VALID_URL_BASE
_TESTS = [{
'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt',
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
'info_dict': {
'id': '1_y4tmjm5r',
'ext': 'mp4',
'title': 'Women Who Flirt',
'description': 'md5:b65c7e0ae03a85585476a62a186f924c',
'timestamp': 1496936429,
'upload_date': '20170608',
'uploader_id': 'craig@crifkin.com',
'age_limit': 13,
'categories': 'count:5',
'duration': 5812,
},
}, {
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
'only_matching': True,
}, {
'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
'only_matching': True,
}, {
'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
'only_matching': True,
}, {
'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
'only_matching': True,
}, {
'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
'only_matching': True,
}, {
'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
'only_matching': True,
}, {
'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears',
'only_matching': True,
}]
def _real_extract(self, url):
host, video_id = self._match_valid_url(url).groups()
if host == 'cocoro.tv':
webpage = self._download_webpage(url, video_id)
embed_vars = self._parse_json(self._search_regex(
r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
default='{}'), video_id, fatal=False) or {}
video_id = embed_vars.get('entry_id') or video_id
video = self._download_object_data(host, video_id, 'video')
return self._parse_video_data(video)
class AsianCrushPlaylistIE(AsianCrushBaseIE):
_VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushBaseIE._VALID_URL_BASE
_TESTS = [{
'url': 'https://www.asiancrush.com/series/006447s/fruity-samurai',
'info_dict': {
'id': '6447',
'title': 'Fruity Samurai',
'description': 'md5:7535174487e4a202d3872a7fc8f2f154',
},
'playlist_count': 13,
}, {
'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
'only_matching': True,
}, {
'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
'only_matching': True,
}, {
'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
'only_matching': True,
}, {
'url': 'https://www.retrocrush.tv/series/012355s/true-tears',
'only_matching': True,
}]
_PAGE_SIZE = 1000000000
def _fetch_page(self, domain, parent_id, page):
videos = self._call_api(
domain, 'getreferencedobjects', parent_id, {
'max': self._PAGE_SIZE,
'object_type': 'video',
'parent_id': parent_id,
'start': page * self._PAGE_SIZE,
}, 'page %d' % (page + 1))
for video in videos:
yield self._parse_video_data(video)
def _real_extract(self, url):
host, playlist_id = self._match_valid_url(url).groups()
if host == 'cocoro.tv':
webpage = self._download_webpage(url, playlist_id)
entries = []
for mobj in re.finditer(
r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
webpage):
attrs = extract_attributes(mobj.group(0))
if attrs.get('class') == 'clearfix':
entries.append(self.url_result(
mobj.group('url'), ie=AsianCrushIE.ie_key()))
title = self._html_search_regex(
r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
'title', default=None) or self._og_search_title(
webpage, default=None) or self._html_search_meta(
'twitter:title', webpage, 'title',
default=None) or self._html_extract_title(webpage)
if title:
title = re.sub(r'\s*\|\s*.+?$', '', title)
description = self._og_search_description(
webpage, default=None) or self._html_search_meta(
'twitter:description', webpage, 'description', fatal=False)
else:
show = self._download_object_data(host, playlist_id, 'show')
title = show.get('name')
description = self._get_object_description(show)
entries = OnDemandPagedList(
functools.partial(self._fetch_page, host, playlist_id),
self._PAGE_SIZE)
return self.playlist_result(entries, playlist_id, title, description)

View File

@ -49,14 +49,14 @@ class BilibiliBaseIE(InfoExtractor):
for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
}
audios = traverse_obj(play_info, ('dash', 'audio', ...))
audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
if flac_audio:
audios.append(flac_audio)
formats = [{
'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
'acodec': audio.get('codecs'),
'acodec': traverse_obj(audio, ('codecs', {str.lower})),
'vcodec': 'none',
'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
'filesize': int_or_none(audio.get('size')),
@ -71,6 +71,7 @@ class BilibiliBaseIE(InfoExtractor):
'height': int_or_none(video.get('height')),
'vcodec': video.get('codecs'),
'acodec': 'none' if audios else None,
'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
'tbr': float_or_none(video.get('bandwidth'), scale=1000),
'filesize': int_or_none(video.get('size')),
'quality': int_or_none(video.get('id')),

View File

@ -0,0 +1,127 @@
import hashlib
from .common import InfoExtractor
from ..utils import (
ExtractorError,
traverse_obj,
urlencode_postdata,
)
class BrilliantpalaBaseIE(InfoExtractor):
_NETRC_MACHINE = 'brilliantpala'
_DOMAIN = '{subdomain}.brilliantpala.org'
def _initialize_pre_login(self):
self._HOMEPAGE = f'https://{self._DOMAIN}'
self._LOGIN_API = f'{self._HOMEPAGE}/login/'
self._LOGOUT_DEVICES_API = f'{self._HOMEPAGE}/logout_devices/?next=/'
self._CONTENT_API = f'{self._HOMEPAGE}/api/v2.4/contents/{{content_id}}/'
self._HLS_AES_URI = f'{self._HOMEPAGE}/api/v2.5/video_contents/{{content_id}}/key/'
def _get_logged_in_username(self, url, video_id):
webpage, urlh = self._download_webpage_handle(url, video_id)
if self._LOGIN_API == urlh.url:
self.raise_login_required()
return self._html_search_regex(
r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'stream page info', 'username')
def _perform_login(self, username, password):
login_form = self._hidden_inputs(self._download_webpage(
self._LOGIN_API, None, 'Downloading login page'))
login_form.update({
'username': username,
'password': password,
})
self._set_cookie(self._DOMAIN, 'csrftoken', login_form['csrfmiddlewaretoken'])
logged_page = self._download_webpage(
self._LOGIN_API, None, note='Logging in', headers={'Referer': self._LOGIN_API},
data=urlencode_postdata(login_form))
if self._html_search_regex(
r'(Your username / email and password)', logged_page, 'auth fail', default=None):
raise ExtractorError('wrong username or password', expected=True)
# the maximum number of logins is one
if self._html_search_regex(
r'(Logout Other Devices)', logged_page, 'logout devices button', default=None):
logout_device_form = self._hidden_inputs(logged_page)
self._download_webpage(
self._LOGOUT_DEVICES_API, None, headers={'Referer': self._LOGIN_API},
note='Logging out other devices', data=urlencode_postdata(logout_device_form))
def _real_extract(self, url):
course_id, content_id = self._match_valid_url(url).group('course_id', 'content_id')
video_id = f'{course_id}-{content_id}'
username = self._get_logged_in_username(url, video_id)
content_json = self._download_json(
self._CONTENT_API.format(content_id=content_id), video_id,
note='Fetching content info', errnote='Unable to fetch content info')
entries = []
for stream in traverse_obj(content_json, ('video', 'streams', lambda _, v: v['id'] and v['url'])):
formats = self._extract_m3u8_formats(stream['url'], video_id, fatal=False)
if not formats:
continue
entries.append({
'id': str(stream['id']),
'title': content_json.get('title'),
'formats': formats,
'hls_aes': {'uri': self._HLS_AES_URI.format(content_id=content_id)},
'http_headers': {'X-Key': hashlib.sha256(username.encode('ascii')).hexdigest()},
'thumbnail': content_json.get('cover_image'),
})
return self.playlist_result(
entries, playlist_id=video_id, playlist_title=content_json.get('title'))
class BrilliantpalaElearnIE(BrilliantpalaBaseIE):
IE_NAME = 'Brilliantpala:Elearn'
IE_DESC = 'VoD on elearn.brilliantpala.org'
_VALID_URL = r'https?://elearn\.brilliantpala\.org/courses/(?P<course_id>\d+)/contents/(?P<content_id>\d+)/?'
_TESTS = [{
'url': 'https://elearn.brilliantpala.org/courses/42/contents/12345/',
'only_matching': True,
}, {
'url': 'https://elearn.brilliantpala.org/courses/98/contents/36683/',
'info_dict': {
'id': '23577',
'ext': 'mp4',
'title': 'Physical World, Units and Measurements - 1',
'thumbnail': 'https://d1j3vi2u94ebt0.cloudfront.net/institute/brilliantpalalms/chapter_contents/26237/e657f81b90874be19795c7ea081f8d5c.png',
'live_status': 'not_live',
},
'params': {
'skip_download': True,
},
}]
_DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='elearn')
class BrilliantpalaClassesIE(BrilliantpalaBaseIE):
IE_NAME = 'Brilliantpala:Classes'
IE_DESC = 'VoD on classes.brilliantpala.org'
_VALID_URL = r'https?://classes\.brilliantpala\.org/courses/(?P<course_id>\d+)/contents/(?P<content_id>\d+)/?'
_TESTS = [{
'url': 'https://classes.brilliantpala.org/courses/42/contents/12345/',
'only_matching': True,
}, {
'url': 'https://classes.brilliantpala.org/courses/416/contents/25445/',
'info_dict': {
'id': '9128',
'ext': 'mp4',
'title': 'Motion in a Straight Line - Class 1',
'thumbnail': 'https://d3e4y8hquds3ek.cloudfront.net/institute/brilliantpalaelearn/chapter_contents/ff5ba838d0ec43419f67387fe1a01fa8.png',
'live_status': 'not_live',
},
'params': {
'skip_download': True,
},
}]
_DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='classes')

View File

@ -0,0 +1,39 @@
from .common import InfoExtractor
class Canal1IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|noticias\.)?canal1\.com\.co/(?:[^?#&])+/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://canal1.com.co/noticias/napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco/',
'info_dict': {
'id': '63b39f6b354977084b85ab54',
'display_id': 'napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco',
'title': 'Ñapa I Una cadena de producción de arroz que se quedó en veremos y abandonada en el departamento del Chocó',
'description': 'md5:bc49c6d64d20610ea1e7daf079a0d013',
'thumbnail': r're:^https?://[^?#]+63b39f6b354977084b85ab54',
'ext': 'mp4',
},
}, {
'url': 'https://noticias.canal1.com.co/noticias/tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter/',
'info_dict': {
'id': '63b39e93f5fd223aa32250fb',
'display_id': 'tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter',
'title': 'Tres I El triste récord que impuso Elon Musk, el dueño de Tesla y de Twitter',
'description': 'md5:d9f691f131a21ce6767ca6c05d17d791',
'thumbnail': r're:^https?://[^?#]+63b39e93f5fd223aa32250fb',
'ext': 'mp4',
},
}, {
# Geo-restricted to Colombia
'url': 'https://canal1.com.co/programas/guerreros-canal-1/video-inedito-guerreros-despedida-kewin-zarate/',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
return self.url_result(
self._search_regex(r'"embedUrl"\s*:\s*"([^"]+)', webpage, 'embed url'),
display_id=display_id, url_transparent=True)

View File

@ -0,0 +1,136 @@
import base64
import json
import uuid
from .common import InfoExtractor
from ..utils import (
int_or_none,
js_to_json,
traverse_obj,
urljoin,
)
class CaracolTvPlayIE(InfoExtractor):
_VALID_URL = r'https?://play\.caracoltv\.com/videoDetails/(?P<id>[^/?#]+)'
_NETRC_MACHINE = 'caracoltv-play'
_TESTS = [{
'url': 'https://play.caracoltv.com/videoDetails/OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
'info_dict': {
'id': 'OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
'title': 'La teoría del promedio',
'description': 'md5:1cdd6d2c13f19ef0d9649ab81a023ac3',
},
'playlist_count': 6,
}, {
'url': 'https://play.caracoltv.com/videoDetails/OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==/ella?season=0',
'info_dict': {
'id': 'OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==',
'title': 'Ella',
'description': 'md5:a639b1feb5ddcc0cff92a489b4e544b8',
},
'playlist_count': 10,
}, {
'url': 'https://play.caracoltv.com/videoDetails/OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==/la-vuelta-al-mundo-en-80-risas-2022?season=0',
'info_dict': {
'id': 'OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==',
'title': 'La vuelta al mundo en 80 risas 2022',
'description': 'md5:e97aac36106e5c37ebf947b3350106a4',
},
'playlist_count': 17,
}, {
'url': 'https://play.caracoltv.com/videoDetails/MzoxX3BwbjRmNjB1',
'only_matching': True,
}]
_USER_TOKEN = None
def _extract_app_token(self, webpage):
config_js_path = self._search_regex(
r'<script[^>]+src\s*=\s*"([^"]+coreConfig.js[^"]+)', webpage, 'config js url', fatal=False)
mediation_config = {} if not config_js_path else self._search_json(
r'mediation\s*:', self._download_webpage(
urljoin('https://play.caracoltv.com/', config_js_path), None, fatal=False, note='Extracting JS config'),
'mediation_config', None, transform_source=js_to_json, fatal=False)
key = traverse_obj(
mediation_config, ('live', 'key')) or '795cd9c089a1fc48094524a5eba85a3fca1331817c802f601735907c8bbb4f50'
secret = traverse_obj(
mediation_config, ('live', 'secret')) or '64dec00a6989ba83d087621465b5e5d38bdac22033b0613b659c442c78976fa0'
return base64.b64encode(f'{key}:{secret}'.encode()).decode()
def _perform_login(self, email, password):
webpage = self._download_webpage('https://play.caracoltv.com/', None, fatal=False)
app_token = self._extract_app_token(webpage)
bearer_token = self._download_json(
'https://eu-gateway.inmobly.com/applications/oauth', None, data=b'', note='Retrieving bearer token',
headers={'Authorization': f'Basic {app_token}'})['token']
self._USER_TOKEN = self._download_json(
'https://eu-gateway.inmobly.com/user/login', None, note='Performing login', headers={
'Content-Type': 'application/json',
'Authorization': f'Bearer {bearer_token}',
}, data=json.dumps({
'device_data': {
'device_id': str(uuid.uuid4()),
'device_token': '',
'device_type': 'web'
},
'login_data': {
'enabled': True,
'email': email,
'password': password,
}
}).encode())['user_token']
def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None):
formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['stream_url'], series_id, 'mp4')
return {
'id': video_data['id'],
'title': video_data.get('name'),
'description': video_data.get('description'),
'formats': formats,
'subtitles': subtitles,
'thumbnails': traverse_obj(
video_data, ('extra_thumbs', ..., {'url': 'thumb_url', 'height': 'height', 'width': 'width'})),
'series_id': series_id,
'season_id': season_id,
'season_number': int_or_none(season_number),
'episode_number': int_or_none(video_data.get('item_order')),
'is_live': video_data.get('entry_type') == 3,
}
def _extract_series_seasons(self, seasons, series_id):
for season in seasons:
api_response = self._download_json(
'https://eu-gateway.inmobly.com/feed', series_id, query={'season_id': season['id']},
headers={'Authorization': f'Bearer {self._USER_TOKEN}'})
season_number = season.get('order')
for episode in api_response['items']:
yield self._extract_video(episode, series_id, season['id'], season_number)
def _real_extract(self, url):
series_id = self._match_id(url)
if self._USER_TOKEN is None:
self._perform_login('guest@inmobly.com', 'Test@gus1')
api_response = self._download_json(
'https://eu-gateway.inmobly.com/feed', series_id, query={'include_ids': series_id},
headers={'Authorization': f'Bearer {self._USER_TOKEN}'})['items'][0]
if not api_response.get('seasons'):
return self._extract_video(api_response)
return self.playlist_result(
self._extract_series_seasons(api_response['seasons'], series_id),
series_id, **traverse_obj(api_response, {
'title': 'name',
'description': 'description',
}))

View File

@ -0,0 +1,136 @@
import re
from .common import InfoExtractor
from ..utils import (
filter_dict,
int_or_none,
parse_age_limit,
smuggle_url,
traverse_obj,
unsmuggle_url,
url_or_none,
)
class CineverseBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https://www\.(?P<host>%s)' % '|'.join(map(re.escape, (
'cineverse.com',
'asiancrush.com',
'dovechannel.com',
'screambox.com',
'midnightpulp.com',
'fandor.com',
'retrocrush.tv',
)))
class CineverseIE(CineverseBaseIE):
_VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/watch/(?P<id>[A-Z0-9]+)'
_TESTS = [{
'url': 'https://www.asiancrush.com/watch/DMR00018919/Women-Who-Flirt',
'skip': 'geo-blocked',
'info_dict': {
'title': 'Women Who Flirt',
'ext': 'mp4',
'id': 'DMR00018919',
'modified_timestamp': 1678744575289,
'cast': ['Xun Zhou', 'Xiaoming Huang', 'Yi-Lin Sie', 'Sonia Sui', 'Quniciren'],
'duration': 5811.597,
'description': 'md5:892fd62a05611d394141e8394ace0bc6',
'age_limit': 13,
}
}, {
'url': 'https://www.retrocrush.tv/watch/1000000023016/Archenemy! Crystal Bowie',
'skip': 'geo-blocked',
'info_dict': {
'title': 'Archenemy! Crystal Bowie',
'ext': 'mp4',
'id': '1000000023016',
'episode_number': 3,
'season_number': 1,
'cast': ['Nachi Nozawa', 'Yoshiko Sakakibara', 'Toshiko Fujita'],
'age_limit': 0,
'episode': 'Episode 3',
'season': 'Season 1',
'duration': 1485.067,
'description': 'Cobra meets a beautiful bounty hunter by the name of Jane Royal.',
'series': 'Space Adventure COBRA (Original Japanese)',
}
}]
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, default={})
self._initialize_geo_bypass({
'countries': smuggled_data.get('geo_countries'),
})
video_id = self._match_id(url)
html = self._download_webpage(url, video_id)
idetails = self._search_nextjs_data(html, video_id)['props']['pageProps']['idetails']
if idetails.get('err_code') == 1200:
self.raise_geo_restricted(
'This video is not available from your location due to geo restriction. '
'You may be able to bypass it by using the /details/ page instead of the /watch/ page',
countries=smuggled_data.get('geo_countries'))
return {
'subtitles': filter_dict({
'en': traverse_obj(idetails, (('cc_url_vtt', 'subtitle_url'), {'url': {url_or_none}})) or None,
}),
'formats': self._extract_m3u8_formats(idetails['url'], video_id),
**traverse_obj(idetails, {
'title': 'title',
'id': ('details', 'item_id'),
'description': ('details', 'description'),
'duration': ('duration', {lambda x: x / 1000}),
'cast': ('details', 'cast', {lambda x: x.split(', ')}),
'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}),
'season_number': ('details', 'season', {int_or_none}),
'episode_number': ('details', 'episode', {int_or_none}),
'age_limit': ('details', 'rating_code', {parse_age_limit}),
'series': ('details', 'series_details', 'title'),
}),
}
class CineverseDetailsIE(CineverseBaseIE):
_VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/details/(?P<id>[A-Z0-9]+)'
_TESTS = [{
'url': 'https://www.retrocrush.tv/details/1000000023012/Space-Adventure-COBRA-(Original-Japanese)',
'playlist_mincount': 30,
'info_dict': {
'title': 'Space Adventure COBRA (Original Japanese)',
'id': '1000000023012',
}
}, {
'url': 'https://www.asiancrush.com/details/NNVG4938/Hansel-and-Gretel',
'info_dict': {
'id': 'NNVG4938',
'ext': 'mp4',
'title': 'Hansel and Gretel',
'description': 'md5:e3e4c35309c2e82aee044f972c2fb05d',
'cast': ['Jeong-myeong Cheon', 'Eun Won-jae', 'Shim Eun-gyeong', 'Ji-hee Jin', 'Hee-soon Park', 'Lydia Park', 'Kyeong-ik Kim'],
'duration': 7030.732,
},
}]
def _real_extract(self, url):
host, series_id = self._match_valid_url(url).group('host', 'id')
html = self._download_webpage(url, series_id)
pageprops = self._search_nextjs_data(html, series_id)['props']['pageProps']
geo_countries = traverse_obj(pageprops, ('itemDetailsData', 'geo_country', {lambda x: x.split(', ')}))
geoblocked = traverse_obj(pageprops, (
'itemDetailsData', 'playback_err_msg')) == 'This title is not available in your location.'
def item_result(item):
item_url = f'https://www.{host}/watch/{item["item_id"]}/{item["title"]}'
if geoblocked:
item_url = smuggle_url(item_url, {'geo_countries': geo_countries})
return self.url_result(item_url, CineverseIE)
season = traverse_obj(pageprops, ('seasonEpisodes', ..., 'episodes', lambda _, v: v['item_id'] and v['title']))
if season:
return self.playlist_result([item_result(ep) for ep in season], playlist_id=series_id,
playlist_title=traverse_obj(pageprops, ('itemDetailsData', 'title')))
return item_result(pageprops['itemDetailsData'])

View File

@ -1687,7 +1687,7 @@ class InfoExtractor:
def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
rectx = re.escape(context_name)
FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){(?:.*?)return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
js, arg_keys, arg_vals = self._search_regex(
(rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),

View File

@ -1,31 +1,72 @@
import time
import hashlib
import re
import urllib
import uuid
from .common import InfoExtractor
from .openload import PhantomJSwrapper
from ..utils import (
ExtractorError,
UserNotLive,
determine_ext,
int_or_none,
js_to_json,
parse_resolution,
str_or_none,
traverse_obj,
unescapeHTML,
unified_strdate,
url_or_none,
urlencode_postdata,
urljoin,
)
class DouyuTVIE(InfoExtractor):
IE_DESC = '斗鱼'
class DouyuBaseIE(InfoExtractor):
def _download_cryptojs_md5(self, video_id):
for url in [
'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
]:
js_code = self._download_webpage(
url, video_id, note='Downloading signing dependency', fatal=False)
if js_code:
self.cache.store('douyu', 'crypto-js-md5', js_code)
return js_code
raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')
def _get_cryptojs_md5(self, video_id):
return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id)
def _calc_sign(self, sign_func, video_id, a):
b = uuid.uuid4().hex
c = round(time.time())
js_script = f'{self._get_cryptojs_md5(video_id)};{sign_func};console.log(ub98484234("{a}","{b}","{c}"))'
phantom = PhantomJSwrapper(self)
result = phantom.execute(js_script, video_id,
note='Executing JS signing script').strip()
return {i: v[0] for i, v in urllib.parse.parse_qs(result).items()}
def _search_js_sign_func(self, webpage, fatal=True):
# The greedy look-behind ensures last possible script tag is matched
return self._search_regex(
r'(?:<script.*)?<script[^>]*>(.*?ub98484234.*?)</script>', webpage, 'JS sign func', fatal=fatal)
class DouyuTVIE(DouyuBaseIE):
IE_DESC = '斗鱼直播'
_VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
_TESTS = [{
'url': 'http://www.douyutv.com/iseven',
'url': 'https://www.douyu.com/pigff',
'info_dict': {
'id': '17732',
'display_id': 'iseven',
'ext': 'flv',
'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': r're:.*m7show@163\.com.*',
'thumbnail': r're:^https?://.*\.png',
'uploader': '7师傅',
'id': '24422',
'display_id': 'pigff',
'ext': 'mp4',
'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群',
'thumbnail': str,
'uploader': 'pigff',
'is_live': True,
'live_status': 'is_live',
},
'params': {
'skip_download': True,
@ -85,15 +126,43 @@ class DouyuTVIE(InfoExtractor):
'only_matching': True,
}]
def _get_sign_func(self, room_id, video_id):
return self._download_json(
f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id,
note='Getting signing script')['data'][f'room{room_id}']
def _extract_stream_formats(self, stream_formats):
formats = []
for stream_info in traverse_obj(stream_formats, (..., 'data')):
stream_url = urljoin(
traverse_obj(stream_info, 'rtmp_url'), traverse_obj(stream_info, 'rtmp_live'))
if stream_url:
rate_id = traverse_obj(stream_info, ('rate', {int_or_none}))
rate_info = traverse_obj(stream_info, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False)
ext = determine_ext(stream_url)
formats.append({
'url': stream_url,
'format_id': str_or_none(rate_id),
'ext': 'mp4' if ext == 'm3u8' else ext,
'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
'quality': rate_id % -10000 if rate_id is not None else None,
**traverse_obj(rate_info, {
'format': ('name', {str_or_none}),
'tbr': ('bit', {int_or_none}),
}),
})
return formats
def _real_extract(self, url):
video_id = self._match_id(url)
if video_id.isdigit():
room_id = video_id
else:
page = self._download_webpage(url, video_id)
room_id = self._html_search_regex(
r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
webpage = self._download_webpage(url, video_id)
room_id = self._search_regex(r'\$ROOM\.room_id\s*=\s*(\d+)', webpage, 'room id')
if self._search_regex(r'"videoLoop"\s*:\s*(\d+)', webpage, 'loop', default='') == '1':
raise UserNotLive('The channel is auto-playing VODs', video_id=video_id)
if self._search_regex(r'\$ROOM\.show_status\s*=\s*(\d+)', webpage, 'status', default='') == '2':
raise UserNotLive(video_id=video_id)
# Grab metadata from API
params = {
@ -102,110 +171,136 @@ class DouyuTVIE(InfoExtractor):
'time': int(time.time()),
}
params['auth'] = hashlib.md5(
f'room/{video_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
room = self._download_json(
f'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
room = traverse_obj(self._download_json(
f'http://www.douyutv.com/api/v1/room/{room_id}', video_id,
note='Downloading room info', query=params)['data']
note='Downloading room info', query=params, fatal=False), 'data')
# 1 = live, 2 = offline
if room.get('show_status') == '2':
raise ExtractorError('Live stream is offline', expected=True)
if traverse_obj(room, 'show_status') == '2':
raise UserNotLive(video_id=video_id)
video_url = urljoin('https://hls3-akm.douyucdn.cn/', self._search_regex(r'(live/.*)', room['hls_url'], 'URL'))
formats, subs = self._extract_m3u8_formats_and_subtitles(video_url, room_id)
js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id)
form_data = {
'rate': 0,
**self._calc_sign(js_sign_func, video_id, room_id),
}
stream_formats = [self._download_json(
f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
video_id, note="Downloading livestream format",
data=urlencode_postdata(form_data))]
title = unescapeHTML(room['room_name'])
description = room.get('show_details')
thumbnail = room.get('room_src')
uploader = room.get('nickname')
for rate_id in traverse_obj(stream_formats[0], ('data', 'multirates', ..., 'rate')):
if rate_id != traverse_obj(stream_formats[0], ('data', 'rate')):
form_data['rate'] = rate_id
stream_formats.append(self._download_json(
f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
video_id, note=f'Downloading livestream format {rate_id}',
data=urlencode_postdata(form_data)))
return {
'id': room_id,
'display_id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'formats': self._extract_stream_formats(stream_formats),
'is_live': True,
'subtitles': subs,
'formats': formats,
**traverse_obj(room, {
'display_id': ('url', {str}, {lambda i: i[1:]}),
'title': ('room_name', {unescapeHTML}),
'description': ('show_details', {str}),
'uploader': ('nickname', {str}),
'thumbnail': ('room_src', {url_or_none}),
})
}
class DouyuShowIE(InfoExtractor):
class DouyuShowIE(DouyuBaseIE):
_VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw',
'md5': '0c2cfd068ee2afe657801269b2d86214',
'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY',
'info_dict': {
'id': 'rjNBdvnVXNzvE2yw',
'id': 'mPyq7oVNe5Yv1gLY',
'ext': 'mp4',
'title': '陈一发儿:砒霜 我有个室友系列04-01 22点场',
'duration': 7150.08,
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': '陈一发儿',
'uploader_id': 'XrZwYelr5wbK',
'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK',
'upload_date': '20170402',
'title': '四川人小时候的味道“蒜苗回锅肉”,传统菜不能丢,要常做来吃',
'duration': 633,
'thumbnail': str,
'uploader': '美食作家王刚V',
'uploader_id': 'OVAO4NVx1m7Q',
'timestamp': 1661850002,
'upload_date': '20220830',
'view_count': int,
'tags': ['美食', '美食综合'],
},
}, {
'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
'only_matching': True,
}]
_FORMATS = {
'super': '原画',
'high': '超清',
'normal': '高清',
}
_QUALITIES = {
'super': -1,
'high': -2,
'normal': -3,
}
_RESOLUTIONS = {
'super': '1920x1080',
'high': '1280x720',
'normal': '852x480',
}
def _real_extract(self, url):
url = url.replace('vmobile.', 'v.')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
room_info = self._parse_json(self._search_regex(
r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id)
video_info = self._search_json(
r'<script>\s*window\.\$DATA\s*=', webpage,
'video info', video_id, transform_source=js_to_json)
video_info = None
js_sign_func = self._search_js_sign_func(webpage)
form_data = {
'vid': video_id,
**self._calc_sign(js_sign_func, video_id, video_info['ROOM']['point_id']),
}
url_info = self._download_json(
'https://v.douyu.com/api/stream/getStreamUrl', video_id,
data=urlencode_postdata(form_data), note="Downloading video formats")
for trial in range(5):
# Sometimes Douyu rejects our request. Let's try it more times
try:
video_info = self._download_json(
'https://vmobile.douyu.com/video/getInfo', video_id,
query={'vid': video_id},
headers={
'Referer': url,
'x-requested-with': 'XMLHttpRequest',
})
break
except ExtractorError:
self._sleep(1, video_id)
if not video_info:
raise ExtractorError('Can\'t fetch video info')
formats = self._extract_m3u8_formats(
video_info['data']['video_url'], video_id,
entry_protocol='m3u8_native', ext='mp4')
upload_date = unified_strdate(self._html_search_regex(
r'<em>上传时间:</em><span>([^<]+)</span>', webpage,
'upload date', fatal=False))
uploader = uploader_id = uploader_url = None
mobj = re.search(
r'(?m)<a[^>]+href="/author/([0-9a-zA-Z]+)".+?<strong[^>]+title="([^"]+)"',
webpage)
if mobj:
uploader_id, uploader = mobj.groups()
uploader_url = urljoin(url, '/author/' + uploader_id)
formats = []
for name, url in traverse_obj(url_info, ('data', 'thumb_video', {dict.items}, ...)):
video_url = traverse_obj(url, ('url', {url_or_none}))
if video_url:
ext = determine_ext(video_url)
formats.append({
'format': self._FORMATS.get(name),
'format_id': name,
'url': video_url,
'quality': self._QUALITIES.get(name),
'ext': 'mp4' if ext == 'm3u8' else ext,
'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
**parse_resolution(self._RESOLUTIONS.get(name))
})
else:
self.to_screen(
f'"{self._FORMATS.get(name, name)}" format may require logging in. {self._login_hint()}')
return {
'id': video_id,
'title': room_info['name'],
'formats': formats,
'duration': room_info.get('duration'),
'thumbnail': room_info.get('pic'),
'upload_date': upload_date,
'uploader': uploader,
'uploader_id': uploader_id,
'uploader_url': uploader_url,
**traverse_obj(video_info, ('DATA', {
'title': ('content', 'title', {str}),
'uploader': ('content', 'author', {str}),
'uploader_id': ('content', 'up_id', {str_or_none}),
'duration': ('content', 'video_duration', {int_or_none}),
'thumbnail': ('content', 'video_pic', {url_or_none}),
'timestamp': ('content', 'create_time', {int_or_none}),
'view_count': ('content', 'view_num', {int_or_none}),
'tags': ('videoTag', ..., 'tagName', {str}),
}))
}

96
yt_dlp/extractor/eplus.py Normal file
View File

@ -0,0 +1,96 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
try_call,
unified_timestamp,
)
class EplusIbIE(InfoExtractor):
IE_NAME = 'eplus:inbound'
IE_DESC = 'e+ (イープラス) overseas'
_VALID_URL = r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)'
_TESTS = [{
'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
'info_dict': {
'id': '354502-0001-002',
'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022LIVE with a smile!【Streaming+(配信)】',
'live_status': 'was_live',
'release_date': '20211231',
'release_timestamp': 1640952000,
'description': str,
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Could not find the playlist URL. This event may not be accessible',
'No video formats found!',
'Requested format is not available',
],
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
data_json = self._search_json(r'<script>\s*var app\s*=', webpage, 'data json', video_id)
delivery_status = data_json.get('delivery_status')
archive_mode = data_json.get('archive_mode')
release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
release_timestamp_str = data_json.get('event_datetime_text') # JST
self.write_debug(f'delivery_status = {delivery_status}, archive_mode = {archive_mode}')
if delivery_status == 'PREPARING':
live_status = 'is_upcoming'
elif delivery_status == 'STARTED':
live_status = 'is_live'
elif delivery_status == 'STOPPED':
if archive_mode != 'ON':
raise ExtractorError(
'This event has ended and there is no archive for this event', expected=True)
live_status = 'post_live'
elif delivery_status == 'WAIT_CONFIRM_ARCHIVED':
live_status = 'post_live'
elif delivery_status == 'CONFIRMED_ARCHIVE':
live_status = 'was_live'
else:
self.report_warning(f'Unknown delivery_status {delivery_status}, treat it as a live')
live_status = 'is_live'
formats = []
m3u8_playlist_urls = self._search_json(
r'var listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[])
if not m3u8_playlist_urls:
if live_status == 'is_upcoming':
self.raise_no_formats(
f'Could not find the playlist URL. This live event will begin at {release_timestamp_str} JST', expected=True)
else:
self.raise_no_formats(
'Could not find the playlist URL. This event may not be accessible', expected=True)
elif live_status == 'is_upcoming':
self.raise_no_formats(f'This live event will begin at {release_timestamp_str} JST', expected=True)
elif live_status == 'post_live':
self.raise_no_formats('This event has ended, and the archive will be available shortly', expected=True)
else:
for m3u8_playlist_url in m3u8_playlist_urls:
formats.extend(self._extract_m3u8_formats(m3u8_playlist_url, video_id))
# FIXME: HTTP request headers need to be updated to continue download
warning = 'Due to technical limitations, the download will be interrupted after one hour'
if live_status == 'is_live':
self.report_warning(warning)
elif live_status == 'was_live':
self.report_warning(f'{warning}. You can restart to continue the download')
return {
'id': data_json['app_id'],
'title': data_json.get('app_name'),
'formats': formats,
'live_status': live_status,
'description': data_json.get('content'),
'release_timestamp': release_timestamp,
}

View File

@ -0,0 +1,63 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
str_or_none,
traverse_obj,
url_or_none,
)
class ErocastIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?erocast\.me/track/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://erocast.me/track/9787/f',
'md5': 'af63b91f5f231096aba54dd682abea3b',
'info_dict': {
'id': '9787',
'title': '[F4M] Your roommate, who is definitely not possessed by an alien, suddenly wants to fuck you',
'url': 'https://erocast.s3.us-east-2.wasabisys.com/1220419/track.m3u8',
'ext': 'm4a',
'age_limit': 18,
'release_timestamp': 1696178652,
'release_date': '20231001',
'modified_timestamp': int,
'modified_date': str,
'description': 'ExtraTerrestrial Tuesday!',
'uploader': 'clarissaisshy',
'uploader_id': '8113',
'uploader_url': 'https://erocast.me/clarissaisshy',
'thumbnail': 'https://erocast.s3.us-east-2.wasabisys.com/1220418/conversions/1696179247-lg.jpg',
'duration': 2307,
'view_count': int,
'comment_count': int,
'webpage_url': 'https://erocast.me/track/9787/f4m-your-roommate-who-is-definitely-not-possessed-by-an-alien-suddenly-wants-to-fuck-you',
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
data = self._search_json(
rf'<script>\s*var song_data_{video_id}\s*=', webpage, 'data', video_id, end_pattern=r'</script>')
return {
'id': video_id,
'formats': self._extract_m3u8_formats(
data.get('file_url') or data['stream_url'], video_id, 'm4a', m3u8_id='hls'),
'age_limit': 18,
**traverse_obj(data, {
'title': ('title', {str}),
'description': ('description', {str}),
'release_timestamp': ('created_at', {parse_iso8601}),
'modified_timestamp': ('updated_at', {parse_iso8601}),
'uploader': ('user', 'name', {str}),
'uploader_id': ('user', 'id', {str_or_none}),
'uploader_url': ('user', 'permalink_url', {url_or_none}),
'thumbnail': ('artwork_url', {url_or_none}),
'duration': ('duration', {int_or_none}),
'view_count': ('plays', {int_or_none}),
'comment_count': ('comment_count', {int_or_none}),
'webpage_url': ('permalink_url', {url_or_none}),
}),
}

View File

@ -11,8 +11,8 @@ class ExpressenIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:www\.)?(?:expressen|di)\.se/
(?:(?:tvspelare/video|videoplayer/embed)/)?
tv/(?:[^/]+/)*
(?:(?:tvspelare/video|video-?player/embed)/)?
(?:tv|nyheter)/(?:[^/?#]+/)*
(?P<id>[^/?#&]+)
'''
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1']
@ -42,6 +42,12 @@ class ExpressenIE(InfoExtractor):
}, {
'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
'only_matching': True,
}, {
'url': 'https://www.expressen.se/video-player/embed/tv/nyheter/ekero-fodda-olof-gustafsson-forvaltar-knarkbaronen-pablo-escobars-namn',
'only_matching': True,
}, {
'url': 'https://www.expressen.se/nyheter/efter-egna-telefonbluffen-escobar-stammer-klarna/',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -60,7 +60,7 @@ class GofileIE(InfoExtractor):
account_data = self._download_json(
'https://api.gofile.io/createAccount', None, note='Getting a new guest account')
self._TOKEN = account_data['data']['token']
self._set_cookie('gofile.io', 'accountToken', self._TOKEN)
self._set_cookie('.gofile.io', 'accountToken', self._TOKEN)
def _entries(self, file_id):
query_params = {

View File

@ -197,10 +197,6 @@ class IGNVideoIE(IGNBaseIE):
'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg',
'duration': 298,
'tags': 'count:13',
'display_id': '112203',
'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg',
'duration': 298,
'tags': 'count:13',
},
'expected_warnings': ['HTTP Error 400: Bad Request'],
}, {

View File

@ -134,10 +134,17 @@ class IPrimaIE(InfoExtractor):
), webpage, 'real id', group='id', default=None)
if not video_id:
nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data')
nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data', fatal=False)
video_id = traverse_obj(
nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False)
if not video_id:
nuxt_data = self._search_json(
r'<script[^>]+\bid=["\']__NUXT_DATA__["\'][^>]*>',
webpage, 'nuxt data', None, end_pattern=r'</script>', contains_pattern=r'\[(?s:.+)\]')
video_id = traverse_obj(nuxt_data, lambda _, v: re.fullmatch(r'p\d+', v), get_all=False)
if not video_id:
self.raise_no_formats('Unable to extract video ID from webpage')

View File

@ -499,9 +499,10 @@ class IqIE(InfoExtractor):
'tm': tm,
'qdy': 'a',
'qds': 0,
'k_ft1': 141287244169348,
'k_ft4': 34359746564,
'k_ft5': 1,
'k_ft1': '143486267424900',
'k_ft4': '1572868',
'k_ft7': '4',
'k_ft5': '1',
'bop': JSON.stringify({
'version': '10.0',
'dfp': dfp
@ -529,14 +530,22 @@ class IqIE(InfoExtractor):
webpack_js_url = self._proto_relative_url(self._search_regex(
r'<script src="((?:https?:)?//stc\.iqiyipic\.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL'))
webpack_js = self._download_webpage(webpack_js_url, video_id, note='Downloading webpack JS', errnote='Unable to download webpack JS')
webpack_map = self._search_json(
r'["\']\s*\+\s*', webpack_js, 'JS locations', video_id,
contains_pattern=r'{\s*(?:\d+\s*:\s*["\'][\da-f]+["\']\s*,?\s*)+}',
end_pattern=r'\[\w+\]\+["\']\.js', transform_source=js_to_json)
replacement_map = self._search_json(
r'["\']\s*\+\(\s*', webpack_js, 'replacement map', video_id,
contains_pattern=r'{\s*(?:\d+\s*:\s*["\'][\w.-]+["\']\s*,?\s*)+}',
end_pattern=r'\[\w+\]\|\|\w+\)\+["\']\.', transform_source=js_to_json,
fatal=False) or {}
for module_index in reversed(webpack_map):
real_module = replacement_map.get(module_index) or module_index
module_js = self._download_webpage(
f'https://stc.iqiyipic.com/_next/static/chunks/{module_index}.{webpack_map[module_index]}.js',
f'https://stc.iqiyipic.com/_next/static/chunks/{real_module}.{webpack_map[module_index]}.js',
video_id, note=f'Downloading #{module_index} module JS', errnote='Unable to download module JS', fatal=False) or ''
if 'vms request' in module_js:
self.cache.store('iq', 'player_js', module_js)

View File

@ -22,10 +22,11 @@ from ..utils import (
class LBRYBaseIE(InfoExtractor):
_BASE_URL_REGEX = r'(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)'
_BASE_URL_REGEX = r'(?x)(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)'
_CLAIM_ID_REGEX = r'[0-9a-f]{1,40}'
_OPT_CLAIM_ID = '[^:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX
_OPT_CLAIM_ID = '[^$@:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX
_SUPPORTED_STREAM_TYPES = ['video', 'audio']
_PAGE_SIZE = 50
def _call_api_proxy(self, method, display_id, params, resource):
headers = {'Content-Type': 'application/json-rpc'}
@ -69,18 +70,78 @@ class LBRYBaseIE(InfoExtractor):
'duration': ('value', stream_type, 'duration', {int_or_none}),
'channel': ('signing_channel', 'value', 'title', {str}),
'channel_id': ('signing_channel', 'claim_id', {str}),
'uploader_id': ('signing_channel', 'name', {str}),
})
channel_name = traverse_obj(stream, ('signing_channel', 'name', {str}))
if channel_name and info.get('channel_id'):
info['channel_url'] = self._permanent_url(url, channel_name, info['channel_id'])
if info.get('uploader_id') and info.get('channel_id'):
info['channel_url'] = self._permanent_url(url, info['uploader_id'], info['channel_id'])
return info
def _fetch_page(self, display_id, url, params, page):
page += 1
page_params = {
'no_totals': True,
'page': page,
'page_size': self._PAGE_SIZE,
**params,
}
result = self._call_api_proxy(
'claim_search', display_id, page_params, f'page {page}')
for item in traverse_obj(result, ('items', lambda _, v: v['name'] and v['claim_id'])):
yield {
**self._parse_stream(item, url),
'_type': 'url',
'id': item['claim_id'],
'url': self._permanent_url(url, item['name'], item['claim_id']),
}
def _playlist_entries(self, url, display_id, claim_param, metadata):
qs = parse_qs(url)
content = qs.get('content', [None])[0]
params = {
'fee_amount': qs.get('fee_amount', ['>=0'])[0],
'order_by': {
'new': ['release_time'],
'top': ['effective_amount'],
'trending': ['trending_group', 'trending_mixed'],
}[qs.get('order', ['new'])[0]],
'claim_type': 'stream',
'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
**claim_param,
}
duration = qs.get('duration', [None])[0]
if duration:
params['duration'] = {
'long': '>=1200',
'short': '<=240',
}[duration]
language = qs.get('language', ['all'])[0]
if language != 'all':
languages = [language]
if language == 'en':
languages.append('none')
params['any_languages'] = languages
entries = OnDemandPagedList(
functools.partial(self._fetch_page, display_id, url, params),
self._PAGE_SIZE)
return self.playlist_result(
entries, display_id, **traverse_obj(metadata, ('value', {
'title': 'title',
'description': 'description',
})))
class LBRYIE(LBRYBaseIE):
IE_NAME = 'lbry'
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>\$/[^/]+/[^/]+/{1}|@{0}/{0}|(?!@){0})'.format(LBRYBaseIE._OPT_CLAIM_ID, LBRYBaseIE._CLAIM_ID_REGEX)
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'''
(?:\$/(?:download|embed)/)?
(?P<id>
[^$@:/?#]+/{LBRYBaseIE._CLAIM_ID_REGEX}
|(?:@{LBRYBaseIE._OPT_CLAIM_ID}/)?{LBRYBaseIE._OPT_CLAIM_ID}
)'''
_TESTS = [{
# Video
'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
@ -98,6 +159,7 @@ class LBRYIE(LBRYBaseIE):
'height': 720,
'thumbnail': 'https://spee.ch/7/67f2d809c263288c.png',
'license': 'None',
'uploader_id': '@Mantega',
'duration': 346,
'channel': 'LBRY/Odysee rats united!!!',
'channel_id': '1c8ad6a2ab4e889a71146ae4deeb23bb92dab627',
@ -131,6 +193,7 @@ class LBRYIE(LBRYBaseIE):
'vcodec': 'none',
'thumbnail': 'https://spee.ch/d/0bc63b0e6bf1492d.png',
'license': 'None',
'uploader_id': '@LBRYFoundation',
}
}, {
'url': 'https://odysee.com/@gardeningincanada:b/plants-i-will-never-grow-again.-the:e',
@ -149,6 +212,7 @@ class LBRYIE(LBRYBaseIE):
'channel': 'Gardening In Canada',
'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc',
'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc',
'uploader_id': '@gardeningincanada',
'formats': 'mincount:3',
'thumbnail': 'https://thumbnails.lbry.com/AgHSc_HzrrE',
'license': 'Copyrighted (contact publisher)',
@ -174,6 +238,7 @@ class LBRYIE(LBRYBaseIE):
'formats': 'mincount:1',
'thumbnail': 'startswith:https://thumb',
'license': 'None',
'uploader_id': '@RT',
},
'params': {'skip_download': True}
}, {
@ -184,12 +249,13 @@ class LBRYIE(LBRYBaseIE):
'id': '41fbfe805eb73c8d3012c0c49faa0f563274f634',
'ext': 'mp4',
'title': 'Biotechnological Invasion of Skin (April 2023)',
'description': 'md5:709a2f4c07bd8891cda3a7cc2d6fcf5c',
'description': 'md5:fe28689db2cb7ba3436d819ac3ffc378',
'channel': 'Wicked Truths',
'channel_id': '23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
'channel_url': 'https://odysee.com/@wickedtruths:23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
'timestamp': 1685790036,
'upload_date': '20230603',
'uploader_id': '@wickedtruths',
'timestamp': 1695114347,
'upload_date': '20230919',
'release_timestamp': 1685617473,
'release_date': '20230601',
'duration': 1063,
@ -229,10 +295,10 @@ class LBRYIE(LBRYBaseIE):
def _real_extract(self, url):
display_id = self._match_id(url)
if display_id.startswith('$/'):
display_id = display_id.split('/', 2)[-1].replace('/', ':')
else:
if display_id.startswith('@'):
display_id = display_id.replace(':', '#')
else:
display_id = display_id.replace('/', ':')
display_id = urllib.parse.unquote(display_id)
uri = 'lbry://' + display_id
result = self._resolve_url(uri, display_id, 'stream')
@ -299,7 +365,7 @@ class LBRYIE(LBRYBaseIE):
class LBRYChannelIE(LBRYBaseIE):
IE_NAME = 'lbry:channel'
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?&]|$)' % LBRYBaseIE._OPT_CLAIM_ID
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'(?P<id>@{LBRYBaseIE._OPT_CLAIM_ID})/?(?:[?&]|$)'
_TESTS = [{
'url': 'https://lbry.tv/@LBRYFoundation:0',
'info_dict': {
@ -315,65 +381,50 @@ class LBRYChannelIE(LBRYBaseIE):
'url': 'lbry://@lbry#3f',
'only_matching': True,
}]
_PAGE_SIZE = 50
def _fetch_page(self, claim_id, url, params, page):
page += 1
page_params = {
'channel_ids': [claim_id],
'claim_type': 'stream',
'no_totals': True,
'page': page,
'page_size': self._PAGE_SIZE,
}
page_params.update(params)
result = self._call_api_proxy(
'claim_search', claim_id, page_params, 'page %d' % page)
for item in (result.get('items') or []):
stream_claim_name = item.get('name')
stream_claim_id = item.get('claim_id')
if not (stream_claim_name and stream_claim_id):
continue
yield {
**self._parse_stream(item, url),
'_type': 'url',
'id': stream_claim_id,
'url': self._permanent_url(url, stream_claim_name, stream_claim_id),
}
def _real_extract(self, url):
display_id = self._match_id(url).replace(':', '#')
result = self._resolve_url(
'lbry://' + display_id, display_id, 'channel')
result = self._resolve_url(f'lbry://{display_id}', display_id, 'channel')
claim_id = result['claim_id']
qs = parse_qs(url)
content = qs.get('content', [None])[0]
params = {
'fee_amount': qs.get('fee_amount', ['>=0'])[0],
'order_by': {
'new': ['release_time'],
'top': ['effective_amount'],
'trending': ['trending_group', 'trending_mixed'],
}[qs.get('order', ['new'])[0]],
'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
}
duration = qs.get('duration', [None])[0]
if duration:
params['duration'] = {
'long': '>=1200',
'short': '<=240',
}[duration]
language = qs.get('language', ['all'])[0]
if language != 'all':
languages = [language]
if language == 'en':
languages.append('none')
params['any_languages'] = languages
entries = OnDemandPagedList(
functools.partial(self._fetch_page, claim_id, url, params),
self._PAGE_SIZE)
result_value = result.get('value') or {}
return self.playlist_result(
entries, claim_id, result_value.get('title'),
result_value.get('description'))
return self._playlist_entries(url, claim_id, {'channel_ids': [claim_id]}, result)
class LBRYPlaylistIE(LBRYBaseIE):
IE_NAME = 'lbry:playlist'
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'\$/(?:play)?list/(?P<id>[0-9a-f-]+)'
_TESTS = [{
'url': 'https://odysee.com/$/playlist/ffef782f27486f0ac138bde8777f72ebdd0548c2',
'info_dict': {
'id': 'ffef782f27486f0ac138bde8777f72ebdd0548c2',
'title': 'Théâtre Classique',
'description': 'Théâtre Classique',
},
'playlist_mincount': 4,
}, {
'url': 'https://odysee.com/$/list/9c6658b3dd21e4f2a0602d523a13150e2b48b770',
'info_dict': {
'id': '9c6658b3dd21e4f2a0602d523a13150e2b48b770',
'title': 'Social Media Exposed',
'description': 'md5:98af97317aacd5b85d595775ea37d80e',
},
'playlist_mincount': 34,
}, {
'url': 'https://odysee.com/$/playlist/938fb11d-215f-4d1c-ad64-723954df2184',
'info_dict': {
'id': '938fb11d-215f-4d1c-ad64-723954df2184',
},
'playlist_mincount': 1000,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
result = traverse_obj(self._call_api_proxy('claim_search', display_id, {
'claim_ids': [display_id],
'no_totals': True,
'page': 1,
'page_size': self._PAGE_SIZE,
}, 'playlist'), ('items', 0))
claim_param = {'claim_ids': traverse_obj(result, ('value', 'claims', ..., {str}))}
return self._playlist_entries(url, display_id, claim_param, result)

View File

@ -13,7 +13,7 @@ from ..utils import (
class LiTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)'
_URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s'
_URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s'
_TESTS = [{
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
@ -21,16 +21,18 @@ class LiTVIE(InfoExtractor):
'id': 'VOD00041606',
'title': '花千骨',
},
'playlist_count': 50,
'playlist_count': 51, # 50 episodes + 1 trailer
}, {
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
'md5': '969e343d9244778cb29acec608e53640',
'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a',
'info_dict': {
'id': 'VOD00041610',
'ext': 'mp4',
'title': '花千骨第1集',
'thumbnail': r're:https?://.*\.jpg$',
'description': 'md5:c7017aa144c87467c4fb2909c4b05d6f',
'description': '《花千骨》陸劇線上看。十六年前,平靜的村莊內,一名女嬰隨異相出生,途徑此地的蜀山掌門清虛道長算出此女命運非同一般,她體內散發的異香易招惹妖魔。一念慈悲下,他在村莊周邊設下結界阻擋妖魔入侵,讓其年滿十六後去蜀山,並賜名花千骨。',
'categories': ['奇幻', '愛情', '中國', '仙俠'],
'episode': 'Episode 1',
'episode_number': 1,
},
'params': {
@ -46,20 +48,17 @@ class LiTVIE(InfoExtractor):
'title': '芈月傳第1集 霸星芈月降世楚國',
'description': '楚威王二年,太史令唐昧夜觀星象,發現霸星即將現世。王后得知霸星的預言後,想盡辦法不讓孩子順利出生,幸得莒姬相護化解危機。沒想到眾人期待下出生的霸星卻是位公主,楚威王對此失望至極。楚王后命人將女嬰丟棄河中,居然奇蹟似的被少司命像攔下,楚威王認為此女非同凡響,為她取名芈月。',
},
'skip': 'Georestricted to Taiwan',
'skip': 'No longer exists',
}]
def _extract_playlist(self, season_list, video_id, program_info, prompt=True):
episode_title = program_info['title']
content_id = season_list['contentId']
def _extract_playlist(self, playlist_data, content_type):
all_episodes = [
self.url_result(smuggle_url(
self._URL_TEMPLATE % (program_info['contentType'], episode['contentId']),
self._URL_TEMPLATE % (content_type, episode['contentId']),
{'force_noplaylist': True})) # To prevent infinite recursion
for episode in season_list['episode']]
for episode in traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))]
return self.playlist_result(all_episodes, content_id, episode_title)
return self.playlist_result(all_episodes, playlist_data['contentId'], playlist_data.get('title'))
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
@ -68,24 +67,31 @@ class LiTVIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
if self._search_regex(
r'(?i)<meta\s[^>]*http-equiv="refresh"\s[^>]*content="[0-9]+;\s*url=https://www\.litv\.tv/"',
webpage, 'meta refresh redirect', default=False, group=0):
raise ExtractorError('No such content found', expected=True)
program_info = self._parse_json(self._search_regex(
r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
video_id)
season_list = list(program_info.get('seasonList', {}).values())
playlist_id = traverse_obj(season_list, 0, 'contentId')
if self._yes_playlist(playlist_id, video_id, smuggled_data):
return self._extract_playlist(season_list[0], video_id, program_info)
# In browsers `getMainUrl` request is always issued. Usually this
# In browsers `getProgramInfo` request is always issued. Usually this
# endpoint gives the same result as the data embedded in the webpage.
# If georestricted, there are no embedded data, so an extra request is
# necessary to get the error code
# If, for some reason, there are no embedded data, we do an extra request.
if 'assetId' not in program_info:
program_info = self._download_json(
'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
query={'contentId': video_id},
headers={'Accept': 'application/json'})
series_id = program_info['seriesId']
if self._yes_playlist(series_id, video_id, smuggled_data):
playlist_data = self._download_json(
'https://www.litv.tv/vod/ajax/getSeriesTree', video_id,
query={'seriesId': series_id}, headers={'Accept': 'application/json'})
return self._extract_playlist(playlist_data, program_info['contentType'])
video_data = self._parse_json(self._search_regex(
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
webpage, 'video data', default='{}'), video_id)
@ -96,7 +102,7 @@ class LiTVIE(InfoExtractor):
'contentType': program_info['contentType'],
}
video_data = self._download_json(
'https://www.litv.tv/vod/getMainUrl', video_id,
'https://www.litv.tv/vod/ajax/getMainUrlNoAuth', video_id,
data=json.dumps(payload).encode('utf-8'),
headers={'Content-Type': 'application/json'})

View File

@ -106,8 +106,12 @@ class MediaStreamIE(MediaStreamBaseIE):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
if 'Debido a tu ubicación no puedes ver el contenido' in webpage:
self.raise_geo_restricted()
for message in [
'Debido a tu ubicación no puedes ver el contenido',
'You are not allowed to watch this video: Geo Fencing Restriction'
]:
if message in webpage:
self.raise_geo_restricted()
player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)

View File

@ -0,0 +1,79 @@
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
extract_attributes,
get_element_by_class,
get_element_html_by_class,
get_element_text_and_html_by_tag,
int_or_none,
unified_strdate,
strip_or_none,
traverse_obj,
try_call,
)
class MonstercatIE(InfoExtractor):
_VALID_URL = r'https://www\.monstercat\.com/release/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.monstercat.com/release/742779548009',
'playlist_count': 20,
'info_dict': {
'title': 'The Secret Language of Trees',
'id': '742779548009',
'thumbnail': 'https://www.monstercat.com/release/742779548009/cover',
'release_year': 2023,
'release_date': '20230711',
'album': 'The Secret Language of Trees',
'album_artist': 'BT',
}
}]
def _extract_tracks(self, table, album_meta):
for td in re.findall(r'<tr[^<]*>((?:(?!</tr>)[\w\W])+)', table): # regex by chatgpt due to lack of get_elements_by_tag
title = clean_html(try_call(
lambda: get_element_by_class('d-inline-flex flex-column', td).partition(' <span')[0]))
ids = extract_attributes(try_call(lambda: get_element_html_by_class('btn-play cursor-pointer mr-small', td)) or '')
track_id = ids.get('data-track-id')
release_id = ids.get('data-release-id')
track_number = int_or_none(try_call(lambda: get_element_by_class('py-xsmall', td)))
if not track_id or not release_id:
self.report_warning(f'Skipping track {track_number}, ID(s) not found')
self.write_debug(f'release_id={repr(release_id)} track_id={repr(track_id)}')
continue
yield {
**album_meta,
'title': title,
'track': title,
'track_number': track_number,
'artist': clean_html(try_call(lambda: get_element_by_class('d-block fs-xxsmall', td))),
'url': f'https://www.monstercat.com/api/release/{release_id}/track-stream/{track_id}',
'id': track_id,
'ext': 'mp3'
}
def _real_extract(self, url):
url_id = self._match_id(url)
html = self._download_webpage(url, url_id)
# wrap all `get_elements` in `try_call`, HTMLParser has problems with site's html
tracklist_table = try_call(lambda: get_element_by_class('table table-small', html)) or ''
title = try_call(lambda: get_element_text_and_html_by_tag('h1', html)[0])
date = traverse_obj(html, ({lambda html: get_element_by_class('font-italic mb-medium d-tablet-none d-phone-block',
html).partition('Released ')}, 2, {strip_or_none}, {unified_strdate}))
album_meta = {
'title': title,
'album': title,
'thumbnail': f'https://www.monstercat.com/release/{url_id}/cover',
'album_artist': try_call(
lambda: get_element_by_class('h-normal text-uppercase mb-desktop-medium mb-smallish', html)),
'release_year': int_or_none(date[:4]) if date else None,
'release_date': date,
}
return self.playlist_result(
self._extract_tracks(tracklist_table, album_meta), playlist_id=url_id, **album_meta)

View File

@ -127,7 +127,6 @@ class NebulaIE(NebulaBaseIE):
'channel_id': 'lindsayellis',
'uploader': 'Lindsay Ellis',
'uploader_id': 'lindsayellis',
'timestamp': 1533009600,
'uploader_url': 'https://nebula.tv/lindsayellis',
'series': 'Lindsay Ellis',
'display_id': 'that-time-disney-remade-beauty-and-the-beast',

View File

@ -2,105 +2,74 @@ import itertools
import json
import re
import time
from base64 import b64encode
from binascii import hexlify
from datetime import datetime
from hashlib import md5
from random import randint
from .common import InfoExtractor
from ..aes import aes_ecb_encrypt, pkcs7_padding
from ..compat import compat_urllib_parse_urlencode
from ..networking import Request
from ..utils import (
ExtractorError,
bytes_to_intlist,
error_to_compat_str,
float_or_none,
int_or_none,
intlist_to_bytes,
try_get,
join_nonempty,
str_or_none,
strftime_or_none,
traverse_obj,
unified_strdate,
url_or_none,
urljoin,
variadic,
)
class NetEaseMusicBaseIE(InfoExtractor):
_FORMATS = ['bMusic', 'mMusic', 'hMusic']
_NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
_API_BASE = 'http://music.163.com/api/'
_GEO_BYPASS = False
@classmethod
def _encrypt(cls, dfsid):
salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
string_bytes = bytearray(str(dfsid).encode('ascii'))
salt_len = len(salt_bytes)
for i in range(len(string_bytes)):
string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len]
m = md5()
m.update(bytes(string_bytes))
result = b64encode(m.digest()).decode('ascii')
return result.replace('/', '_').replace('+', '-')
@staticmethod
def kilo_or_none(value):
return int_or_none(value, scale=1000)
def make_player_api_request_data_and_headers(self, song_id, bitrate):
KEY = b'e82ckenh8dichen8'
URL = '/api/song/enhance/player/url'
now = int(time.time() * 1000)
rand = randint(0, 1000)
cookie = {
'osver': None,
'deviceId': None,
def _create_eapi_cipher(self, api_path, query_body, cookies):
request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':'))
message = f'nobody{api_path}use{request_text}md5forencrypt'.encode('latin1')
msg_digest = md5(message).hexdigest()
data = pkcs7_padding(list(str.encode(
f'{api_path}-36cd479b6b5-{request_text}-36cd479b6b5-{msg_digest}')))
encrypted = bytes(aes_ecb_encrypt(data, list(b'e82ckenh8dichen8')))
return f'params={encrypted.hex().upper()}'.encode()
def _download_eapi_json(self, path, video_id, query_body, headers={}, **kwargs):
cookies = {
'osver': 'undefined',
'deviceId': 'undefined',
'appver': '8.0.0',
'versioncode': '140',
'mobilename': None,
'mobilename': 'undefined',
'buildver': '1623435496',
'resolution': '1920x1080',
'__csrf': '',
'os': 'pc',
'channel': None,
'requestId': '{0}_{1:04}'.format(now, rand),
'channel': 'undefined',
'requestId': f'{int(time.time() * 1000)}_{randint(0, 1000):04}',
**traverse_obj(self._get_cookies(self._API_BASE), {
'MUSIC_U': ('MUSIC_U', {lambda i: i.value}),
})
}
request_text = json.dumps(
{'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie},
separators=(',', ':'))
message = 'nobody{0}use{1}md5forencrypt'.format(
URL, request_text).encode('latin1')
msg_digest = md5(message).hexdigest()
data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
URL, request_text, msg_digest)
data = pkcs7_padding(bytes_to_intlist(data))
encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY)))
encrypted_params = hexlify(encrypted).decode('ascii').upper()
cookie = '; '.join(
['{0}={1}'.format(k, v if v is not None else 'undefined')
for [k, v] in cookie.items()])
headers = {
'User-Agent': self.extractor.get_param('http_headers')['User-Agent'],
'Content-Type': 'application/x-www-form-urlencoded',
'Referer': 'https://music.163.com',
'Cookie': cookie,
}
return ('params={0}'.format(encrypted_params), headers)
return self._download_json(
urljoin('https://interface3.music.163.com/', f'/eapi{path}'), video_id,
data=self._create_eapi_cipher(f'/api{path}', query_body, cookies), headers={
'Referer': 'https://music.163.com',
'Cookie': '; '.join([f'{k}={v}' for k, v in cookies.items()]),
**headers,
}, **kwargs)
def _call_player_api(self, song_id, bitrate):
url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
try:
msg = 'empty result'
result = self._download_json(
url, song_id, data=data.encode('ascii'), headers=headers)
if result:
return result
except ExtractorError as e:
if type(e.cause) in (ValueError, TypeError):
# JSON load failure
raise
except Exception as e:
msg = error_to_compat_str(e)
self.report_warning('%s API call (%s) failed: %s' % (
song_id, bitrate, msg))
return {}
return self._download_eapi_json(
'/song/enhance/player/url', song_id, {'ids': f'[{song_id}]', 'br': bitrate},
note=f'Downloading song URL info: bitrate {bitrate}')
def extract_formats(self, info):
err = 0
@ -110,45 +79,50 @@ class NetEaseMusicBaseIE(InfoExtractor):
details = info.get(song_format)
if not details:
continue
bitrate = int_or_none(details.get('bitrate')) or 999000
data = self._call_player_api(song_id, bitrate)
for song in try_get(data, lambda x: x['data'], list) or []:
song_url = try_get(song, lambda x: x['url'])
if not song_url:
continue
for song in traverse_obj(self._call_player_api(song_id, bitrate), ('data', lambda _, v: url_or_none(v['url']))):
song_url = song['url']
if self._is_valid_url(song_url, info['id'], 'song'):
formats.append({
'url': song_url,
'ext': details.get('extension'),
'abr': float_or_none(song.get('br'), scale=1000),
'format_id': song_format,
'filesize': int_or_none(song.get('size')),
'asr': int_or_none(details.get('sr')),
'asr': traverse_obj(details, ('sr', {int_or_none})),
**traverse_obj(song, {
'ext': ('type', {str}),
'abr': ('br', {self.kilo_or_none}),
'filesize': ('size', {int_or_none}),
}),
})
elif err == 0:
err = try_get(song, lambda x: x['code'], int)
err = traverse_obj(song, ('code', {int})) or 0
if not formats:
msg = 'No media links found'
if err != 0 and (err < 200 or err >= 400):
raise ExtractorError(
'%s (site code %d)' % (msg, err, ), expected=True)
raise ExtractorError(f'No media links found (site code {err})', expected=True)
else:
self.raise_geo_restricted(
msg + ': probably this video is not available from your location due to geo restriction.',
countries=['CN'])
'No media links found: probably due to geo restriction.', countries=['CN'])
return formats
@classmethod
def convert_milliseconds(cls, ms):
return int(round(ms / 1000.0))
def query_api(self, endpoint, video_id, note):
req = Request('%s%s' % (self._API_BASE, endpoint))
req.headers['Referer'] = self._API_BASE
return self._download_json(req, video_id, note)
result = self._download_json(
f'{self._API_BASE}{endpoint}', video_id, note, headers={'Referer': self._API_BASE})
code = traverse_obj(result, ('code', {int}))
message = traverse_obj(result, ('message', {str})) or ''
if code == -462:
self.raise_login_required(f'Login required to download: {message}')
elif code != 200:
raise ExtractorError(f'Failed to get meta info: {code} {message}')
return result
def _get_entries(self, songs_data, entry_keys=None, id_key='id', name_key='name'):
for song in traverse_obj(songs_data, (
*variadic(entry_keys, (str, bytes, dict, set)),
lambda _, v: int_or_none(v[id_key]) is not None)):
song_id = str(song[id_key])
yield self.url_result(
f'http://music.163.com/#/song?id={song_id}', NetEaseMusicIE,
song_id, traverse_obj(song, (name_key, {str})))
class NetEaseMusicIE(NetEaseMusicBaseIE):
@ -156,16 +130,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
IE_DESC = '网易云音乐'
_VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://music.163.com/#/song?id=32102397',
'md5': '3e909614ce09b1ccef4a3eb205441190',
'url': 'https://music.163.com/#/song?id=548648087',
'info_dict': {
'id': '32102397',
'id': '548648087',
'ext': 'mp3',
'title': 'Bad Blood',
'creator': 'Taylor Swift / Kendrick Lamar',
'upload_date': '20150516',
'timestamp': 1431792000,
'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
'title': '戒烟 (Live)',
'creator': '李荣浩 / 朱正廷 / 陈立农 / 尤长靖 / ONER灵超 / ONER木子洋 / 杨非同 / 陆定昊',
'timestamp': 1522944000,
'upload_date': '20180405',
'description': 'md5:3650af9ee22c87e8637cb2dde22a765c',
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
"duration": 256,
'thumbnail': r're:^http.*\.jpg',
},
}, {
'note': 'No lyrics.',
@ -176,21 +152,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'title': 'Opus 28',
'creator': 'Dustin O\'Halloran',
'upload_date': '20080211',
'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
'timestamp': 1202745600,
},
}, {
'note': 'Has translated name.',
'url': 'http://music.163.com/#/song?id=22735043',
'info_dict': {
'id': '22735043',
'ext': 'mp3',
'title': '소원을 말해봐 (Genie)',
'creator': '少女时代',
'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
'upload_date': '20100127',
'timestamp': 1264608000,
'alt_title': '说出愿望吧(Genie)',
'duration': 263,
'thumbnail': r're:^http.*\.jpg',
},
}, {
'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
@ -203,59 +167,99 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'upload_date': '19911130',
'timestamp': 691516800,
'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
'duration': 268,
'alt_title': '伴唱:现代人乐队 合唱:总政歌舞团',
'thumbnail': r're:^http.*\.jpg',
},
}, {
'url': 'http://music.163.com/#/song?id=32102397',
'md5': '3e909614ce09b1ccef4a3eb205441190',
'info_dict': {
'id': '32102397',
'ext': 'mp3',
'title': 'Bad Blood',
'creator': 'Taylor Swift / Kendrick Lamar',
'upload_date': '20150516',
'timestamp': 1431792000,
'description': 'md5:21535156efb73d6d1c355f95616e285a',
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
'duration': 199,
'thumbnail': r're:^http.*\.jpg',
},
'skip': 'Blocked outside Mainland China',
}, {
'note': 'Has translated name.',
'url': 'http://music.163.com/#/song?id=22735043',
'info_dict': {
'id': '22735043',
'ext': 'mp3',
'title': '소원을 말해봐 (Genie)',
'creator': '少女时代',
'upload_date': '20100127',
'timestamp': 1264608000,
'description': 'md5:03d1ffebec3139aa4bafe302369269c5',
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
'duration': 229,
'alt_title': '说出愿望吧(Genie)',
'thumbnail': r're:^http.*\.jpg',
},
'skip': 'Blocked outside Mainland China',
}]
def _process_lyrics(self, lyrics_info):
original = lyrics_info.get('lrc', {}).get('lyric')
translated = lyrics_info.get('tlyric', {}).get('lyric')
original = traverse_obj(lyrics_info, ('lrc', 'lyric', {str}))
translated = traverse_obj(lyrics_info, ('tlyric', 'lyric', {str}))
if not original or original == '[99:00.00]纯音乐,请欣赏\n':
return None
if not translated:
return original
return {
'lyrics': [{'data': original, 'ext': 'lrc'}],
}
lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
original_ts_texts = re.findall(lyrics_expr, original)
translation_ts_dict = dict(
(time_stamp, text) for time_stamp, text in re.findall(lyrics_expr, translated)
)
lyrics = '\n'.join([
'%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
for time_stamp, text in original_ts_texts
])
return lyrics
translation_ts_dict = dict(re.findall(lyrics_expr, translated))
merged = '\n'.join(
join_nonempty(f'{timestamp}{text}', translation_ts_dict.get(timestamp, ''), delim=' / ')
for timestamp, text in original_ts_texts)
return {
'lyrics_merged': [{'data': merged, 'ext': 'lrc'}],
'lyrics': [{'data': original, 'ext': 'lrc'}],
'lyrics_translated': [{'data': translated, 'ext': 'lrc'}],
}
def _real_extract(self, url):
song_id = self._match_id(url)
params = {
'id': song_id,
'ids': '[%s]' % song_id
}
info = self.query_api(
'song/detail?' + compat_urllib_parse_urlencode(params),
song_id, 'Downloading song info')['songs'][0]
f'song/detail?id={song_id}&ids=%5B{song_id}%5D', song_id, 'Downloading song info')['songs'][0]
formats = self.extract_formats(info)
lyrics_info = self.query_api(
'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
song_id, 'Downloading lyrics data')
lyrics = self._process_lyrics(lyrics_info)
alt_title = None
if info.get('transNames'):
alt_title = '/'.join(info.get('transNames'))
lyrics = self._process_lyrics(self.query_api(
f'song/lyric?id={song_id}&lv=-1&tv=-1', song_id, 'Downloading lyrics data'))
lyric_data = {
'description': traverse_obj(lyrics, (('lyrics_merged', 'lyrics'), 0, 'data'), get_all=False),
'subtitles': lyrics,
} if lyrics else {}
return {
'id': song_id,
'title': info['name'],
'alt_title': alt_title,
'creator': ' / '.join([artist['name'] for artist in info.get('artists', [])]),
'timestamp': self.convert_milliseconds(info.get('album', {}).get('publishTime')),
'thumbnail': info.get('album', {}).get('picUrl'),
'duration': self.convert_milliseconds(info.get('duration', 0)),
'description': lyrics,
'formats': formats,
'alt_title': '/'.join(traverse_obj(info, (('transNames', 'alias'), ...))) or None,
'creator': ' / '.join(traverse_obj(info, ('artists', ..., 'name'))) or None,
**lyric_data,
**traverse_obj(info, {
'title': ('name', {str}),
'timestamp': ('album', 'publishTime', {self.kilo_or_none}),
'thumbnail': ('album', 'picUrl', {url_or_none}),
'duration': ('duration', {self.kilo_or_none}),
}),
}
@ -263,31 +267,44 @@ class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:album'
IE_DESC = '网易云音乐 - 专辑'
_VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
_TEST = {
_TESTS = [{
'url': 'https://music.163.com/#/album?id=133153666',
'info_dict': {
'id': '133153666',
'title': '桃几的翻唱',
'upload_date': '20210913',
'description': '桃几2021年翻唱合集',
'thumbnail': r're:^http.*\.jpg',
},
'playlist_mincount': 13,
}, {
'url': 'http://music.163.com/#/album?id=220780',
'info_dict': {
'id': '220780',
'title': 'B\'day',
'title': 'B\'Day',
'upload_date': '20060904',
'description': 'md5:71a74e1d8f392d88cf1bbe48879ad0b0',
'thumbnail': r're:^http.*\.jpg',
},
'playlist_count': 23,
'skip': 'Blocked outside Mainland China',
}
}]
def _real_extract(self, url):
album_id = self._match_id(url)
webpage = self._download_webpage(f'https://music.163.com/album?id={album_id}', album_id)
info = self.query_api(
'album/%s?id=%s' % (album_id, album_id),
album_id, 'Downloading album data')['album']
name = info['name']
desc = info.get('description')
entries = [
self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
'NetEaseMusic', song['id'])
for song in info['songs']
]
return self.playlist_result(entries, album_id, name, desc)
songs = self._search_json(
r'<textarea[^>]+\bid="song-list-pre-data"[^>]*>', webpage, 'metainfo', album_id,
end_pattern=r'</textarea>', contains_pattern=r'\[(?s:.+)\]')
metainfo = {
'title': self._og_search_property('title', webpage, 'title', fatal=False),
'description': self._html_search_regex(
(rf'<div[^>]+\bid="album-desc-{suffix}"[^>]*>(.*?)</div>' for suffix in ('more', 'dot')),
webpage, 'description', flags=re.S, fatal=False),
'thumbnail': self._og_search_property('image', webpage, 'thumbnail', fatal=False),
'upload_date': unified_strdate(self._html_search_meta('music:release_date', webpage, 'date', fatal=False)),
}
return self.playlist_result(self._get_entries(songs), album_id, **metainfo)
class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
@ -299,10 +316,9 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
'url': 'http://music.163.com/#/artist?id=10559',
'info_dict': {
'id': '10559',
'title': '张惠妹 - aMEI;阿密特',
'title': '张惠妹 - aMEI;阿妹;阿密特',
},
'playlist_count': 50,
'skip': 'Blocked outside Mainland China',
}, {
'note': 'Singer has translated name.',
'url': 'http://music.163.com/#/artist?id=124098',
@ -311,28 +327,28 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
'title': '李昇基 - 이승기',
},
'playlist_count': 50,
'skip': 'Blocked outside Mainland China',
}, {
'note': 'Singer with both translated and alias',
'url': 'https://music.163.com/#/artist?id=159692',
'info_dict': {
'id': '159692',
'title': '初音ミク - 初音未来;Hatsune Miku',
},
'playlist_count': 50,
}]
def _real_extract(self, url):
singer_id = self._match_id(url)
info = self.query_api(
'artist/%s?id=%s' % (singer_id, singer_id),
singer_id, 'Downloading singer data')
f'artist/{singer_id}?id={singer_id}', singer_id, note='Downloading singer data')
name = info['artist']['name']
if info['artist']['trans']:
name = '%s - %s' % (name, info['artist']['trans'])
if info['artist']['alias']:
name = '%s - %s' % (name, ';'.join(info['artist']['alias']))
name = join_nonempty(
traverse_obj(info, ('artist', 'name', {str})),
join_nonempty(*traverse_obj(info, ('artist', ('trans', ('alias', ...)), {str})), delim=';'),
delim=' - ')
entries = [
self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
'NetEaseMusic', song['id'])
for song in info['hotSongs']
]
return self.playlist_result(entries, singer_id, name)
return self.playlist_result(self._get_entries(info, 'hotSongs'), singer_id, name)
class NetEaseMusicListIE(NetEaseMusicBaseIE):
@ -344,10 +360,28 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
'info_dict': {
'id': '79177352',
'title': 'Billboard 2007 Top 100',
'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
'description': 'md5:12fd0819cab2965b9583ace0f8b7b022',
'tags': ['欧美'],
'uploader': '浑然破灭',
'uploader_id': '67549805',
'timestamp': int,
'upload_date': r're:\d{8}',
},
'playlist_count': 99,
'skip': 'Blocked outside Mainland China',
'playlist_mincount': 95,
}, {
'note': 'Toplist/Charts sample',
'url': 'https://music.163.com/#/discover/toplist?id=60198',
'info_dict': {
'id': '60198',
'title': 're:美国Billboard榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
'description': '美国Billboard排行榜',
'tags': ['流行', '欧美', '榜单'],
'uploader': 'Billboard公告牌',
'uploader_id': '48171',
'timestamp': int,
'upload_date': r're:\d{8}',
},
'playlist_count': 100,
}, {
'note': 'Toplist/Charts sample',
'url': 'http://music.163.com/#/discover/toplist?id=3733003',
@ -363,64 +397,86 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
def _real_extract(self, url):
list_id = self._match_id(url)
info = self.query_api(
'playlist/detail?id=%s&lv=-1&tv=-1' % list_id,
list_id, 'Downloading playlist data')['result']
info = self._download_eapi_json(
'/v3/playlist/detail', list_id,
{'id': list_id, 't': '-1', 'n': '500', 's': '0'},
note="Downloading playlist info")
name = info['name']
desc = info.get('description')
metainfo = traverse_obj(info, ('playlist', {
'title': ('name', {str}),
'description': ('description', {str}),
'tags': ('tags', ..., {str}),
'uploader': ('creator', 'nickname', {str}),
'uploader_id': ('creator', 'userId', {str_or_none}),
'timestamp': ('updateTime', {self.kilo_or_none}),
}))
if traverse_obj(info, ('playlist', 'specialType')) == 10:
metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}'
if info.get('specialType') == 10: # is a chart/toplist
datestamp = datetime.fromtimestamp(
self.convert_milliseconds(info['updateTime'])).strftime('%Y-%m-%d')
name = '%s %s' % (name, datestamp)
entries = [
self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
'NetEaseMusic', song['id'])
for song in info['tracks']
]
return self.playlist_result(entries, list_id, name, desc)
return self.playlist_result(self._get_entries(info, ('playlist', 'tracks')), list_id, **metainfo)
class NetEaseMusicMvIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:mv'
IE_DESC = '网易云音乐 - MV'
_VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
_TEST = {
_TESTS = [{
'url': 'https://music.163.com/#/mv?id=10958064',
'info_dict': {
'id': '10958064',
'ext': 'mp4',
'title': '交换余生',
'description': 'md5:e845872cff28820642a2b02eda428fea',
'creator': '林俊杰',
'upload_date': '20200916',
'thumbnail': r're:http.*\.jpg',
'duration': 364,
'view_count': int,
'like_count': int,
'comment_count': int,
},
}, {
'url': 'http://music.163.com/#/mv?id=415350',
'info_dict': {
'id': '415350',
'ext': 'mp4',
'title': '이럴거면 그러지말지',
'description': '白雅言自作曲唱甜蜜爱情',
'creator': '白雅言',
'creator': '娥娟',
'upload_date': '20150520',
'thumbnail': r're:http.*\.jpg',
'duration': 216,
'view_count': int,
'like_count': int,
'comment_count': int,
},
'skip': 'Blocked outside Mainland China',
}
}]
def _real_extract(self, url):
mv_id = self._match_id(url)
info = self.query_api(
'mv/detail?id=%s&type=mp4' % mv_id,
mv_id, 'Downloading mv info')['data']
f'mv/detail?id={mv_id}&type=mp4', mv_id, 'Downloading mv info')['data']
formats = [
{'url': mv_url, 'ext': 'mp4', 'format_id': '%sp' % brs, 'height': int(brs)}
{'url': mv_url, 'ext': 'mp4', 'format_id': f'{brs}p', 'height': int_or_none(brs)}
for brs, mv_url in info['brs'].items()
]
return {
'id': mv_id,
'title': info['name'],
'description': info.get('desc') or info.get('briefDesc'),
'creator': info['artistName'],
'upload_date': info['publishTime'].replace('-', ''),
'formats': formats,
'thumbnail': info.get('cover'),
'duration': self.convert_milliseconds(info.get('duration', 0)),
**traverse_obj(info, {
'title': ('name', {str}),
'description': (('desc', 'briefDesc'), {str}, {lambda x: x or None}),
'creator': ('artistName', {str}),
'upload_date': ('publishTime', {unified_strdate}),
'thumbnail': ('cover', {url_or_none}),
'duration': ('duration', {self.kilo_or_none}),
'view_count': ('playCount', {int_or_none}),
'like_count': ('likeCount', {int_or_none}),
'comment_count': ('commentCount', {int_or_none}),
}, get_all=False),
}
@ -431,75 +487,74 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
_TESTS = [{
'url': 'http://music.163.com/#/program?id=10109055',
'info_dict': {
'id': '10109055',
'id': '32593346',
'ext': 'mp3',
'title': '不丹足球背后的故事',
'description': '喜马拉雅人的足球梦 ...',
'creator': '大话西藏',
'timestamp': 1434179342,
'timestamp': 1434179287,
'upload_date': '20150613',
'thumbnail': r're:http.*\.jpg',
'duration': 900,
},
'skip': 'Blocked outside Mainland China',
}, {
'note': 'This program has accompanying songs.',
'url': 'http://music.163.com/#/program?id=10141022',
'info_dict': {
'id': '10141022',
'title': '25岁你是自在如风的少年<27°C>',
'title': '滚滚电台的有声节目',
'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
'creator': '滚滚电台ORZ',
'timestamp': 1434450733,
'upload_date': '20150616',
'thumbnail': r're:http.*\.jpg',
},
'playlist_count': 4,
'skip': 'Blocked outside Mainland China',
}, {
'note': 'This program has accompanying songs.',
'url': 'http://music.163.com/#/program?id=10141022',
'info_dict': {
'id': '10141022',
'id': '32647209',
'ext': 'mp3',
'title': '25岁你是自在如风的少年<27°C>',
'title': '滚滚电台的有声节目',
'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
'timestamp': 1434450841,
'creator': '滚滚电台ORZ',
'timestamp': 1434450733,
'upload_date': '20150616',
'thumbnail': r're:http.*\.jpg',
'duration': 1104,
},
'params': {
'noplaylist': True
},
'skip': 'Blocked outside Mainland China',
}]
def _real_extract(self, url):
program_id = self._match_id(url)
info = self.query_api(
'dj/program/detail?id=%s' % program_id,
program_id, 'Downloading program info')['program']
f'dj/program/detail?id={program_id}', program_id, note='Downloading program info')['program']
name = info['name']
description = info['description']
metainfo = traverse_obj(info, {
'title': ('name', {str}),
'description': ('description', {str}),
'creator': ('dj', 'brand', {str}),
'thumbnail': ('coverUrl', {url_or_none}),
'timestamp': ('createTime', {self.kilo_or_none}),
})
if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']):
formats = self.extract_formats(info['mainSong'])
return {
'id': info['mainSong']['id'],
'title': name,
'description': description,
'creator': info['dj']['brand'],
'timestamp': self.convert_milliseconds(info['createTime']),
'thumbnail': info['coverUrl'],
'duration': self.convert_milliseconds(info.get('duration', 0)),
'id': str(info['mainSong']['id']),
'formats': formats,
'duration': traverse_obj(info, ('mainSong', 'duration', {self.kilo_or_none})),
**metainfo,
}
song_ids = [info['mainSong']['id']]
song_ids.extend([song['id'] for song in info['songs']])
entries = [
self.url_result('http://music.163.com/#/song?id=%s' % song_id,
'NetEaseMusic', song_id)
for song_id in song_ids
]
return self.playlist_result(entries, program_id, name, description)
songs = traverse_obj(info, (('mainSong', ('songs', ...)),))
return self.playlist_result(self._get_entries(songs), program_id, **metainfo)
class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
@ -511,38 +566,32 @@ class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
'info_dict': {
'id': '42',
'title': '声音蔓延',
'description': 'md5:766220985cbd16fdd552f64c578a6b15'
'description': 'md5:c7381ebd7989f9f367668a5aee7d5f08'
},
'playlist_mincount': 40,
'skip': 'Blocked outside Mainland China',
}
_PAGE_SIZE = 1000
def _real_extract(self, url):
dj_id = self._match_id(url)
name = None
desc = None
metainfo = {}
entries = []
for offset in itertools.count(start=0, step=self._PAGE_SIZE):
info = self.query_api(
'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
% (self._PAGE_SIZE, dj_id, offset),
dj_id, 'Downloading dj programs - %d' % offset)
f'dj/program/byradio?asc=false&limit={self._PAGE_SIZE}&radioId={dj_id}&offset={offset}',
dj_id, note=f'Downloading dj programs - {offset}')
entries.extend([
self.url_result(
'http://music.163.com/#/program?id=%s' % program['id'],
'NetEaseMusicProgram', program['id'])
for program in info['programs']
])
if name is None:
radio = info['programs'][0]['radio']
name = radio['name']
desc = radio['desc']
entries.extend(self.url_result(
f'http://music.163.com/#/program?id={program["id"]}', NetEaseMusicProgramIE,
program['id'], program.get('name')) for program in info['programs'])
if not metainfo:
metainfo = traverse_obj(info, ('programs', 0, 'radio', {
'title': ('name', {str}),
'description': ('desc', {str}),
}))
if not info['more']:
break
return self.playlist_result(entries, dj_id, name, desc)
return self.playlist_result(entries, dj_id, **metainfo)

View File

@ -64,6 +64,85 @@ class NFLBaseIE(InfoExtractor):
_VIDEO_CONFIG_REGEX = r'<script[^>]+id="[^"]*video-config-[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}[^"]*"[^>]*>\s*({.+});?\s*</script>'
_ANVATO_PREFIX = 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:'
_CLIENT_DATA = {
'clientKey': '4cFUW6DmwJpzT9L7LrG3qRAcABG5s04g',
'clientSecret': 'CZuvCL49d9OwfGsR',
'deviceId': str(uuid.uuid4()),
'deviceInfo': base64.b64encode(json.dumps({
'model': 'desktop',
'version': 'Chrome',
'osName': 'Windows',
'osVersion': '10.0',
}, separators=(',', ':')).encode()).decode(),
'networkType': 'other',
'nflClaimGroupsToAdd': [],
'nflClaimGroupsToRemove': [],
}
_ACCOUNT_INFO = {}
_API_KEY = None
_TOKEN = None
_TOKEN_EXPIRY = 0
def _get_account_info(self, url, slug):
if not self._API_KEY:
webpage = self._download_webpage(url, slug, fatal=False) or ''
self._API_KEY = self._search_regex(
r'window\.gigyaApiKey\s*=\s*["\'](\w+)["\'];', webpage, 'API key',
fatal=False) or '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
cookies = self._get_cookies('https://auth-id.nfl.com/')
login_token = traverse_obj(cookies, (
(f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False)
if not login_token:
self.raise_login_required()
if 'ucid' not in cookies:
raise ExtractorError(
'Required cookies for the auth-id.nfl.com domain were not found among passed cookies. '
'If using --cookies, these cookies must be exported along with .nfl.com cookies, '
'or else try using --cookies-from-browser instead', expected=True)
account = self._download_json(
'https://auth-id.nfl.com/accounts.getAccountInfo', slug,
note='Downloading account info', data=urlencode_postdata({
'include': 'profile,data',
'lang': 'en',
'APIKey': self._API_KEY,
'sdk': 'js_latest',
'login_token': login_token,
'authMode': 'cookie',
'pageURL': url,
'sdkBuild': traverse_obj(cookies, (
'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'),
'format': 'json',
}), headers={'Content-Type': 'application/x-www-form-urlencoded'})
self._ACCOUNT_INFO = traverse_obj(account, {
'signatureTimestamp': 'signatureTimestamp',
'uid': 'UID',
'uidSignature': 'UIDSignature',
})
if len(self._ACCOUNT_INFO) != 3:
raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True)
def _get_auth_token(self, url, slug):
if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30):
return
if not self._ACCOUNT_INFO:
self._get_account_info(url, slug)
token = self._download_json(
'https://api.nfl.com/identity/v3/token%s' % (
'/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''),
slug, headers={'Content-Type': 'application/json'}, note='Downloading access token',
data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode())
self._TOKEN = token['accessToken']
self._TOKEN_EXPIRY = token['expiresIn']
self._ACCOUNT_INFO['refreshToken'] = token['refreshToken']
def _parse_video_config(self, video_config, display_id):
video_config = self._parse_json(video_config, display_id)
item = video_config['playlist'][0]
@ -168,7 +247,7 @@ class NFLArticleIE(NFLBaseIE):
class NFLPlusReplayIE(NFLBaseIE):
IE_NAME = 'nfl.com:plus:replay'
_VALID_URL = r'https?://(?:www\.)?nfl.com/plus/games/[\w-]+/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?nfl.com/plus/games/(?P<slug>[\w-]+)(?:/(?P<id>\d+))?'
_TESTS = [{
'url': 'https://www.nfl.com/plus/games/giants-at-vikings-2022-post-1/1572108',
'info_dict': {
@ -185,23 +264,92 @@ class NFLPlusReplayIE(NFLBaseIE):
'thumbnail': r're:^https?://.*\.jpg',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'Subscription required',
'url': 'https://www.nfl.com/plus/games/giants-at-vikings-2022-post-1',
'playlist_count': 4,
'info_dict': {
'id': 'giants-at-vikings-2022-post-1',
},
}, {
'note': 'Subscription required',
'url': 'https://www.nfl.com/plus/games/giants-at-patriots-2011-pre-4',
'playlist_count': 2,
'info_dict': {
'id': 'giants-at-patriots-2011-pre-4',
},
}, {
'note': 'Subscription required',
'url': 'https://www.nfl.com/plus/games/giants-at-patriots-2011-pre-4',
'info_dict': {
'id': '950701',
'ext': 'mp4',
'title': 'Giants @ Patriots',
'description': 'Giants at Patriots on September 01, 2011',
'uploader': 'NFL',
'upload_date': '20210724',
'timestamp': 1627085874,
'duration': 1532,
'categories': ['Game Highlights'],
'tags': ['play-by-play'],
'thumbnail': r're:^https?://.*\.jpg',
},
'params': {
'skip_download': 'm3u8',
'extractor_args': {'nflplusreplay': {'type': ['condensed_game']}},
},
}]
_REPLAY_TYPES = {
'full_game': 'Full Game',
'full_game_spanish': 'Full Game - Spanish',
'condensed_game': 'Condensed Game',
'all_22': 'All-22',
}
def _real_extract(self, url):
video_id = self._match_id(url)
return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
slug, video_id = self._match_valid_url(url).group('slug', 'id')
requested_types = self._configuration_arg('type', ['all'])
if 'all' in requested_types:
requested_types = list(self._REPLAY_TYPES.keys())
requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types))
if not video_id:
self._get_auth_token(url, slug)
headers = {'Authorization': f'Bearer {self._TOKEN}'}
game_id = self._download_json(
f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug,
'Downloading game ID', query={'withExternalIds': 'true'}, headers=headers)['id']
replays = self._download_json(
'https://api.nfl.com/content/v1/videos/replays', slug, 'Downloading replays JSON',
query={'gameId': game_id}, headers=headers)
if len(requested_types) == 1:
video_id = traverse_obj(replays, (
'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False)
if video_id:
return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
def entries():
for replay in traverse_obj(
replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types)
):
video_id = replay['mcpPlaybackId']
yield self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
return self.playlist_result(entries(), slug)
class NFLPlusEpisodeIE(NFLBaseIE):
IE_NAME = 'nfl.com:plus:episode'
_VALID_URL = r'https?://(?:www\.)?nfl.com/plus/episodes/(?P<id>[\w-]+)'
_TESTS = [{
'note': 'premium content',
'note': 'Subscription required',
'url': 'https://www.nfl.com/plus/episodes/kurt-s-qb-insider-conference-championships',
'info_dict': {
'id': '1576832',
'ext': 'mp4',
'title': 'Kurt\'s QB Insider: Conference Championships',
'title': 'Conference Championships',
'description': 'md5:944f7fab56f7a37430bf8473f5473857',
'uploader': 'NFL',
'upload_date': '20230127',
@ -214,85 +362,9 @@ class NFLPlusEpisodeIE(NFLBaseIE):
'params': {'skip_download': 'm3u8'},
}]
_CLIENT_DATA = {
'clientKey': '4cFUW6DmwJpzT9L7LrG3qRAcABG5s04g',
'clientSecret': 'CZuvCL49d9OwfGsR',
'deviceId': str(uuid.uuid4()),
'deviceInfo': base64.b64encode(json.dumps({
'model': 'desktop',
'version': 'Chrome',
'osName': 'Windows',
'osVersion': '10.0',
}, separators=(',', ':')).encode()).decode(),
'networkType': 'other',
'nflClaimGroupsToAdd': [],
'nflClaimGroupsToRemove': [],
}
_ACCOUNT_INFO = {}
_API_KEY = None
_TOKEN = None
_TOKEN_EXPIRY = 0
def _get_account_info(self, url, video_id):
cookies = self._get_cookies('https://www.nfl.com/')
login_token = traverse_obj(cookies, (
(f'glt_{self._API_KEY}', f'gig_loginToken_{self._API_KEY}',
lambda k, _: k.startswith('glt_') or k.startswith('gig_loginToken_')),
{lambda x: x.value}), get_all=False)
if not login_token:
self.raise_login_required()
account = self._download_json(
'https://auth-id.nfl.com/accounts.getAccountInfo', video_id,
note='Downloading account info', data=urlencode_postdata({
'include': 'profile,data',
'lang': 'en',
'APIKey': self._API_KEY,
'sdk': 'js_latest',
'login_token': login_token,
'authMode': 'cookie',
'pageURL': url,
'sdkBuild': traverse_obj(cookies, (
'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='13642'),
'format': 'json',
}), headers={'Content-Type': 'application/x-www-form-urlencoded'})
self._ACCOUNT_INFO = traverse_obj(account, {
'signatureTimestamp': 'signatureTimestamp',
'uid': 'UID',
'uidSignature': 'UIDSignature',
})
if len(self._ACCOUNT_INFO) != 3:
raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True)
def _get_auth_token(self, url, video_id):
if not self._ACCOUNT_INFO:
self._get_account_info(url, video_id)
token = self._download_json(
'https://api.nfl.com/identity/v3/token%s' % (
'/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''),
video_id, headers={'Content-Type': 'application/json'}, note='Downloading access token',
data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode())
self._TOKEN = token['accessToken']
self._TOKEN_EXPIRY = token['expiresIn']
self._ACCOUNT_INFO['refreshToken'] = token['refreshToken']
def _real_extract(self, url):
slug = self._match_id(url)
if not self._API_KEY:
webpage = self._download_webpage(url, slug, fatal=False) or ''
self._API_KEY = self._search_regex(
r'window\.gigyaApiKey=["\'](\w+)["\'];', webpage, 'API key',
default='3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f')
if not self._TOKEN or self._TOKEN_EXPIRY <= int(time.time()):
self._get_auth_token(url, slug)
self._get_auth_token(url, slug)
video_id = self._download_json(
f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={
'Authorization': f'Bearer {self._TOKEN}',

View File

@ -28,6 +28,44 @@ class NhkBaseIE(InfoExtractor):
m_id, lang, '/all' if is_video else ''),
m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or []
def _get_api_info(self, refresh=True):
if not refresh:
return self.cache.load('nhk', 'api_info')
self.cache.store('nhk', 'api_info', {})
movie_player_js = self._download_webpage(
'https://movie-a.nhk.or.jp/world/player/js/movie-player.js', None,
note='Downloading stream API information')
api_info = {
'url': self._search_regex(
r'prod:[^;]+\bapiUrl:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API url'),
'token': self._search_regex(
r'prod:[^;]+\btoken:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API token'),
}
self.cache.store('nhk', 'api_info', api_info)
return api_info
def _extract_formats_and_subtitles(self, vod_id):
for refresh in (False, True):
api_info = self._get_api_info(refresh)
if not api_info:
continue
api_url = api_info.pop('url')
stream_url = traverse_obj(
self._download_json(
api_url, vod_id, 'Downloading stream url info', fatal=False, query={
**api_info,
'type': 'json',
'optional_id': vod_id,
'active_flg': 1,
}),
('meta', 0, 'movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False)
if stream_url:
return self._extract_m3u8_formats_and_subtitles(stream_url, vod_id)
raise ExtractorError('Unable to extract stream url')
def _extract_episode_info(self, url, episode=None):
fetch_episode = episode is None
lang, m_type, episode_id = NhkVodIE._match_valid_url(url).groups()
@ -67,12 +105,14 @@ class NhkBaseIE(InfoExtractor):
}
if is_video:
vod_id = episode['vod_id']
formats, subs = self._extract_formats_and_subtitles(vod_id)
info.update({
'_type': 'url_transparent',
'ie_key': 'Piksel',
'url': 'https://movie-s.nhk.or.jp/v/refid/nhkworld/prefid/' + vod_id,
'id': vod_id,
'formats': formats,
'subtitles': subs,
})
else:
if fetch_episode:
audio_path = episode['audio']['audio']

View File

@ -0,0 +1,426 @@
import functools
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
filter_dict,
int_or_none,
parse_qs,
str_or_none,
traverse_obj,
unified_timestamp,
url_or_none,
)
class NiconicoChannelPlusBaseIE(InfoExtractor):
_WEBPAGE_BASE_URL = 'https://nicochannel.jp'
def _call_api(self, path, item_id, *args, **kwargs):
return self._download_json(
f'https://nfc-api.nicochannel.jp/fc/{path}', video_id=item_id, *args, **kwargs)
def _find_fanclub_site_id(self, channel_name):
fanclub_list_json = self._call_api(
'content_providers/channels', item_id=f'channels/{channel_name}',
note='Fetching channel list', errnote='Unable to fetch channel list',
)['data']['content_providers']
fanclub_id = traverse_obj(fanclub_list_json, (
lambda _, v: v['domain'] == f'{self._WEBPAGE_BASE_URL}/{channel_name}', 'id'),
get_all=False)
if not fanclub_id:
raise ExtractorError(f'Channel {channel_name} does not exist', expected=True)
return fanclub_id
def _get_channel_base_info(self, fanclub_site_id):
return traverse_obj(self._call_api(
f'fanclub_sites/{fanclub_site_id}/page_base_info', item_id=f'fanclub_sites/{fanclub_site_id}',
note='Fetching channel base info', errnote='Unable to fetch channel base info', fatal=False,
), ('data', 'fanclub_site', {dict})) or {}
def _get_channel_user_info(self, fanclub_site_id):
return traverse_obj(self._call_api(
f'fanclub_sites/{fanclub_site_id}/user_info', item_id=f'fanclub_sites/{fanclub_site_id}',
note='Fetching channel user info', errnote='Unable to fetch channel user info', fatal=False,
data=json.dumps('null').encode('ascii'),
), ('data', 'fanclub_site', {dict})) or {}
class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE):
IE_NAME = 'NiconicoChannelPlus'
IE_DESC = 'ニコニコチャンネルプラス'
_VALID_URL = r'https?://nicochannel\.jp/(?P<channel>[\w.-]+)/(?:video|live)/(?P<code>sm\w+)'
_TESTS = [{
'url': 'https://nicochannel.jp/kaorin/video/smsDd8EdFLcVZk9yyAhD6H7H',
'info_dict': {
'id': 'smsDd8EdFLcVZk9yyAhD6H7H',
'title': '前田佳織里はニコ生がしたい!',
'ext': 'mp4',
'channel': '前田佳織里の世界攻略計画',
'channel_id': 'kaorin',
'channel_url': 'https://nicochannel.jp/kaorin',
'live_status': 'not_live',
'thumbnail': 'https://nicochannel.jp/public_html/contents/video_pages/74/thumbnail_path',
'description': '2021年11月に放送された\n「前田佳織里はニコ生がしたい!」アーカイブになります。',
'timestamp': 1641360276,
'duration': 4097,
'comment_count': int,
'view_count': int,
'tags': [],
'upload_date': '20220105',
},
'params': {
'skip_download': True,
},
}, {
# age limited video; test purpose channel.
'url': 'https://nicochannel.jp/testman/video/smDXbcrtyPNxLx9jc4BW69Ve',
'info_dict': {
'id': 'smDXbcrtyPNxLx9jc4BW69Ve',
'title': 'test oshiro',
'ext': 'mp4',
'channel': '本番チャンネルプラステストマン',
'channel_id': 'testman',
'channel_url': 'https://nicochannel.jp/testman',
'age_limit': 18,
'live_status': 'was_live',
'timestamp': 1666344616,
'duration': 86465,
'comment_count': int,
'view_count': int,
'tags': [],
'upload_date': '20221021',
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url):
content_code, channel_id = self._match_valid_url(url).group('code', 'channel')
fanclub_site_id = self._find_fanclub_site_id(channel_id)
data_json = self._call_api(
f'video_pages/{content_code}', item_id=content_code, headers={'fc_use_device': 'null'},
note='Fetching video page info', errnote='Unable to fetch video page info',
)['data']['video_page']
live_status, session_id = self._get_live_status_and_session_id(content_code, data_json)
release_timestamp_str = data_json.get('live_scheduled_start_at')
formats = []
if live_status == 'is_upcoming':
if release_timestamp_str:
msg = f'This live event will begin at {release_timestamp_str} UTC'
else:
msg = 'This event has not started yet'
self.raise_no_formats(msg, expected=True, video_id=content_code)
else:
formats = self._extract_m3u8_formats(
# "authenticated_url" is a format string that contains "{session_id}".
m3u8_url=data_json['video_stream']['authenticated_url'].format(session_id=session_id),
video_id=content_code)
return {
'id': content_code,
'formats': formats,
'_format_sort_fields': ('tbr', 'vcodec', 'acodec'),
'channel': self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name'),
'channel_id': channel_id,
'channel_url': f'{self._WEBPAGE_BASE_URL}/{channel_id}',
'age_limit': traverse_obj(self._get_channel_user_info(fanclub_site_id), ('content_provider', 'age_limit')),
'live_status': live_status,
'release_timestamp': unified_timestamp(release_timestamp_str),
**traverse_obj(data_json, {
'title': ('title', {str}),
'thumbnail': ('thumbnail_url', {url_or_none}),
'description': ('description', {str}),
'timestamp': ('released_at', {unified_timestamp}),
'duration': ('active_video_filename', 'length', {int_or_none}),
'comment_count': ('video_aggregate_info', 'number_of_comments', {int_or_none}),
'view_count': ('video_aggregate_info', 'total_views', {int_or_none}),
'tags': ('video_tags', ..., 'tag', {str}),
}),
'__post_extractor': self.extract_comments(
content_code=content_code,
comment_group_id=traverse_obj(data_json, ('video_comment_setting', 'comment_group_id'))),
}
def _get_comments(self, content_code, comment_group_id):
item_id = f'{content_code}/comments'
if not comment_group_id:
return None
comment_access_token = self._call_api(
f'video_pages/{content_code}/comments_user_token', item_id,
note='Getting comment token', errnote='Unable to get comment token',
)['data']['access_token']
comment_list = self._download_json(
'https://comm-api.sheeta.com/messages.history', video_id=item_id,
note='Fetching comments', errnote='Unable to fetch comments',
headers={'Content-Type': 'application/json'},
query={
'sort_direction': 'asc',
'limit': int_or_none(self._configuration_arg('max_comments', [''])[0]) or 120,
},
data=json.dumps({
'token': comment_access_token,
'group_id': comment_group_id,
}).encode('ascii'))
for comment in traverse_obj(comment_list, ...):
yield traverse_obj(comment, {
'author': ('nickname', {str}),
'author_id': ('sender_id', {str_or_none}),
'id': ('id', {str_or_none}),
'text': ('message', {str}),
'timestamp': (('updated_at', 'sent_at', 'created_at'), {unified_timestamp}),
'author_is_uploader': ('sender_id', {lambda x: x == '-1'}),
}, get_all=False)
def _get_live_status_and_session_id(self, content_code, data_json):
video_type = data_json.get('type')
live_finished_at = data_json.get('live_finished_at')
payload = {}
if video_type == 'vod':
if live_finished_at:
live_status = 'was_live'
else:
live_status = 'not_live'
elif video_type == 'live':
if not data_json.get('live_started_at'):
return 'is_upcoming', ''
if not live_finished_at:
live_status = 'is_live'
else:
live_status = 'was_live'
payload = {'broadcast_type': 'dvr'}
video_allow_dvr_flg = traverse_obj(data_json, ('video', 'allow_dvr_flg'))
video_convert_to_vod_flg = traverse_obj(data_json, ('video', 'convert_to_vod_flg'))
self.write_debug(f'allow_dvr_flg = {video_allow_dvr_flg}, convert_to_vod_flg = {video_convert_to_vod_flg}.')
if not (video_allow_dvr_flg and video_convert_to_vod_flg):
raise ExtractorError(
'Live was ended, there is no video for download.', video_id=content_code, expected=True)
else:
raise ExtractorError(f'Unknown type: {video_type}', video_id=content_code, expected=False)
self.write_debug(f'{content_code}: video_type={video_type}, live_status={live_status}')
session_id = self._call_api(
f'video_pages/{content_code}/session_ids', item_id=f'{content_code}/session',
data=json.dumps(payload).encode('ascii'), headers={
'Content-Type': 'application/json',
'fc_use_device': 'null',
'origin': 'https://nicochannel.jp',
},
note='Getting session id', errnote='Unable to get session id',
)['data']['session_id']
return live_status, session_id
class NiconicoChannelPlusChannelBaseIE(NiconicoChannelPlusBaseIE):
_PAGE_SIZE = 12
def _fetch_paged_channel_video_list(self, path, query, channel_name, item_id, page):
response = self._call_api(
path, item_id, query={
**query,
'page': (page + 1),
'per_page': self._PAGE_SIZE,
},
headers={'fc_use_device': 'null'},
note=f'Getting channel info (page {page + 1})',
errnote=f'Unable to get channel info (page {page + 1})')
for content_code in traverse_obj(response, ('data', 'video_pages', 'list', ..., 'content_code')):
# "video/{content_code}" works for both VOD and live, but "live/{content_code}" doesn't work for VOD
yield self.url_result(
f'{self._WEBPAGE_BASE_URL}/{channel_name}/video/{content_code}', NiconicoChannelPlusIE)
class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE):
IE_NAME = 'NiconicoChannelPlus:channel:videos'
IE_DESC = 'ニコニコチャンネルプラス - チャンネル - 動画リスト. nicochannel.jp/channel/videos'
_VALID_URL = r'https?://nicochannel\.jp/(?P<id>[a-z\d\._-]+)/videos(?:\?.*)?'
_TESTS = [{
# query: None
'url': 'https://nicochannel.jp/testman/videos',
'info_dict': {
'id': 'testman-videos',
'title': '本番チャンネルプラステストマン-videos',
},
'playlist_mincount': 18,
}, {
# query: None
'url': 'https://nicochannel.jp/testtarou/videos',
'info_dict': {
'id': 'testtarou-videos',
'title': 'チャンネルプラステスト太郎-videos',
},
'playlist_mincount': 2,
}, {
# query: None
'url': 'https://nicochannel.jp/testjirou/videos',
'info_dict': {
'id': 'testjirou-videos',
'title': 'チャンネルプラステスト二郎-videos',
},
'playlist_mincount': 12,
}, {
# query: tag
'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8',
'info_dict': {
'id': 'testman-videos',
'title': '本番チャンネルプラステストマン-videos',
},
'playlist_mincount': 6,
}, {
# query: vodType
'url': 'https://nicochannel.jp/testman/videos?vodType=1',
'info_dict': {
'id': 'testman-videos',
'title': '本番チャンネルプラステストマン-videos',
},
'playlist_mincount': 18,
}, {
# query: sort
'url': 'https://nicochannel.jp/testman/videos?sort=-released_at',
'info_dict': {
'id': 'testman-videos',
'title': '本番チャンネルプラステストマン-videos',
},
'playlist_mincount': 18,
}, {
# query: tag, vodType
'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&vodType=1',
'info_dict': {
'id': 'testman-videos',
'title': '本番チャンネルプラステストマン-videos',
},
'playlist_mincount': 6,
}, {
# query: tag, sort
'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&sort=-released_at',
'info_dict': {
'id': 'testman-videos',
'title': '本番チャンネルプラステストマン-videos',
},
'playlist_mincount': 6,
}, {
# query: vodType, sort
'url': 'https://nicochannel.jp/testman/videos?vodType=1&sort=-released_at',
'info_dict': {
'id': 'testman-videos',
'title': '本番チャンネルプラステストマン-videos',
},
'playlist_mincount': 18,
}, {
# query: tag, vodType, sort
'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&vodType=1&sort=-released_at',
'info_dict': {
'id': 'testman-videos',
'title': '本番チャンネルプラステストマン-videos',
},
'playlist_mincount': 6,
}]
def _real_extract(self, url):
"""
API parameters:
sort:
-released_at 公開日が新しい順 (newest to oldest)
released_at 公開日が古い順 (oldest to newest)
-number_of_vod_views 再生数が多い順 (most play count)
number_of_vod_views コメントが多い順 (most comments)
vod_type (is "vodType" in "url"):
0 すべて (all)
1 会員限定 (members only)
2 一部無料 (partially free)
3 レンタル (rental)
4 生放送アーカイブ (live archives)
5 アップロード動画 (uploaded videos)
"""
channel_id = self._match_id(url)
fanclub_site_id = self._find_fanclub_site_id(channel_id)
channel_name = self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name')
qs = parse_qs(url)
return self.playlist_result(
OnDemandPagedList(
functools.partial(
self._fetch_paged_channel_video_list, f'fanclub_sites/{fanclub_site_id}/video_pages',
filter_dict({
'tag': traverse_obj(qs, ('tag', 0)),
'sort': traverse_obj(qs, ('sort', 0), default='-released_at'),
'vod_type': traverse_obj(qs, ('vodType', 0), default='0'),
}),
channel_id, f'{channel_id}/videos'),
self._PAGE_SIZE),
playlist_id=f'{channel_id}-videos', playlist_title=f'{channel_name}-videos')
class NiconicoChannelPlusChannelLivesIE(NiconicoChannelPlusChannelBaseIE):
IE_NAME = 'NiconicoChannelPlus:channel:lives'
IE_DESC = 'ニコニコチャンネルプラス - チャンネル - ライブリスト. nicochannel.jp/channel/lives'
_VALID_URL = r'https?://nicochannel\.jp/(?P<id>[a-z\d\._-]+)/lives'
_TESTS = [{
'url': 'https://nicochannel.jp/testman/lives',
'info_dict': {
'id': 'testman-lives',
'title': '本番チャンネルプラステストマン-lives',
},
'playlist_mincount': 18,
}, {
'url': 'https://nicochannel.jp/testtarou/lives',
'info_dict': {
'id': 'testtarou-lives',
'title': 'チャンネルプラステスト太郎-lives',
},
'playlist_mincount': 2,
}, {
'url': 'https://nicochannel.jp/testjirou/lives',
'info_dict': {
'id': 'testjirou-lives',
'title': 'チャンネルプラステスト二郎-lives',
},
'playlist_mincount': 6,
}]
def _real_extract(self, url):
"""
API parameters:
live_type:
1 放送中 (on air)
2 放送予定 (scheduled live streams, oldest to newest)
3 過去の放送 - すべて (all ended live streams, newest to oldest)
4 過去の放送 - 生放送アーカイブ (all archives for live streams, oldest to newest)
We use "4" instead of "3" because some recently ended live streams could not be downloaded.
"""
channel_id = self._match_id(url)
fanclub_site_id = self._find_fanclub_site_id(channel_id)
channel_name = self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name')
return self.playlist_result(
OnDemandPagedList(
functools.partial(
self._fetch_paged_channel_video_list, f'fanclub_sites/{fanclub_site_id}/live_pages',
{
'live_type': 4,
},
channel_id, f'{channel_id}/lives'),
self._PAGE_SIZE),
playlist_id=f'{channel_id}-lives', playlist_title=f'{channel_name}-lives')

View File

@ -6,7 +6,6 @@ from ..utils import (
determine_ext,
int_or_none,
js_to_json,
qualities,
traverse_obj,
unified_strdate,
url_or_none,
@ -49,77 +48,52 @@ class NovaEmbedIE(InfoExtractor):
duration = None
formats = []
player = self._parse_json(
self._search_regex(
(r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,',
r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'),
webpage, 'player', default='{}', group='json'), video_id, fatal=False)
if player:
for format_id, format_list in player['tracks'].items():
if not isinstance(format_list, list):
format_list = [format_list]
for format_dict in format_list:
if not isinstance(format_dict, dict):
continue
if (not self.get_param('allow_unplayable_formats')
and traverse_obj(format_dict, ('drm', 'keySystem'))):
has_drm = True
continue
format_url = url_or_none(format_dict.get('src'))
format_type = format_dict.get('type')
ext = determine_ext(format_url)
if (format_type == 'application/x-mpegURL'
or format_id == 'HLS' or ext == 'm3u8'):
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False))
elif (format_type == 'application/dash+xml'
or format_id == 'DASH' or ext == 'mpd'):
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False))
else:
formats.append({
'url': format_url,
})
duration = int_or_none(player.get('duration'))
else:
# Old path, not actual as of 08.04.2020
bitrates = self._parse_json(
self._search_regex(
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
video_id, transform_source=js_to_json)
QUALITIES = ('lq', 'mq', 'hq', 'hd')
quality_key = qualities(QUALITIES)
for format_id, format_list in bitrates.items():
if not isinstance(format_list, list):
format_list = [format_list]
for format_url in format_list:
format_url = url_or_none(format_url)
if not format_url:
continue
if format_id == 'hls':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, ext='mp4',
entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False))
continue
f = {
def process_format_list(format_list, format_id=""):
nonlocal formats, has_drm
if not isinstance(format_list, list):
format_list = [format_list]
for format_dict in format_list:
if not isinstance(format_dict, dict):
continue
if (not self.get_param('allow_unplayable_formats')
and traverse_obj(format_dict, ('drm', 'keySystem'))):
has_drm = True
continue
format_url = url_or_none(format_dict.get('src'))
format_type = format_dict.get('type')
ext = determine_ext(format_url)
if (format_type == 'application/x-mpegURL'
or format_id == 'HLS' or ext == 'm3u8'):
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False))
elif (format_type == 'application/dash+xml'
or format_id == 'DASH' or ext == 'mpd'):
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False))
else:
formats.append({
'url': format_url,
}
f_id = format_id
for quality in QUALITIES:
if '%s.mp4' % quality in format_url:
f_id += '-%s' % quality
f.update({
'quality': quality_key(quality),
'format_note': quality.upper(),
})
break
f['format_id'] = f_id
formats.append(f)
})
player = self._search_json(
r'player:', webpage, 'player', video_id, fatal=False, end_pattern=r';\s*</script>')
if player:
for src in traverse_obj(player, ('lib', 'source', 'sources', ...)):
process_format_list(src)
duration = traverse_obj(player, ('sourceInfo', 'duration', {int_or_none}))
if not formats and not has_drm:
# older code path, in use before August 2023
player = self._parse_json(
self._search_regex(
(r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,',
r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'),
webpage, 'player', group='json'), video_id)
if player:
for format_id, format_list in player['tracks'].items():
process_format_list(format_list, format_id)
duration = int_or_none(player.get('duration'))
if not formats and has_drm:
self.report_drm(video_id)

View File

@ -146,7 +146,6 @@ class PlayVidsIE(PeekVidsBaseIE):
'uploader': 'Brazzers',
'age_limit': 18,
'view_count': int,
'age_limit': 18,
'categories': list,
'tags': list,
},

View File

@ -0,0 +1,70 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
parse_qs,
time_seconds,
traverse_obj,
)
class PIAULIZAPortalIE(InfoExtractor):
IE_DESC = 'ulizaportal.jp - PIA LIVE STREAM'
_VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
_TESTS = [{
'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
'info_dict': {
'id': '005f18b7-e810-5618-cb82-0987c5755d44',
'title': 'プレゼンテーションプレイヤーのサンプル',
'live_status': 'not_live',
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
}, {
'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
'info_dict': {
'id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
'title': '【確認用】視聴サンプルページULIZA',
'live_status': 'not_live',
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
expires = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
if expires and expires <= time_seconds():
raise ExtractorError('The link is expired.', video_id=video_id, expected=True)
webpage = self._download_webpage(url, video_id)
player_data = self._download_webpage(
self._search_regex(
r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
webpage, 'player data url'),
video_id, headers={'Referer': 'https://ulizaportal.jp/'},
note='Fetching player data', errnote='Unable to fetch player data')
formats = self._extract_m3u8_formats(
self._search_regex(
r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data,
'm3u8 url', default=None),
video_id, fatal=False)
m3u8_type = self._search_regex(
r'/hls/(dvr|video)/', traverse_obj(formats, (0, 'url')), 'm3u8 type', default=None)
return {
'id': video_id,
'title': self._html_extract_title(webpage),
'formats': formats,
'live_status': {
'video': 'is_live',
'dvr': 'was_live', # short-term archives
}.get(m3u8_type, 'not_live'), # VOD or long-term archives
}

View File

@ -1,4 +1,5 @@
import base64
import random
import urllib.parse
from .common import InfoExtractor
@ -13,6 +14,7 @@ from ..utils import (
class RadikoBaseIE(InfoExtractor):
_GEO_BYPASS = False
_FULL_KEY = None
_HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED = (
'https://c-rpaa.smartstream.ne.jp',
@ -32,7 +34,7 @@ class RadikoBaseIE(InfoExtractor):
'https://c-radiko.smartstream.ne.jp',
)
def _auth_client(self):
def _negotiate_token(self):
_, auth1_handle = self._download_webpage_handle(
'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page',
headers={
@ -58,10 +60,23 @@ class RadikoBaseIE(InfoExtractor):
'x-radiko-partialkey': partial_key,
}).split(',')[0]
if area_id == 'OUT':
self.raise_geo_restricted(countries=['JP'])
auth_data = (auth_token, area_id)
self.cache.store('radiko', 'auth_data', auth_data)
return auth_data
def _auth_client(self):
cachedata = self.cache.load('radiko', 'auth_data')
if cachedata is not None:
response = self._download_webpage(
'https://radiko.jp/v2/api/auth_check', None, 'Checking cached token', expected_status=401,
headers={'X-Radiko-AuthToken': cachedata[0], 'X-Radiko-AreaId': cachedata[1]})
if response == 'OK':
return cachedata
return self._negotiate_token()
def _extract_full_key(self):
if self._FULL_KEY:
return self._FULL_KEY
@ -75,7 +90,7 @@ class RadikoBaseIE(InfoExtractor):
if full_key:
full_key = full_key.encode()
else: # use full key ever known
else: # use only full key ever known
full_key = b'bcd151073c03b352e1ef2fd66c32209da9ca0afa'
self._FULL_KEY = full_key
@ -103,24 +118,24 @@ class RadikoBaseIE(InfoExtractor):
m3u8_playlist_data = self._download_xml(
f'https://radiko.jp/v3/station/stream/pc_html5/{station}.xml', video_id,
note='Downloading stream information')
m3u8_urls = m3u8_playlist_data.findall('.//url')
formats = []
found = set()
for url_tag in m3u8_urls:
pcu = url_tag.find('playlist_create_url').text
url_attrib = url_tag.attrib
timefree_int = 0 if is_onair else 1
for element in m3u8_playlist_data.findall(f'.//url[@timefree="{timefree_int}"]/playlist_create_url'):
pcu = element.text
if pcu in found:
continue
found.add(pcu)
playlist_url = update_url_query(pcu, {
'station_id': station,
**query,
'l': '15',
'lsid': '88ecea37e968c1f17d5413312d9f8003',
'lsid': ''.join(random.choices('0123456789abcdef', k=32)),
'type': 'b',
})
if playlist_url in found:
continue
else:
found.add(playlist_url)
time_to_skip = None if is_onair else cursor - ft
@ -138,7 +153,7 @@ class RadikoBaseIE(InfoExtractor):
not is_onair and pcu.startswith(self._HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED)):
sf['preference'] = -100
sf['format_note'] = 'not preferred'
if not is_onair and url_attrib['timefree'] == '1' and time_to_skip:
if not is_onair and timefree_int == 1 and time_to_skip:
sf['downloader_options'] = {'ffmpeg_args': ['-ss', time_to_skip]}
formats.extend(subformats)
@ -166,21 +181,7 @@ class RadikoIE(RadikoBaseIE):
vid_int = unified_timestamp(video_id, False)
prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int)
auth_cache = self.cache.load('radiko', 'auth_data')
for attempt in range(2):
auth_token, area_id = (not attempt and auth_cache) or self._auth_client()
formats = self._extract_formats(
video_id=video_id, station=station, is_onair=False,
ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id,
query={
'start_at': radio_begin,
'ft': radio_begin,
'end_at': radio_end,
'to': radio_end,
'seek': video_id,
})
if formats:
break
auth_token, area_id = self._auth_client()
return {
'id': video_id,
@ -189,8 +190,18 @@ class RadikoIE(RadikoBaseIE):
'uploader': try_call(lambda: station_program.find('.//name').text),
'uploader_id': station,
'timestamp': vid_int,
'formats': formats,
'is_live': True,
'formats': self._extract_formats(
video_id=video_id, station=station, is_onair=False,
ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id,
query={
'start_at': radio_begin,
'ft': radio_begin,
'end_at': radio_end,
'to': radio_end,
'seek': video_id
}
),
}

View File

@ -82,7 +82,7 @@ class RadioFranceBaseIE(InfoExtractor):
def _extract_data_from_webpage(self, webpage, display_id, key):
return traverse_obj(self._search_json(
r'\bconst\s+data\s*=', webpage, key, display_id,
contains_pattern=r'(\[\{.*?\}\]);', transform_source=js_to_json),
contains_pattern=r'\[\{(?s:.+)\}\]', transform_source=js_to_json),
(..., 'data', key, {dict}), get_all=False) or {}

View File

@ -1,10 +1,11 @@
import re
from .common import InfoExtractor
from ..utils import parse_qs, remove_start, traverse_obj, ExtractorError
class RbgTumIE(InfoExtractor):
_VALID_URL = r'https://live\.rbg\.tum\.de/w/(?P<id>.+)'
_VALID_URL = r'https://(?:live\.rbg\.tum\.de|tum\.live)/w/(?P<id>[^?#]+)'
_TESTS = [{
# Combined view
'url': 'https://live.rbg.tum.de/w/cpp/22128',
@ -35,16 +36,18 @@ class RbgTumIE(InfoExtractor):
'title': 'Fachschaftsvollversammlung',
'series': 'Fachschaftsvollversammlung Informatik',
}
}, {
'url': 'https://tum.live/w/linalginfo/27102',
'only_matching': True,
}, ]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
m3u8 = self._html_search_regex(r'(https://.+?\.m3u8)', webpage, 'm3u8')
lecture_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')
lecture_series_title = self._html_search_regex(
r'(?s)<title\b[^>]*>\s*(?:TUM-Live\s\|\s?)?([^:]+):?.*?</title>', webpage, 'series')
m3u8 = self._html_search_regex(r'"(https://[^"]+\.m3u8[^"]*)', webpage, 'm3u8')
lecture_title = self._html_search_regex(r'<h1[^>]*>([^<]+)</h1>', webpage, 'title', fatal=False)
lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')
formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
@ -57,9 +60,9 @@ class RbgTumIE(InfoExtractor):
class RbgTumCourseIE(InfoExtractor):
_VALID_URL = r'https://live\.rbg\.tum\.de/course/(?P<id>.+)'
_VALID_URL = r'https://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/old/course/(?P<id>(?P<year>\d+)/(?P<term>\w+)/(?P<slug>[^/?#]+))'
_TESTS = [{
'url': 'https://live.rbg.tum.de/course/2022/S/fpv',
'url': 'https://live.rbg.tum.de/old/course/2022/S/fpv',
'info_dict': {
'title': 'Funktionale Programmierung und Verifikation (IN0003)',
'id': '2022/S/fpv',
@ -69,7 +72,7 @@ class RbgTumCourseIE(InfoExtractor):
},
'playlist_count': 13,
}, {
'url': 'https://live.rbg.tum.de/course/2022/W/set',
'url': 'https://live.rbg.tum.de/old/course/2022/W/set',
'info_dict': {
'title': 'SET FSMPIC',
'id': '2022/W/set',
@ -78,16 +81,62 @@ class RbgTumCourseIE(InfoExtractor):
'noplaylist': False,
},
'playlist_count': 6,
}, {
'url': 'https://tum.live/old/course/2023/S/linalginfo',
'only_matching': True,
}, ]
def _real_extract(self, url):
course_id = self._match_id(url)
webpage = self._download_webpage(url, course_id)
course_id, hostname, year, term, slug = self._match_valid_url(url).group('id', 'hostname', 'year', 'term', 'slug')
meta = self._download_json(
f'https://{hostname}/api/courses/{slug}/', course_id, fatal=False,
query={'year': year, 'term': term}) or {}
lecture_series_title = meta.get('Name')
lectures = [self.url_result(f'https://{hostname}/w/{slug}/{stream_id}', RbgTumIE)
for stream_id in traverse_obj(meta, ('Streams', ..., 'ID'))]
lecture_series_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')
if not lectures:
webpage = self._download_webpage(url, course_id)
lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')
lectures = [self.url_result(f'https://{hostname}{lecture_path}', RbgTumIE)
for lecture_path in re.findall(r'href="(/w/[^/"]+/[^/"]+)"', webpage)]
lecture_urls = []
for lecture_url in re.findall(r'(?i)href="/w/(.+)(?<!/cam)(?<!/pres)(?<!/chat)"', webpage):
lecture_urls.append(self.url_result('https://live.rbg.tum.de/w/' + lecture_url, ie=RbgTumIE.ie_key()))
return self.playlist_result(lectures, course_id, lecture_series_title)
return self.playlist_result(lecture_urls, course_id, lecture_series_title)
class RbgTumNewCourseIE(InfoExtractor):
_VALID_URL = r'https://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/\?'
_TESTS = [{
'url': 'https://live.rbg.tum.de/?year=2022&term=S&slug=fpv&view=3',
'info_dict': {
'title': 'Funktionale Programmierung und Verifikation (IN0003)',
'id': '2022/S/fpv',
},
'params': {
'noplaylist': False,
},
'playlist_count': 13,
}, {
'url': 'https://live.rbg.tum.de/?year=2022&term=W&slug=set&view=3',
'info_dict': {
'title': 'SET FSMPIC',
'id': '2022/W/set',
},
'params': {
'noplaylist': False,
},
'playlist_count': 6,
}, {
'url': 'https://tum.live/?year=2023&term=S&slug=linalginfo&view=3',
'only_matching': True,
}]
def _real_extract(self, url):
query = parse_qs(url)
errors = [key for key in ('year', 'term', 'slug') if not query.get(key)]
if errors:
raise ExtractorError(f'Input URL is missing query parameters: {", ".join(errors)}')
year, term, slug = query['year'][0], query['term'][0], query['slug'][0]
hostname = self._match_valid_url(url).group('hostname')
return self.url_result(f'https://{hostname}/old/course/{year}/{term}/{slug}', RbgTumCourseIE)

View File

@ -239,10 +239,10 @@ class RCSEmbedsIE(RCSBaseIE):
}
}, {
'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789',
'match_only': True
'only_matching': True
}, {
'url': 'https://video.gazzetta.it/video-embed/49612410-00ca-11eb-bcd8-30d4253e0140',
'match_only': True
'only_matching': True
}]
_WEBPAGE_TESTS = [{
'url': 'https://www.iodonna.it/video-iodonna/personaggi-video/monica-bellucci-piu-del-lavoro-oggi-per-me-sono-importanti-lamicizia-e-la-famiglia/',
@ -325,7 +325,7 @@ class RCSIE(RCSBaseIE):
}
}, {
'url': 'https://video.corriere.it/video-360/metro-copenaghen-tutta-italiana/a248a7f0-e2db-11e9-9830-af2de6b1f945',
'match_only': True
'only_matching': True
}]

View File

@ -40,7 +40,6 @@ class RokfinIE(InfoExtractor):
'channel': 'Jimmy Dore',
'channel_id': 65429,
'channel_url': 'https://rokfin.com/TheJimmyDoreShow',
'duration': 213.0,
'availability': 'public',
'live_status': 'not_live',
'dislike_count': int,

View File

@ -78,7 +78,6 @@ class S4CSeriesIE(InfoExtractor):
'info_dict': {
'id': '864982911',
'title': 'Iaith ar Daith',
'description': 'md5:e878ebf660dce89bd2ef521d7ce06397'
},
}, {
'url': 'https://www.s4c.cymru/clic/series/866852587',
@ -86,7 +85,6 @@ class S4CSeriesIE(InfoExtractor):
'info_dict': {
'id': '866852587',
'title': 'FFIT Cymru',
'description': 'md5:abcb3c129cb68dbb6cd304fd33b07e96'
},
}]

View File

@ -76,7 +76,6 @@ class SovietsClosetIE(SovietsClosetBaseIE):
'title': 'Arma 3 - Zeus Games #5',
'uploader': 'SovietWomble',
'thumbnail': r're:^https?://.*\.b-cdn\.net/c0e5e76f-3a93-40b4-bf01-12343c2eec5d/thumbnail\.jpg$',
'uploader': 'SovietWomble',
'creator': 'SovietWomble',
'release_timestamp': 1461157200,
'release_date': '20160420',

View File

@ -50,16 +50,16 @@ class SubstackIE(InfoExtractor):
if not re.search(r'<script[^>]+src=["\']https://substackcdn.com/[^"\']+\.js', webpage):
return
mobj = re.search(r'{[^}]*["\']subdomain["\']\s*:\s*["\'](?P<subdomain>[^"]+)', webpage)
mobj = re.search(r'{[^}]*\\?["\']subdomain\\?["\']\s*:\s*\\?["\'](?P<subdomain>[^\\"\']+)', webpage)
if mobj:
parsed = urllib.parse.urlparse(url)
yield parsed._replace(netloc=f'{mobj.group("subdomain")}.substack.com').geturl()
raise cls.StopExtraction()
def _extract_video_formats(self, video_id, username):
def _extract_video_formats(self, video_id, url):
formats, subtitles = [], {}
for video_format in ('hls', 'mp4'):
video_url = f'https://{username}.substack.com/api/v1/video/upload/{video_id}/src?type={video_format}'
video_url = urllib.parse.urljoin(url, f'/api/v1/video/upload/{video_id}/src?type={video_format}')
if video_format == 'hls':
fmts, subs = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', fatal=False)
@ -81,12 +81,17 @@ class SubstackIE(InfoExtractor):
r'window\._preloads\s*=\s*JSON\.parse\(', webpage, 'json string',
display_id, transform_source=js_to_json, contains_pattern=r'"{(?s:.+)}"'), display_id)
canonical_url = url
domain = traverse_obj(webpage_info, ('domainInfo', 'customDomain', {str}))
if domain:
canonical_url = urllib.parse.urlparse(url)._replace(netloc=domain).geturl()
post_type = webpage_info['post']['type']
formats, subtitles = [], {}
if post_type == 'podcast':
formats, subtitles = [{'url': webpage_info['post']['podcast_url']}], {}
elif post_type == 'video':
formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], username)
formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url)
else:
self.raise_no_formats(f'Page type "{post_type}" is not supported')
@ -99,4 +104,5 @@ class SubstackIE(InfoExtractor):
'thumbnail': traverse_obj(webpage_info, ('post', 'cover_image')),
'uploader': traverse_obj(webpage_info, ('pub', 'name')),
'uploader_id': str_or_none(traverse_obj(webpage_info, ('post', 'publication_id'))),
'webpage_url': canonical_url,
}

View File

@ -5,8 +5,9 @@ import re
from .common import InfoExtractor
from ..dependencies import websockets
from ..utils import (
clean_html,
ExtractorError,
UserNotLive,
clean_html,
float_or_none,
get_element_by_class,
get_element_by_id,
@ -235,6 +236,9 @@ class TwitCastingLiveIE(InfoExtractor):
_TESTS = [{
'url': 'https://twitcasting.tv/ivetesangalo',
'only_matching': True,
}, {
'url': 'https://twitcasting.tv/c:unusedlive',
'expected_exception': 'UserNotLive',
}]
def _real_extract(self, url):
@ -260,7 +264,7 @@ class TwitCastingLiveIE(InfoExtractor):
r'(?s)<a\s+class="tw-movie-thumbnail"\s*href="/[^/]+/movie/(?P<video_id>\d+)"\s*>.+?</a>',
webpage, 'current live ID 2', default=None, group='video_id')
if not current_live:
raise ExtractorError('The user is not currently live')
raise UserNotLive(video_id=uploader_id)
return self.url_result('https://twitcasting.tv/%s/movie/%s' % (uploader_id, current_live))

View File

@ -190,10 +190,7 @@ class WrestleUniverseVODIE(WrestleUniverseBaseIE):
def _real_extract(self, url):
lang, video_id = self._match_valid_url(url).group('lang', 'id')
metadata = self._download_metadata(url, video_id, lang, 'videoEpisodeFallbackData')
video_data = self._call_api(video_id, ':watch', 'watch', data={
# 'deviceId' is required if ignoreDeviceRestriction is False
'ignoreDeviceRestriction': True,
})
video_data = self._call_api(video_id, ':watch', 'watch', data={'deviceId': self._DEVICE_ID})
return {
'id': video_id,

View File

@ -407,7 +407,7 @@ class XHamsterEmbedIE(InfoExtractor):
class XHamsterUserIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?%s/users/(?P<id>[^/?#&]+)' % XHamsterIE._DOMAINS
_VALID_URL = rf'https?://(?:[^/?#]+\.)?{XHamsterIE._DOMAINS}/(?:(?P<user>users)|creators)/(?P<id>[^/?#&]+)'
_TESTS = [{
# Paginated user profile
'url': 'https://xhamster.com/users/netvideogirls/videos',
@ -422,6 +422,12 @@ class XHamsterUserIE(InfoExtractor):
'id': 'firatkaan',
},
'playlist_mincount': 1,
}, {
'url': 'https://xhamster.com/creators/squirt-orgasm-69',
'info_dict': {
'id': 'squirt-orgasm-69',
},
'playlist_mincount': 150,
}, {
'url': 'https://xhday.com/users/mobhunter',
'only_matching': True,
@ -430,8 +436,9 @@ class XHamsterUserIE(InfoExtractor):
'only_matching': True,
}]
def _entries(self, user_id):
next_page_url = 'https://xhamster.com/users/%s/videos/1' % user_id
def _entries(self, user_id, is_user):
prefix, suffix = ('users', 'videos') if is_user else ('creators', 'exclusive')
next_page_url = f'https://xhamster.com/{prefix}/{user_id}/{suffix}/1'
for pagenum in itertools.count(1):
page = self._download_webpage(
next_page_url, user_id, 'Downloading page %s' % pagenum)
@ -454,5 +461,5 @@ class XHamsterUserIE(InfoExtractor):
break
def _real_extract(self, url):
user_id = self._match_id(url)
return self.playlist_result(self._entries(user_id), user_id)
user, user_id = self._match_valid_url(url).group('user', 'id')
return self.playlist_result(self._entries(user_id, bool(user)), user_id)

View File

@ -902,7 +902,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
"""
# XXX: this could be moved to a general function in utils.py
# XXX: this could be moved to a general function in utils/_utils.py
# The relative time text strings are roughly the same as what
# Javascript's Intl.RelativeTimeFormat function generates.
# See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
@ -941,7 +941,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
default_client='web'):
for retry in self.RetryManager():
raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
# Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
icd_rm = next(icd_retries)
main_retries = iter(self.RetryManager())
main_rm = next(main_retries)
for _ in range(main_rm.retries + icd_rm.retries + 1):
try:
response = self._call_api(
ep=ep, fatal=True, headers=headers,
@ -953,7 +959,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if not isinstance(e.cause, network_exceptions):
return self._error_or_warning(e, fatal=fatal)
elif not isinstance(e.cause, HTTPError):
retry.error = e
main_rm.error = e
next(main_retries)
continue
first_bytes = e.cause.response.read(512)
@ -965,27 +972,32 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if yt_error:
self._report_alerts([('ERROR', yt_error)], fatal=False)
# Downloading page may result in intermittent 5xx HTTP error
# Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
# Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
# We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
if e.cause.status not in (403, 429):
retry.error = e
main_rm.error = e
next(main_retries)
continue
return self._error_or_warning(e, fatal=fatal)
try:
self._extract_and_report_alerts(response, only_once=True)
except ExtractorError as e:
# YouTube servers may return errors we want to retry on in a 200 OK response
# YouTube's servers may return errors we want to retry on in a 200 OK response
# See: https://github.com/yt-dlp/yt-dlp/issues/839
if 'unknown error' in e.msg.lower():
retry.error = e
main_rm.error = e
next(main_retries)
continue
return self._error_or_warning(e, fatal=fatal)
# Youtube sometimes sends incomplete data
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
if not traverse_obj(response, *variadic(check_get_keys)):
retry.error = ExtractorError('Incomplete data received', expected=True)
icd_rm.error = ExtractorError('Incomplete data received', expected=True)
should_retry = next(icd_retries, None)
if not should_retry:
return None
continue
return response

View File

@ -1,4 +1,4 @@
# flake8: noqa: 401
# flake8: noqa: F401
from .common import (
HEADRequest,
PUTRequest,

View File

@ -337,7 +337,7 @@ def handle_sslerror(e: ssl.SSLError):
def handle_response_read_exceptions(e):
if isinstance(e, http.client.IncompleteRead):
raise IncompleteRead(partial=e.partial, cause=e, expected=e.expected) from e
raise IncompleteRead(partial=len(e.partial), cause=e, expected=e.expected) from e
elif isinstance(e, ssl.SSLError):
handle_sslerror(e)
elif isinstance(e, (OSError, EOFError, http.client.HTTPException, *CONTENT_DECODE_ERRORS)):

View File

@ -75,10 +75,10 @@ class HTTPError(RequestError):
class IncompleteRead(TransportError):
def __init__(self, partial, expected=None, **kwargs):
def __init__(self, partial: int, expected: int = None, **kwargs):
self.partial = partial
self.expected = expected
msg = f'{len(partial)} bytes read'
msg = f'{partial} bytes read'
if expected is not None:
msg += f', {expected} more expected'

View File

@ -1,8 +1,6 @@
import subprocess
from .common import PostProcessor
from ..compat import compat_shlex_quote
from ..utils import PostProcessingError, encodeArgument, variadic
from ..utils import Popen, PostProcessingError, variadic
class ExecPP(PostProcessor):
@ -27,10 +25,10 @@ class ExecPP(PostProcessor):
def run(self, info):
for tmpl in self.exec_cmd:
cmd = self.parse_cmd(tmpl, info)
self.to_screen('Executing command: %s' % cmd)
retCode = subprocess.call(encodeArgument(cmd), shell=True)
if retCode != 0:
raise PostProcessingError('Command returned error code %d' % retCode)
self.to_screen(f'Executing command: {cmd}')
_, _, return_code = Popen.run(cmd, shell=True)
if return_code != 0:
raise PostProcessingError(f'Command returned error code {return_code}')
return [], info

View File

@ -112,6 +112,31 @@ def is_non_updateable():
detect_variant(), _NON_UPDATEABLE_REASONS['unknown' if VARIANT else 'other'])
def _get_system_deprecation():
MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 8)
if sys.version_info > MIN_RECOMMENDED:
return None
major, minor = sys.version_info[:2]
if sys.version_info < MIN_SUPPORTED:
msg = f'Python version {major}.{minor} is no longer supported'
else:
msg = f'Support for Python version {major}.{minor} has been deprecated. '
# Temporary until `win_x86_exe` uses 3.8, which will deprecate Vista and Server 2008
if detect_variant() == 'win_x86_exe':
platform_name = platform.platform()
if any(platform_name.startswith(f'Windows-{name}') for name in ('Vista', '2008Server')):
msg = 'Support for Windows Vista/Server 2008 has been deprecated. '
else:
return None
msg += ('See https://github.com/yt-dlp/yt-dlp/issues/7803 for details.'
'\nYou may stop receiving updates on this version at any time')
major, minor = MIN_RECOMMENDED
return f'{msg}! Please update to Python {major}.{minor} or above'
def _sha256_file(path):
h = hashlib.sha256()
mv = memoryview(bytearray(128 * 1024))

View File

@ -825,7 +825,7 @@ class Popen(subprocess.Popen):
_fix('LD_LIBRARY_PATH') # Linux
_fix('DYLD_LIBRARY_PATH') # macOS
def __init__(self, *args, env=None, text=False, **kwargs):
def __init__(self, args, *remaining, env=None, text=False, shell=False, **kwargs):
if env is None:
env = os.environ.copy()
self._fix_pyinstaller_ld_path(env)
@ -835,7 +835,21 @@ class Popen(subprocess.Popen):
kwargs['universal_newlines'] = True # For 3.6 compatibility
kwargs.setdefault('encoding', 'utf-8')
kwargs.setdefault('errors', 'replace')
super().__init__(*args, env=env, **kwargs, startupinfo=self._startupinfo)
if shell and compat_os_name == 'nt' and kwargs.get('executable') is None:
if not isinstance(args, str):
args = ' '.join(compat_shlex_quote(a) for a in args)
shell = False
args = f'{self.__comspec()} /Q /S /D /V:OFF /C "{args}"'
super().__init__(args, *remaining, env=env, shell=shell, **kwargs, startupinfo=self._startupinfo)
def __comspec(self):
comspec = os.environ.get('ComSpec') or os.path.join(
os.environ.get('SystemRoot', ''), 'System32', 'cmd.exe')
if os.path.isabs(comspec):
return comspec
raise FileNotFoundError('shell not found: neither %ComSpec% nor %SystemRoot% is set')
def communicate_or_kill(self, *args, **kwargs):
try:
@ -2727,6 +2741,7 @@ def js_to_json(code, vars={}, *, strict=False):
def create_map(mobj):
return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
code = re.sub(r'(?:new\s+)?Array\((.*?)\)', r'[\g<1>]', code)
code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
if not strict:
code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)

View File

@ -1,8 +1,8 @@
# Autogenerated by devscripts/update-version.py
__version__ = '2023.07.06'
__version__ = '2023.09.24'
RELEASE_GIT_HEAD = 'b532a3481046e1eabb6232ee8196fb696c356ff6'
RELEASE_GIT_HEAD = '088add9567d39b758737e4299a0e619fd89d2e8f'
VARIANT = None