yt-dlp/yt_dlp/postprocessor/sponsorblock.py

import hashlib
import json
import re
import urllib.parse

from .ffmpeg import FFmpegPostProcessor


class SponsorBlockPP(FFmpegPostProcessor):
    """Fetch SponsorBlock segments for a video and expose them as chapters.

    The fetched segments are stored in ``info['sponsorblock_chapters']``;
    nothing in this class touches the downloaded files.
    """

    # https://wiki.sponsor.ajay.app/w/Types
    # Maps extractor keys to the service name sent to the SponsorBlock API.
    EXTRACTORS = {
        'Youtube': 'YouTube',
    }
    # Point-of-interest categories: widened by 1 second in _get_sponsor_chapters
    # so that they can be marked as chapters.
    POI_CATEGORIES = {
        'poi_highlight': 'Highlight',
    }
    NON_SKIPPABLE_CATEGORIES = {
        **POI_CATEGORIES,
        'chapter': 'Chapter',
    }
    # Every known category key mapped to its human-readable chapter title.
    CATEGORIES = {
        'sponsor': 'Sponsor',
        'intro': 'Intermission/Intro Animation',
        'outro': 'Endcards/Credits',
        'selfpromo': 'Unpaid/Self Promotion',
        'preview': 'Preview/Recap',
        'filler': 'Filler Tangent',
        'interaction': 'Interaction Reminder',
        'music_offtopic': 'Non-Music Section',
        **NON_SKIPPABLE_CATEGORIES,
    }

    def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
        """Set up the postprocessor.

        @param downloader   Passed through to FFmpegPostProcessor.
        @param categories   Iterable of category keys to request; every key
                            of CATEGORIES is requested when empty or None.
        @param api          Base URL of the SponsorBlock API; 'https://' is
                            prepended when no http(s) scheme is given.
        """
        FFmpegPostProcessor.__init__(self, downloader)
        self._categories = tuple(categories or self.CATEGORIES)
        self._API_URL = api if re.match('https?://', api) else 'https://' + api

    def run(self, info):
        """Store the SponsorBlock chapters of the video in ``info``.

        Sets ``info['sponsorblock_chapters']`` when the extractor is listed in
        EXTRACTORS; otherwise only prints a notice.
        Returns a tuple of an empty list and ``info``.
        """
        extractor = info['extractor_key']
        if extractor not in self.EXTRACTORS:
            self.to_screen(f'SponsorBlock is not supported for {extractor}')
            return [], info

        self.to_screen('Fetching SponsorBlock segments')
        info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info.get('duration'))
        return [], info

    def _get_sponsor_chapters(self, info, duration):
        """Fetch the segments of the video and convert them to chapter dicts.

        @param info       Info dict; 'id' and 'extractor_key' are read.
        @param duration   Real duration of the video in seconds; may be None
                          when unknown, which disables all duration checks.
        @returns          List of chapter dicts (see to_chapter below).
        """
        segments = self._get_sponsor_segments(info['id'], self.EXTRACTORS[info['extractor_key']])

        def is_full_video_label(s):
            # Entire video segments (https://wiki.sponsor.ajay.app/w/Types) are
            # reported as [0, 0]. The segment is a list decoded from JSON, and a
            # list never compares equal to a tuple, so convert before comparing.
            # POI segments are zero-length by nature, so [0, 0] is a legitimate
            # timestamp for them and they are not treated as labels.
            return s['category'] not in self.POI_CATEGORIES and tuple(s['segment']) == (0, 0)

        def duration_filter(s):
            # NB: the segment boundaries are normalized in place
            start_end = s['segment']
            # Ignore milliseconds difference at the start.
            if start_end[0] <= 1:
                start_end[0] = 0
            # Make POI chapters 1 sec so that we can properly mark them
            if s['category'] in self.POI_CATEGORIES:
                start_end[1] += 1
            # Ignore milliseconds difference at the end.
            # Never allow the segment to exceed the video.
            if duration and duration - start_end[1] <= 1:
                start_end[1] = duration
            # SponsorBlock duration may be absent or it may deviate from the real one.
            # When either duration is unknown there is nothing to compare.
            video_duration = s.get('videoDuration')
            if not duration or not video_duration:
                return True
            diff = abs(duration - video_duration)
            if diff < 1:
                return True
            # Larger deviations are tolerated only when they are small relative
            # to the segment. A non-positive length (empty segment, or one that
            # starts past the end of the video) can never satisfy that.
            length = start_end[1] - start_end[0]
            return diff < 5 and length > 0 and diff / length < 0.05

        # Drop the labels first so that they do not trigger the warning below
        segments = [s for s in segments if not is_full_video_label(s)]
        duration_match = [s for s in segments if duration_filter(s)]
        if len(duration_match) != len(segments):
            self.report_warning('Some SponsorBlock segments are from a video of different duration, maybe from an old version of this video')

        def to_chapter(s):
            (start, end), cat = s['segment'], s['category']
            # Chapters carry their own title; other categories use the fixed one
            title = s['description'] if cat == 'chapter' else self.CATEGORIES[cat]
            return {
                'start_time': start,
                'end_time': end,
                'category': cat,
                'title': title,
                'type': s['actionType'],
                '_categories': [(cat, start, end, title)],
            }

        sponsor_chapters = [to_chapter(s) for s in duration_match]
        if not sponsor_chapters:
            self.to_screen('No matching segments were found in the SponsorBlock database')
        else:
            self.to_screen(f'Found {len(sponsor_chapters)} segments in the SponsorBlock database')
        return sponsor_chapters

    def _get_sponsor_segments(self, video_id, service):
        """Download the raw segments of ``video_id`` from the SponsorBlock API.

        Only a prefix of the SHA-256 hash of the video ID is sent, so the
        response may contain entries for other videos; the one whose
        'videoID' matches is selected.

        @returns   The 'segments' list of the matching entry, or an empty
                   list when there is no match or no response data.
        """
        video_hash = hashlib.sha256(video_id.encode('ascii')).hexdigest()
        # SponsorBlock API recommends using first 4 hash characters.
        url = f'{self._API_URL}/api/skipSegments/{video_hash[:4]}?' + urllib.parse.urlencode({
            'service': service,
            'categories': json.dumps(self._categories),
            'actionTypes': json.dumps(['skip', 'poi', 'chapter']),
        })
        for d in self._download_json(url) or []:
            if d['videoID'] == video_id:
                return d['segments']
        return []
[cleanup] Misc cleanup (#2173) Authored by: fstirlitz, pukkandan 2022-04-12 00:01:54 +00:00			`import hashlib`
Native SponsorBlock implementation and related improvements (#360) SponsorBlock options: * The fetched sponsor sections are written to infojson * `--sponsorblock-remove` removes specified chapters from file * `--sponsorblock-mark` marks the specified sponsor sections as chapters * `--sponsorblock-chapter-title` to specify sponsor chapter template * `--sponsorblock-api` to use a different API Related improvements: * Split `--embed-chapters` from `--embed-metadata` * Add `--remove-chapters` to remove arbitrary chapters * Add `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters Deprecates all `--sponskrub` options Authored by: nihil-admirari, pukkandan 2021-09-01 20:55:16 +00:00			`import json`
			`import re`
[compat] Remove deprecated functions from core code 2022-06-24 10:54:43 +00:00			`import urllib.parse`
Native SponsorBlock implementation and related improvements (#360) SponsorBlock options: * The fetched sponsor sections are written to infojson * `--sponsorblock-remove` removes specified chapters from file * `--sponsorblock-mark` marks the specified sponsor sections as chapters * `--sponsorblock-chapter-title` to specify sponsor chapter template * `--sponsorblock-api` to use a different API Related improvements: * Split `--embed-chapters` from `--embed-metadata` * Add `--remove-chapters` to remove arbitrary chapters * Add `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters Deprecates all `--sponskrub` options Authored by: nihil-admirari, pukkandan 2021-09-01 20:55:16 +00:00
			`from .ffmpeg import FFmpegPostProcessor`


			`class SponsorBlockPP(FFmpegPostProcessor):`
[SponsorBlock] Add Filler and Highlight categories (#1664) Authored by: nihil-admirari, pukkandan 2021-12-09 12:40:31 +00:00			`# https://wiki.sponsor.ajay.app/w/Types`
Native SponsorBlock implementation and related improvements (#360) SponsorBlock options: * The fetched sponsor sections are written to infojson * `--sponsorblock-remove` removes specified chapters from file * `--sponsorblock-mark` marks the specified sponsor sections as chapters * `--sponsorblock-chapter-title` to specify sponsor chapter template * `--sponsorblock-api` to use a different API Related improvements: * Split `--embed-chapters` from `--embed-metadata` * Add `--remove-chapters` to remove arbitrary chapters * Add `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters Deprecates all `--sponskrub` options Authored by: nihil-admirari, pukkandan 2021-09-01 20:55:16 +00:00			`EXTRACTORS = {`
			`'Youtube': 'YouTube',`
			`}`
[SponsorBlock] Add Filler and Highlight categories (#1664) Authored by: nihil-admirari, pukkandan 2021-12-09 12:40:31 +00:00			`POI_CATEGORIES = {`
			`'poi_highlight': 'Highlight',`
			`}`
[SponsorBlock] Support `chapter` category (#5260) Authored by: ajayyy, pukkandan 2022-10-18 16:51:57 +00:00			`NON_SKIPPABLE_CATEGORIES = {`
			`**POI_CATEGORIES,`
			`'chapter': 'Chapter',`
			`}`
Native SponsorBlock implementation and related improvements (#360) SponsorBlock options: * The fetched sponsor sections are written to infojson * `--sponsorblock-remove` removes specified chapters from file * `--sponsorblock-mark` marks the specified sponsor sections as chapters * `--sponsorblock-chapter-title` to specify sponsor chapter template * `--sponsorblock-api` to use a different API Related improvements: * Split `--embed-chapters` from `--embed-metadata` * Add `--remove-chapters` to remove arbitrary chapters * Add `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters Deprecates all `--sponskrub` options Authored by: nihil-admirari, pukkandan 2021-09-01 20:55:16 +00:00			`CATEGORIES = {`
			`'sponsor': 'Sponsor',`
			`'intro': 'Intermission/Intro Animation',`
			`'outro': 'Endcards/Credits',`
			`'selfpromo': 'Unpaid/Self Promotion',`
			`'preview': 'Preview/Recap',`
[SponsorBlock] Add Filler and Highlight categories (#1664) Authored by: nihil-admirari, pukkandan 2021-12-09 12:40:31 +00:00			`'filler': 'Filler Tangent',`
			`'interaction': 'Interaction Reminder',`
			`'music_offtopic': 'Non-Music Section',`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`**NON_SKIPPABLE_CATEGORIES,`
Native SponsorBlock implementation and related improvements (#360) SponsorBlock options: * The fetched sponsor sections are written to infojson * `--sponsorblock-remove` removes specified chapters from file * `--sponsorblock-mark` marks the specified sponsor sections as chapters * `--sponsorblock-chapter-title` to specify sponsor chapter template * `--sponsorblock-api` to use a different API Related improvements: * Split `--embed-chapters` from `--embed-metadata` * Add `--remove-chapters` to remove arbitrary chapters * Add `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters Deprecates all `--sponskrub` options Authored by: nihil-admirari, pukkandan 2021-09-01 20:55:16 +00:00			`}`

			`def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):`
			`FFmpegPostProcessor.__init__(self, downloader)`
			`self._categories = tuple(categories or self.CATEGORIES.keys())`
[cleanup] Misc (#10807) Closes #10751, Closes #10769, Closes #10791 Authored by: bashonly, Codenade, pzhlkj6612, seproDev, coletdjnz, grqz, Grub4K Co-authored-by: Codenade <amadeus.dorian04@gmail.com> Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com> Co-authored-by: coletdjnz <coletdjnz@protonmail.com> Co-authored-by: N/Ame <173015200+grqz@users.noreply.github.com> Co-authored-by: Simon Sawicki <contact@grub4k.xyz> 2024-09-27 22:46:22 +00:00			`self._API_URL = api if re.match('https?://', api) else 'https://' + api`
Native SponsorBlock implementation and related improvements (#360) SponsorBlock options: * The fetched sponsor sections are written to infojson * `--sponsorblock-remove` removes specified chapters from file * `--sponsorblock-mark` marks the specified sponsor sections as chapters * `--sponsorblock-chapter-title` to specify sponsor chapter template * `--sponsorblock-api` to use a different API Related improvements: * Split `--embed-chapters` from `--embed-metadata` * Add `--remove-chapters` to remove arbitrary chapters * Add `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters Deprecates all `--sponskrub` options Authored by: nihil-admirari, pukkandan 2021-09-01 20:55:16 +00:00
			`def run(self, info):`
			`extractor = info['extractor_key']`
			`if extractor not in self.EXTRACTORS:`
			`self.to_screen(f'SponsorBlock is not supported for {extractor}')`
			`return [], info`

[SponsorBlock] Obey `extractor-retries` and `sleep-requests` 2021-10-18 03:49:37 +00:00			`self.to_screen('Fetching SponsorBlock segments')`
[Sponsorblock] Don't crash when duration is unknown CLoses #3529 2022-04-23 16:45:00 +00:00			`info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info.get('duration'))`
Native SponsorBlock implementation and related improvements (#360) SponsorBlock options: * The fetched sponsor sections are written to infojson * `--sponsorblock-remove` removes specified chapters from file * `--sponsorblock-mark` marks the specified sponsor sections as chapters * `--sponsorblock-chapter-title` to specify sponsor chapter template * `--sponsorblock-api` to use a different API Related improvements: * Split `--embed-chapters` from `--embed-metadata` * Add `--remove-chapters` to remove arbitrary chapters * Add `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters Deprecates all `--sponskrub` options Authored by: nihil-admirari, pukkandan 2021-09-01 20:55:16 +00:00			`return [], info`

			`def _get_sponsor_chapters(self, info, duration):`
			`segments = self._get_sponsor_segments(info['id'], self.EXTRACTORS[info['extractor_key']])`

			`def duration_filter(s):`
			`start_end = s['segment']`
[SponsorBlock] Fixes for highlight and "full video labels" (#2849) Authored by: nihil-admirari 2022-02-22 14:18:44 +00:00			`# Ignore entire video segments (https://wiki.sponsor.ajay.app/w/Types).`
			`if start_end == (0, 0):`
			`return False`
Native SponsorBlock implementation and related improvements (#360) SponsorBlock options: * The fetched sponsor sections are written to infojson * `--sponsorblock-remove` removes specified chapters from file * `--sponsorblock-mark` marks the specified sponsor sections as chapters * `--sponsorblock-chapter-title` to specify sponsor chapter template * `--sponsorblock-api` to use a different API Related improvements: * Split `--embed-chapters` from `--embed-metadata` * Add `--remove-chapters` to remove arbitrary chapters * Add `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters Deprecates all `--sponskrub` options Authored by: nihil-admirari, pukkandan 2021-09-01 20:55:16 +00:00			`# Ignore milliseconds difference at the start.`
			`if start_end[0] <= 1:`
			`start_end[0] = 0`
[SponsorBlock] Add Filler and Highlight categories (#1664) Authored by: nihil-admirari, pukkandan 2021-12-09 12:40:31 +00:00			`# Make POI chapters 1 sec so that we can properly mark them`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`if s['category'] in self.POI_CATEGORIES:`
[SponsorBlock] Add Filler and Highlight categories (#1664) Authored by: nihil-admirari, pukkandan 2021-12-09 12:40:31 +00:00			`start_end[1] += 1`
Native SponsorBlock implementation and related improvements (#360) SponsorBlock options: * The fetched sponsor sections are written to infojson * `--sponsorblock-remove` removes specified chapters from file * `--sponsorblock-mark` marks the specified sponsor sections as chapters * `--sponsorblock-chapter-title` to specify sponsor chapter template * `--sponsorblock-api` to use a different API Related improvements: * Split `--embed-chapters` from `--embed-metadata` * Add `--remove-chapters` to remove arbitrary chapters * Add `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters Deprecates all `--sponskrub` options Authored by: nihil-admirari, pukkandan 2021-09-01 20:55:16 +00:00			`# Ignore milliseconds difference at the end.`
			`# Never allow the segment to exceed the video.`
			`if duration and duration - start_end[1] <= 1:`
			`start_end[1] = duration`
			`# SponsorBlock duration may be absent or it may deviate from the real one.`
[SponsorBlock] Relax duration check for large segments 2022-10-18 17:49:25 +00:00			`diff = abs(duration - s['videoDuration']) if s['videoDuration'] else 0`
			`return diff < 1 or (diff < 5 and diff / (start_end[1] - start_end[0]) < 0.05)`
Native SponsorBlock implementation and related improvements (#360) SponsorBlock options: * The fetched sponsor sections are written to infojson * `--sponsorblock-remove` removes specified chapters from file * `--sponsorblock-mark` marks the specified sponsor sections as chapters * `--sponsorblock-chapter-title` to specify sponsor chapter template * `--sponsorblock-api` to use a different API Related improvements: * Split `--embed-chapters` from `--embed-metadata` * Add `--remove-chapters` to remove arbitrary chapters * Add `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters Deprecates all `--sponskrub` options Authored by: nihil-admirari, pukkandan 2021-09-01 20:55:16 +00:00
			`duration_match = [s for s in segments if duration_filter(s)]`
			`if len(duration_match) != len(segments):`
			`self.report_warning('Some SponsorBlock segments are from a video of different duration, maybe from an old version of this video')`

			`def to_chapter(s):`
			`(start, end), cat = s['segment'], s['category']`
[SponsorBlock] Support `chapter` category (#5260) Authored by: ajayyy, pukkandan 2022-10-18 16:51:57 +00:00			`title = s['description'] if cat == 'chapter' else self.CATEGORIES[cat]`
Native SponsorBlock implementation and related improvements (#360) SponsorBlock options: * The fetched sponsor sections are written to infojson * `--sponsorblock-remove` removes specified chapters from file * `--sponsorblock-mark` marks the specified sponsor sections as chapters * `--sponsorblock-chapter-title` to specify sponsor chapter template * `--sponsorblock-api` to use a different API Related improvements: * Split `--embed-chapters` from `--embed-metadata` * Add `--remove-chapters` to remove arbitrary chapters * Add `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters Deprecates all `--sponskrub` options Authored by: nihil-admirari, pukkandan 2021-09-01 20:55:16 +00:00			`return {`
			`'start_time': start,`
			`'end_time': end,`
			`'category': cat,`
[SponsorBlock] Support `chapter` category (#5260) Authored by: ajayyy, pukkandan 2022-10-18 16:51:57 +00:00			`'title': title,`
[SponsorBlock] Add `type` field 2022-10-18 17:38:23 +00:00			`'type': s['actionType'],`
[SponsorBlock] Support `chapter` category (#5260) Authored by: ajayyy, pukkandan 2022-10-18 16:51:57 +00:00			`'_categories': [(cat, start, end, title)],`
Native SponsorBlock implementation and related improvements (#360) SponsorBlock options: * The fetched sponsor sections are written to infojson * `--sponsorblock-remove` removes specified chapters from file * `--sponsorblock-mark` marks the specified sponsor sections as chapters * `--sponsorblock-chapter-title` to specify sponsor chapter template * `--sponsorblock-api` to use a different API Related improvements: * Split `--embed-chapters` from `--embed-metadata` * Add `--remove-chapters` to remove arbitrary chapters * Add `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters Deprecates all `--sponskrub` options Authored by: nihil-admirari, pukkandan 2021-09-01 20:55:16 +00:00			`}`

			`sponsor_chapters = [to_chapter(s) for s in duration_match]`
			`if not sponsor_chapters:`
[cleanup Misc Closes #5162 2022-10-18 17:58:57 +00:00			`self.to_screen('No matching segments were found in the SponsorBlock database')`
Native SponsorBlock implementation and related improvements (#360) SponsorBlock options: * The fetched sponsor sections are written to infojson * `--sponsorblock-remove` removes specified chapters from file * `--sponsorblock-mark` marks the specified sponsor sections as chapters * `--sponsorblock-chapter-title` to specify sponsor chapter template * `--sponsorblock-api` to use a different API Related improvements: * Split `--embed-chapters` from `--embed-metadata` * Add `--remove-chapters` to remove arbitrary chapters * Add `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters Deprecates all `--sponskrub` options Authored by: nihil-admirari, pukkandan 2021-09-01 20:55:16 +00:00			`else:`
			`self.to_screen(f'Found {len(sponsor_chapters)} segments in the SponsorBlock database')`
			`return sponsor_chapters`

			`def _get_sponsor_segments(self, video_id, service):`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`video_hash = hashlib.sha256(video_id.encode('ascii')).hexdigest()`
Native SponsorBlock implementation and related improvements (#360) SponsorBlock options: * The fetched sponsor sections are written to infojson * `--sponsorblock-remove` removes specified chapters from file * `--sponsorblock-mark` marks the specified sponsor sections as chapters * `--sponsorblock-chapter-title` to specify sponsor chapter template * `--sponsorblock-api` to use a different API Related improvements: * Split `--embed-chapters` from `--embed-metadata` * Add `--remove-chapters` to remove arbitrary chapters * Add `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters Deprecates all `--sponskrub` options Authored by: nihil-admirari, pukkandan 2021-09-01 20:55:16 +00:00			`# SponsorBlock API recommends using first 4 hash characters.`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`url = f'{self._API_URL}/api/skipSegments/{video_hash[:4]}?' + urllib.parse.urlencode({`
Native SponsorBlock implementation and related improvements (#360) SponsorBlock options: * The fetched sponsor sections are written to infojson * `--sponsorblock-remove` removes specified chapters from file * `--sponsorblock-mark` marks the specified sponsor sections as chapters * `--sponsorblock-chapter-title` to specify sponsor chapter template * `--sponsorblock-api` to use a different API Related improvements: * Split `--embed-chapters` from `--embed-metadata` * Add `--remove-chapters` to remove arbitrary chapters * Add `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters Deprecates all `--sponskrub` options Authored by: nihil-admirari, pukkandan 2021-09-01 20:55:16 +00:00			`'service': service,`
			`'categories': json.dumps(self._categories),`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`'actionTypes': json.dumps(['skip', 'poi', 'chapter']),`
Native SponsorBlock implementation and related improvements (#360) SponsorBlock options: * The fetched sponsor sections are written to infojson * `--sponsorblock-remove` removes specified chapters from file * `--sponsorblock-mark` marks the specified sponsor sections as chapters * `--sponsorblock-chapter-title` to specify sponsor chapter template * `--sponsorblock-api` to use a different API Related improvements: * Split `--embed-chapters` from `--embed-metadata` * Add `--remove-chapters` to remove arbitrary chapters * Add `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters Deprecates all `--sponskrub` options Authored by: nihil-admirari, pukkandan 2021-09-01 20:55:16 +00:00			`})`
[postprocessor,cleanup] Create `_download_json` 2022-03-25 03:01:45 +00:00			`for d in self._download_json(url) or []:`
Native SponsorBlock implementation and related improvements (#360) SponsorBlock options: * The fetched sponsor sections are written to infojson * `--sponsorblock-remove` removes specified chapters from file * `--sponsorblock-mark` marks the specified sponsor sections as chapters * `--sponsorblock-chapter-title` to specify sponsor chapter template * `--sponsorblock-api` to use a different API Related improvements: * Split `--embed-chapters` from `--embed-metadata` * Add `--remove-chapters` to remove arbitrary chapters * Add `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters Deprecates all `--sponskrub` options Authored by: nihil-admirari, pukkandan 2021-09-01 20:55:16 +00:00			`if d['videoID'] == video_id:`
			`return d['segments']`
			`return []`