From f3bcebb1d2ebf6a69f06b72e1e365bc76970e1e2 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Fri, 9 Aug 2013 18:36:01 +0200 Subject: [PATCH 1/7] add an aes implementation --- youtube_dl/aes.py | 200 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 youtube_dl/aes.py diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py new file mode 100644 index 000000000..2fa9238e3 --- /dev/null +++ b/youtube_dl/aes.py @@ -0,0 +1,200 @@ +__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text'] + +import base64 +from math import ceil + +BLOCK_SIZE_BYTES = 16 + +def aes_ctr_decrypt(data, key, counter): + """ + Decrypt with aes in counter mode + + @param {int[]} data cipher + @param {int[]} key 16/24/32-Byte cipher key + @param {instance} counter Instance whose next_value function (@returns {int[]} 16-Byte block) + returns the next counter block + @returns {int[]} decrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + decrypted_data=[] + for i in range(block_count): + counter_block = counter.next_value() + block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES] + block += [0]*(BLOCK_SIZE_BYTES - len(block)) + + cipher_counter_block = aes_encrypt(counter_block, expanded_key) + decrypted_data += xor(block, cipher_counter_block) + decrypted_data = decrypted_data[:len(data)] + + return decrypted_data + +def key_expansion(data): + """ + Generate key schedule + + @param {int[]} data 16/24/32-Byte cipher key + @returns {int[]} 176/208/240-Byte expanded key + """ + data = data[:] # copy + rcon_iteration = 1 + key_size_bytes = len(data) + expanded_key_size_bytes = (key_size_bytes/4 + 7) * BLOCK_SIZE_BYTES + + while len(data) < expanded_key_size_bytes: + temp = data[-4:] + temp = key_schedule_core(temp, rcon_iteration) + rcon_iteration += 1 + data += xor(temp, data[-key_size_bytes : 4-key_size_bytes]) + + for _ in range(3): + temp = data[-4:] + data += xor(temp, data[-key_size_bytes : 4-key_size_bytes]) + + if key_size_bytes == 32: + temp = data[-4:] + temp = sub_bytes(temp) + data += xor(temp, data[-key_size_bytes : 4-key_size_bytes]) + + for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0): + temp = data[-4:] + data += xor(temp, data[-key_size_bytes : 4-key_size_bytes]) + data = data[:expanded_key_size_bytes] + + return data + +def aes_encrypt(data, expanded_key): + """ + Encrypt one block with aes + + @param {int[]} data 16-Byte state + @param {int[]} expanded_key 176/208/240-Byte expanded key + @returns {int[]} 16-Byte cipher + """ + rounds = len(expanded_key) / BLOCK_SIZE_BYTES - 1 + + data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) + for i in range(1, rounds+1): + data = sub_bytes(data) + data = shift_rows(data) + if i != rounds: + data = mix_columns(data) + data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]) + + return data + +def aes_decrypt_text(data, password, key_size_bytes): + """ + Decrypt text + - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter + - The cipher key is retrieved by encrypting the first 16 Byte of 'password' + with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's) + - Mode of operation is 'counter' + + @param {str} data Base64 encoded string + @param {str,unicode} password Password (will be encoded with utf-8) + @param {int} key_size_bytes Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit + @returns {str} Decrypted data + """ + NONCE_LENGTH_BYTES = 8 + + data = map(lambda c: ord(c), base64.b64decode(data)) + password = map(lambda c: ord(c), password.encode('utf-8')) + + key = password[:key_size_bytes] + [0]*(key_size_bytes - len(password)) + key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes / BLOCK_SIZE_BYTES) + + nonce = data[:NONCE_LENGTH_BYTES] + cipher = data[NONCE_LENGTH_BYTES:] + + class Counter: + __value = nonce + [0]*(BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES) + def next_value(self): + temp = self.__value + self.__value = inc(self.__value) + return temp + + decrypted_data = aes_ctr_decrypt(cipher, key, Counter()) + plaintext = ''.join(map(lambda x: chr(x), decrypted_data)) + + return plaintext + +RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) +SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, + 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, + 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, + 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, + 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, + 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, + 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, + 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, + 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, + 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, + 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, + 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, + 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, + 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, + 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, + 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16) +MIX_COLUMN_MATRIX = ((2,3,1,1), + (1,2,3,1), + (1,1,2,3), + (3,1,1,2)) + +def sub_bytes(data): + return map(lambda x: SBOX[x], data) + +def rotate(data): + return data[1:] + [data[0]] + +def key_schedule_core(data, rcon_iteration): + data = rotate(data) + data = sub_bytes(data) + data[0] = data[0] ^ RCON[rcon_iteration] + + return data + +def xor(data1, data2): + return map(lambda (x,y): x^y, zip(data1, data2)) + +def mix_column(data): + data_mixed = [] + for row in range(4): + mixed = 0 + for column in range(4): + addend = data[column] + if MIX_COLUMN_MATRIX[row][column] in (2,3): + addend <<= 1 + if addend > 0xff: + addend &= 0xff + addend ^= 0x1b + if MIX_COLUMN_MATRIX[row][column] == 3: + addend ^= data[column] + mixed ^= addend & 0xff + data_mixed.append(mixed) + return data_mixed + +def mix_columns(data): + data_mixed = [] + for i in range(4): + column = data[i*4 : (i+1)*4] + data_mixed += mix_column(column) + return data_mixed + +def shift_rows(data): + data_shifted = [] + for column in range(4): + for row in range(4): + data_shifted.append( data[((column + row) & 0b11) * 4 + row] ) + return data_shifted + +def inc(data): + data = data[:] # copy + for i in range(len(data)-1,-1,-1): + if data[i] == 255: + data[i] = 0 + else: + data[i] = data[i] + 1 + break + return data From 97b3656c2e37e45d556816b8f1f15c20d14f1acd Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Fri, 9 Aug 2013 18:37:33 +0200 Subject: [PATCH 2/7] YoupornIE: Add support for hd videos and update Test --- youtube_dl/extractor/youporn.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index d1156bf42..cc9c37027 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -12,14 +12,16 @@ unescapeHTML, unified_strdate, ) - +from ..aes import ( + aes_decrypt_text +) class YouPornIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P[0-9]+)/(?P[^/]+)' _TEST = { u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', u'file': u'505835.mp4', - u'md5': u'c37ddbaaa39058c76a7e86c6813423c1', + u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89', u'info_dict': { u"upload_date": u"20101221", u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", @@ -75,6 +77,14 @@ def _real_extract(self, url): # Get all of the links from the page LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">' links = re.findall(LINK_RE, download_list_html) + + # Get link of hd video + encrypted_video_url = self._html_search_regex(r'var encryptedURL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', + webpage, u'encrypted_video_url') + video_url = unicode( aes_decrypt_text(encrypted_video_url, video_title, 32), 'utf-8') + if video_url.split('/')[6].split('_')[0] == u'720p': # only add if 720p to avoid duplicates + links = [video_url] + links + if(len(links) == 0): raise ExtractorError(u'ERROR: no known formats available for video') From 48ea9cea77e7ea24ee867027f03ca37dd1b935d8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 28 Aug 2013 14:28:55 +0200 Subject: [PATCH 3/7] Allow changes to run under Python 3 --- youtube_dl/aes.py | 18 ++++++++++-------- youtube_dl/extractor/youporn.py | 12 ++++++++---- youtube_dl/utils.py | 10 ++++++++++ 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index 2fa9238e3..278f8bb82 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -3,6 +3,8 @@ import base64 from math import ceil +from .utils import bytes_to_intlist + BLOCK_SIZE_BYTES = 16 def aes_ctr_decrypt(data, key, counter): @@ -16,7 +18,7 @@ def aes_ctr_decrypt(data, key, counter): @returns {int[]} decrypted data """ expanded_key = key_expansion(key) - block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + block_count = int(ceil(float(len(data)) // BLOCK_SIZE_BYTES)) decrypted_data=[] for i in range(block_count): @@ -40,7 +42,7 @@ def key_expansion(data): data = data[:] # copy rcon_iteration = 1 key_size_bytes = len(data) - expanded_key_size_bytes = (key_size_bytes/4 + 7) * BLOCK_SIZE_BYTES + expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES while len(data) < expanded_key_size_bytes: temp = data[-4:] @@ -72,7 +74,7 @@ def aes_encrypt(data, expanded_key): @param {int[]} expanded_key 176/208/240-Byte expanded key @returns {int[]} 16-Byte cipher """ - rounds = len(expanded_key) / BLOCK_SIZE_BYTES - 1 + rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) for i in range(1, rounds+1): @@ -99,11 +101,11 @@ def aes_decrypt_text(data, password, key_size_bytes): """ NONCE_LENGTH_BYTES = 8 - data = map(lambda c: ord(c), base64.b64decode(data)) - password = map(lambda c: ord(c), password.encode('utf-8')) + data = bytes_to_intlist(base64.b64decode(data)) + password = bytes_to_intlist(password.encode('utf-8')) key = password[:key_size_bytes] + [0]*(key_size_bytes - len(password)) - key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes / BLOCK_SIZE_BYTES) + key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES) nonce = data[:NONCE_LENGTH_BYTES] cipher = data[NONCE_LENGTH_BYTES:] @@ -143,7 +145,7 @@ def next_value(self): (3,1,1,2)) def sub_bytes(data): - return map(lambda x: SBOX[x], data) + return [SBOX[x] for x in data] def rotate(data): return data[1:] + [data[0]] @@ -156,7 +158,7 @@ def key_schedule_core(data, rcon_iteration): return data def xor(data1, data2): - return map(lambda (x,y): x^y, zip(data1, data2)) + return [x^y for x, y in zip(data1, data2)] def mix_column(data): data_mixed = [] diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index cc9c37027..19360e273 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -5,6 +5,7 @@ from .common import InfoExtractor from ..utils import ( + compat_str, compat_urllib_parse_urlparse, compat_urllib_request, @@ -79,13 +80,16 @@ def _real_extract(self, url): links = re.findall(LINK_RE, download_list_html) # Get link of hd video - encrypted_video_url = self._html_search_regex(r'var encryptedURL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', + encrypted_video_url = self._html_search_regex( + r'var encrypted(?:Quality[0-9]+)?URL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', webpage, u'encrypted_video_url') - video_url = unicode( aes_decrypt_text(encrypted_video_url, video_title, 32), 'utf-8') + video_url = aes_decrypt_text(encrypted_video_url, video_title, 32) + print(video_url) + assert isinstance(video_url, compat_str) if video_url.split('/')[6].split('_')[0] == u'720p': # only add if 720p to avoid duplicates links = [video_url] + links - if(len(links) == 0): + if not links: raise ExtractorError(u'ERROR: no known formats available for video') self.to_screen(u'Links found: %d' % len(links)) @@ -122,7 +126,7 @@ def _real_extract(self, url): self._print_formats(formats) return - req_format = self._downloader.params.get('format', None) + req_format = self._downloader.params.get('format', 'best') self.to_screen(u'Format: %s' % req_format) if req_format is None or req_format == 'best': diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 59eeaf4a8..07b40da6c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -708,3 +708,13 @@ def __contains__(self, date): return self.start <= date <= self.end def __str__(self): return '%s - %s' % ( self.start.isoformat(), self.end.isoformat()) + + +def bytes_to_intlist(bs): + if not bs: + return [] + if isinstance(bs[0], int): # Python 3 + return list(bs) + else: + return [ord(c) for c in bs] + From cba892fa1fd6a7f1278e637c338921c5ae236840 Mon Sep 17 00:00:00 2001 From: rzhxeo <rzhxeo@users.noreply.github.com> Date: Wed, 28 Aug 2013 15:59:07 +0200 Subject: [PATCH 4/7] Add intlist_to_bytes to utils.py --- youtube_dl/utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 07b40da6c..ee8df6a5b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -718,3 +718,10 @@ def bytes_to_intlist(bs): else: return [ord(c) for c in bs] +def intlist_to_bytes(xs): + if not xs: + return b'' + if isinstance(chr(0), bytes): # Python 2 + return ''.join([chr(x) for x in xs]) + else: + return bytes(xs) From 6e74bc41ca07bda56107cfff9ceb98d6f8d28e53 Mon Sep 17 00:00:00 2001 From: rzhxeo <rzhxeo@users.noreply.github.com> Date: Wed, 28 Aug 2013 16:01:43 +0200 Subject: [PATCH 5/7] Fix division bug in aes.py --- youtube_dl/aes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index 278f8bb82..9913d59a4 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -18,7 +18,7 @@ def aes_ctr_decrypt(data, key, counter): @returns {int[]} decrypted data """ expanded_key = key_expansion(key) - block_count = int(ceil(float(len(data)) // BLOCK_SIZE_BYTES)) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) decrypted_data=[] for i in range(block_count): From 0012690aae977d76e9162e2334989498366a8e94 Mon Sep 17 00:00:00 2001 From: rzhxeo <rzhxeo@users.noreply.github.com> Date: Wed, 28 Aug 2013 16:03:35 +0200 Subject: [PATCH 6/7] Let aes_decrypt_text return bytes instead of unicode --- youtube_dl/aes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index 9913d59a4..9a0c93fa6 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -3,7 +3,7 @@ import base64 from math import ceil -from .utils import bytes_to_intlist +from .utils import bytes_to_intlist, intlist_to_bytes BLOCK_SIZE_BYTES = 16 @@ -118,7 +118,7 @@ def next_value(self): return temp decrypted_data = aes_ctr_decrypt(cipher, key, Counter()) - plaintext = ''.join(map(lambda x: chr(x), decrypted_data)) + plaintext = intlist_to_bytes(decrypted_data) return plaintext From 878e83c5a4c84c7abbf3484366e76fbe906c8947 Mon Sep 17 00:00:00 2001 From: rzhxeo <rzhxeo@users.noreply.github.com> Date: Wed, 28 Aug 2013 16:04:48 +0200 Subject: [PATCH 7/7] YoupornIE: Clean up extraction of hd video --- youtube_dl/extractor/youporn.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 19360e273..c85fd4b5a 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -5,7 +5,6 @@ from .common import InfoExtractor from ..utils import ( - compat_str, compat_urllib_parse_urlparse, compat_urllib_request, @@ -79,14 +78,11 @@ def _real_extract(self, url): LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">' links = re.findall(LINK_RE, download_list_html) - # Get link of hd video - encrypted_video_url = self._html_search_regex( - r'var encrypted(?:Quality[0-9]+)?URL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', - webpage, u'encrypted_video_url') - video_url = aes_decrypt_text(encrypted_video_url, video_title, 32) - print(video_url) - assert isinstance(video_url, compat_str) - if video_url.split('/')[6].split('_')[0] == u'720p': # only add if 720p to avoid duplicates + # Get link of hd video if available + mobj = re.search(r'var encryptedQuality720URL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', webpage) + if mobj != None: + encrypted_video_url = mobj.group(u'encrypted_video_url') + video_url = aes_decrypt_text(encrypted_video_url, video_title, 32).decode('utf-8') links = [video_url] + links if not links: