mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 06:37:07 -05:00 
			
		
		
		
	[pornhub] Decode obfuscated video URL (closes #12470)
This commit is contained in:
		@@ -1,7 +1,9 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import functools
 | 
			
		||||
import itertools
 | 
			
		||||
import operator
 | 
			
		||||
# import os
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
@@ -129,9 +131,38 @@ class PornHubIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        tv_webpage = dl_webpage('tv')
 | 
			
		||||
 | 
			
		||||
        video_url = self._search_regex(
 | 
			
		||||
            r'<video[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//.+?)\1', tv_webpage,
 | 
			
		||||
            'video url', group='url')
 | 
			
		||||
        encoded_url = self._search_regex(r'(var.*mediastring.*)</script>',
 | 
			
		||||
            tv_webpage, 'encoded url')
 | 
			
		||||
        assignments = encoded_url.split(";")
 | 
			
		||||
        js_vars = {}
 | 
			
		||||
 | 
			
		||||
        def parse_js_value(inp):
 | 
			
		||||
            inp = re.sub(r'/\*[^*]*\*/', "", inp)
 | 
			
		||||
 | 
			
		||||
            if "+" in inp:
 | 
			
		||||
                inps = inp.split("+")
 | 
			
		||||
                return functools.reduce(operator.concat, map(parse_js_value, inps))
 | 
			
		||||
 | 
			
		||||
            inp = inp.strip()
 | 
			
		||||
            if inp in js_vars:
 | 
			
		||||
                return js_vars[inp]
 | 
			
		||||
 | 
			
		||||
            # Hope it's a string!
 | 
			
		||||
            assert inp.startswith('"') and inp.endswith('"')
 | 
			
		||||
            return inp[1:-1]
 | 
			
		||||
 | 
			
		||||
        for assn in assignments:
 | 
			
		||||
            assn = assn.strip()
 | 
			
		||||
            if len(assn) == 0:
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            assert assn.startswith("var ")
 | 
			
		||||
            assn = assn[4:]
 | 
			
		||||
            vname, value = assn.split("=", 1)
 | 
			
		||||
 | 
			
		||||
            js_vars[vname] = parse_js_value(value)
 | 
			
		||||
 | 
			
		||||
        video_url = js_vars["mediastring"]
 | 
			
		||||
 | 
			
		||||
        title = self._search_regex(
 | 
			
		||||
            r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user