mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 10:17:07 -05:00 
			
		
		
		
	
		
			
				
	
	
		
			61 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			61 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import re
 | 
						|
import os
 | 
						|
import socket
 | 
						|
 | 
						|
from .common import InfoExtractor
 | 
						|
from ..utils import (
 | 
						|
    compat_http_client,
 | 
						|
    compat_str,
 | 
						|
    compat_urllib_error,
 | 
						|
    compat_urllib_parse,
 | 
						|
    compat_urllib_request,
 | 
						|
 | 
						|
    ExtractorError,
 | 
						|
)
 | 
						|
 | 
						|
 | 
						|
class DepositFilesIE(InfoExtractor):
    """Information extractor for depositfiles.com"""

    # The optional two-character path segment is the site locale; the
    # "(?#locale)" part is an inline regex comment and matches nothing.
    _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'

    def _real_extract(self, url):
        """Resolve a depositfiles.com file page into a direct download entry.

        The file id is taken from the last path segment of ``url``; the page
        is then requested again under the English locale with the form field
        that the 'Free download' button submits.

        Returns a single-element list holding a dict with the keys ``id``,
        ``url``, ``uploader``, ``upload_date``, ``title`` and ``ext``.

        Raises ExtractorError when the page cannot be retrieved, when the
        page shows an "Attention ..." restriction notice (the notice text
        becomes the error message), or when no download URL is found.
        """
        file_id = url.split('/')[-1]
        # Rebuild url in english locale
        url = 'http://depositfiles.com/en/files/' + file_id

        # Retrieve file webpage with 'Free download' button pressed.
        # The POST body must be bytes: Python 3's urlopen rejects str data.
        post_data = compat_urllib_parse.urlencode({'gateway_result': '1'}).encode('ascii')
        request = compat_urllib_request.Request(url, post_data)
        try:
            self.report_download_webpage(file_id)
            raw_page = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err))

        # Decode once at the I/O boundary so every regex below works on text
        # (str patterns cannot be applied to bytes on Python 3).
        webpage = raw_page.decode('utf-8', 'replace')

        # Search for the real file URL
        mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
        if mobj is None:
            # Try to figure out reason of the error.
            notice = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
            if notice is None:
                raise ExtractorError(u'Unable to extract download URL from: %s' % url)
            # Collapse the notice's internal whitespace into single spaces.
            restriction_message = re.sub(r'\s+', ' ', notice.group(1)).strip()
            raise ExtractorError(u'%s' % restriction_message)

        file_url = mobj.group(1)
        file_extension = os.path.splitext(file_url)[1][1:]

        # Search for file title
        file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')

        # file_id comes from the caller's url and may still be a byte string
        # (Python 2); text values have no need for (and on Python 3 no)
        # .decode().
        if isinstance(file_id, bytes):
            file_id = file_id.decode('utf-8')

        return [{
            'id':       file_id,
            'url':      file_url,
            'uploader': None,
            'upload_date':  None,
            'title':    file_title,
            'ext':      file_extension,
        }]
 |