mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 07:47:08 -05:00 
			
		
		
		
	[minhateca] Add extractor (Fixes #4094)
This commit is contained in:
		@@ -376,6 +376,7 @@ class TestUtil(unittest.TestCase):
 | 
			
		||||
        self.assertEqual(parse_filesize('2 MiB'), 2097152)
 | 
			
		||||
        self.assertEqual(parse_filesize('5 GB'), 5000000000)
 | 
			
		||||
        self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
 | 
			
		||||
        self.assertEqual(parse_filesize('1,24 KB'), 1240)
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    unittest.main()
 | 
			
		||||
 
 | 
			
		||||
@@ -217,6 +217,7 @@ from .mdr import MDRIE
 | 
			
		||||
from .metacafe import MetacafeIE
 | 
			
		||||
from .metacritic import MetacriticIE
 | 
			
		||||
from .mgoon import MgoonIE
 | 
			
		||||
from .minhateca import MinhatecaIE
 | 
			
		||||
from .ministrygrid import MinistryGridIE
 | 
			
		||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
 | 
			
		||||
from .mitele import MiTeleIE
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										71
									
								
								youtube_dl/extractor/minhateca.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								youtube_dl/extractor/minhateca.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,71 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    parse_filesize,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MinhatecaIE(InfoExtractor):
    """Information extractor for files hosted on minhateca.com.br.

    The file page is downloaded first; its anti-forgery token is then
    POSTed together with the numeric file id to the site's download
    action, whose JSON reply carries the media URL in ``redirectUrl``.
    """

    _VALID_URL = r'https?://minhateca\.com\.br/[^?#]+,(?P<id>[0-9]+)\.'
    _TEST = {
        'url': 'http://minhateca.com.br/pereba/misc/youtube-dl+test+video,125848331.mp4(video)',
        'info_dict': {
            'id': '125848331',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
            # Raw string: '\.' is not a valid escape in a plain literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'filesize_approx': 1530000,
            'duration': 9,
            'view_count': int,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the file page at ``url``.

        ``url`` is expected to match ``_VALID_URL``; the numeric id it
        contains is used both as the result id and as the ``fileId`` form
        field of the download request.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The download action is guarded by the verification token that is
        # embedded in the page as an <input> field.
        token = self._html_search_regex(
            r'<input name="__RequestVerificationToken".*?value="([^"]+)"',
            webpage, 'request token')
        token_data = [
            ('fileId', video_id),
            ('__RequestVerificationToken', token),
        ]
        # Request bodies must be bytes on Python 3. The urlencoded form is
        # plain ASCII, so the encoding step is also harmless on Python 2.
        post_data = compat_urllib_parse.urlencode(token_data).encode('ascii')
        req = compat_urllib_request.Request(
            'http://minhateca.com.br/action/License/Download',
            data=post_data)
        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        data = self._download_json(
            req, video_id, note='Downloading metadata')

        video_url = data['redirectUrl']

        # The page heading holds the file name ("<title>.<ext>").
        title_str = self._html_search_regex(
            r'<h1.*?>(.*?)</h1>', webpage, 'title')
        title, dot, ext = title_str.rpartition('.')
        if not dot:
            # No extension in the heading: rpartition() would otherwise
            # leave the title empty and report the whole heading as ext.
            title, ext = title_str, None

        # The remaining fields are optional (looked up with fatal=False).
        filesize_approx = parse_filesize(self._html_search_regex(
            r'<p class="fileSize">(.*?)</p>',
            webpage, 'file size approximation', fatal=False))
        duration = int_or_none(self._html_search_regex(
            r'(?s)<p class="fileLeng[ht][th]">.*?([0-9]+)\s*s',
            webpage, 'duration', fatal=False))
        view_count = int_or_none(self._html_search_regex(
            r'<p class="downloadsCounter">([0-9]+)</p>',
            webpage, 'view count', fatal=False))

        info = {
            'id': video_id,
            'url': video_url,
            'title': title,
            'filesize_approx': filesize_approx,
            'duration': duration,
            'view_count': view_count,
            'thumbnail': self._og_search_thumbnail(webpage),
        }
        if ext:
            # Only report an extension that was actually found; omit the
            # key rather than emit an empty or bogus value.
            info['ext'] = ext
        return info
 | 
			
		||||
@@ -1090,11 +1090,14 @@ def parse_filesize(s):
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
 | 
			
		||||
    m = re.match(r'(?P<num>[0-9]+(?:\.[0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
 | 
			
		||||
    m = re.match(
 | 
			
		||||
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
 | 
			
		||||
    if not m:
 | 
			
		||||
        return None
 | 
			
		||||
 | 
			
		||||
    return int(float(m.group('num')) * _UNIT_TABLE[m.group('unit')])
 | 
			
		||||
    num_str = m.group('num').replace(',', '.')
 | 
			
		||||
    mult = _UNIT_TABLE[m.group('unit')]
 | 
			
		||||
    return int(float(num_str) * mult)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_term_width():
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user