mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-03 19:57:06 -05:00 
			
		
		
		
	[toypics] Separate user and video extraction (#2601)
This commit is contained in:
		@@ -37,6 +37,7 @@ from youtube_dl.extractor import (
 | 
				
			|||||||
    GoogleSearchIE,
 | 
					    GoogleSearchIE,
 | 
				
			||||||
    GenericIE,
 | 
					    GenericIE,
 | 
				
			||||||
    TEDIE,
 | 
					    TEDIE,
 | 
				
			||||||
 | 
					    ToypicsUserIE,
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -269,5 +270,13 @@ class TestPlaylists(unittest.TestCase):
 | 
				
			|||||||
        self.assertEqual(result['title'], 'Who are the hackers?')
 | 
					        self.assertEqual(result['title'], 'Who are the hackers?')
 | 
				
			||||||
        self.assertTrue(len(result['entries']) >= 6)
 | 
					        self.assertTrue(len(result['entries']) >= 6)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_toypics_user(self):
 | 
				
			||||||
 | 
					        dl = FakeYDL()
 | 
				
			||||||
 | 
					        ie = ToypicsUserIE(dl)
 | 
				
			||||||
 | 
					        result = ie.extract('http://videos.toypics.net/Mikey')
 | 
				
			||||||
 | 
					        self.assertIsPlaylist(result)
 | 
				
			||||||
 | 
					        self.assertEqual(result['id'], 'Mikey')
 | 
				
			||||||
 | 
					        self.assertTrue(len(result['entries']) >= 17)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == '__main__':
 | 
					if __name__ == '__main__':
 | 
				
			||||||
    unittest.main()
 | 
					    unittest.main()
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -239,7 +239,7 @@ from .theplatform import ThePlatformIE
 | 
				
			|||||||
from .thisav import ThisAVIE
 | 
					from .thisav import ThisAVIE
 | 
				
			||||||
from .tinypic import TinyPicIE
 | 
					from .tinypic import TinyPicIE
 | 
				
			||||||
from .toutv import TouTvIE
 | 
					from .toutv import TouTvIE
 | 
				
			||||||
from .toypics import ToypicsIE
 | 
					from .toypics import ToypicsUserIE, ToypicsIE
 | 
				
			||||||
from .traileraddict import TrailerAddictIE
 | 
					from .traileraddict import TrailerAddictIE
 | 
				
			||||||
from .trilulilu import TriluliluIE
 | 
					from .trilulilu import TriluliluIE
 | 
				
			||||||
from .trutube import TruTubeIE
 | 
					from .trutube import TruTubeIE
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -2,43 +2,26 @@ from .common import InfoExtractor
 | 
				
			|||||||
from math import ceil
 | 
					from math import ceil
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class ToypicsIE(InfoExtractor):
 | 
					class ToypicsIE(InfoExtractor):
 | 
				
			||||||
    _VALID_URL = r'(?:http://)?videos\.toypics\.net/.*'
 | 
					    IE_DESC = 'Toypics user profile'
 | 
				
			||||||
 | 
					    _VALID_URL = r'http://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*'
 | 
				
			||||||
    _TEST = {
 | 
					    _TEST = {
 | 
				
			||||||
        'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
 | 
					        'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
 | 
				
			||||||
        #'md5': '8a8b546956bbd0e769dbe28f6e80abb3', == $head -c10K 12929646011616163504.mp4 |md5sum //no idea why it fails
 | 
					        'md5': '16e806ad6d6f58079d210fe30985e08b',
 | 
				
			||||||
        'info_dict': {
 | 
					        'info_dict': {
 | 
				
			||||||
            'id': '514',
 | 
					            'id': '514',
 | 
				
			||||||
            'ext': 'mp4',
 | 
					            'ext': 'mp4',
 | 
				
			||||||
            'title': 'Chance-Bulge\'d, 2',
 | 
					            'title': 'Chance-Bulge\'d, 2',
 | 
				
			||||||
            'age_limit': 18
 | 
					            'age_limit': 18,
 | 
				
			||||||
 | 
					            'uploader': 'kidsune',
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    PAGINATED=8
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _real_extract(self, url):
 | 
					    def _real_extract(self, url):
 | 
				
			||||||
        mobj = re.match(r'(http://)?videos\.toypics\.net/(?P<username>[^/?]+)$', url)
 | 
					        mobj = re.match(self._VALID_URL, url)
 | 
				
			||||||
        if not mobj:
 | 
					        video_id = mobj.group('id')
 | 
				
			||||||
            return self.extract_one(url)
 | 
					        page = self._download_webpage(url, video_id)
 | 
				
			||||||
        return [self.extract_one(u) for u in self.process_paginated(url,
 | 
					 | 
				
			||||||
            r'public/">Public Videos \((?P<videos_count>[0-9]+)\)</a></li>',
 | 
					 | 
				
			||||||
            r'<p class="video-entry-title">\n\s*<a href="(http://videos.toypics.net/view/[^"]+)">'
 | 
					 | 
				
			||||||
        )]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def process_paginated(self, profile_url, re_total, re_video_page):
 | 
					 | 
				
			||||||
        profile_page = self._download_webpage(profile_url, 'profile' , 'getting profile page: '+profile_url)
 | 
					 | 
				
			||||||
        videos_count = self._html_search_regex(re_total, profile_page, 'videos count')
 | 
					 | 
				
			||||||
        lst = []
 | 
					 | 
				
			||||||
        for n in xrange(1,int(ceil(float(videos_count)/self.PAGINATED)) +1):
 | 
					 | 
				
			||||||
            lpage_url = profile_url +'/public/%d'%n
 | 
					 | 
				
			||||||
            lpage = self._download_webpage(lpage_url, 'page %d'%n)
 | 
					 | 
				
			||||||
            lst.extend(re.findall(re_video_page, lpage))
 | 
					 | 
				
			||||||
        return lst
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def extract_one(self,url):
 | 
					 | 
				
			||||||
        mobj = re.match(r'(http://)?videos\.toypics\.net/view/(?P<videoid>[0-9]+)/.*', url)
 | 
					 | 
				
			||||||
        video_id = mobj.group('videoid')
 | 
					 | 
				
			||||||
        page = self._download_webpage(url, video_id, 'getting page: '+url)
 | 
					 | 
				
			||||||
        video_url = self._html_search_regex(
 | 
					        video_url = self._html_search_regex(
 | 
				
			||||||
            r'src:\s+"(http://static[0-9]+\.toypics\.net/flvideo/[^"]+)"', page, 'video URL')
 | 
					            r'src:\s+"(http://static[0-9]+\.toypics\.net/flvideo/[^"]+)"', page, 'video URL')
 | 
				
			||||||
        title = self._html_search_regex(
 | 
					        title = self._html_search_regex(
 | 
				
			||||||
@@ -48,8 +31,46 @@ class ToypicsIE(InfoExtractor):
 | 
				
			|||||||
        return {
 | 
					        return {
 | 
				
			||||||
            'id': video_id,
 | 
					            'id': video_id,
 | 
				
			||||||
            'url': video_url,
 | 
					            'url': video_url,
 | 
				
			||||||
            'ext': video_url[-3:],
 | 
					 | 
				
			||||||
            'title': title,
 | 
					            'title': title,
 | 
				
			||||||
            'uploader': username,
 | 
					            'uploader': username,
 | 
				
			||||||
            'age_limit': 18
 | 
					            'age_limit': 18,
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class ToypicsUserIE(InfoExtractor):
 | 
				
			||||||
 | 
					    IE_DESC = 'Toypics user profile'
 | 
				
			||||||
 | 
					    _VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _real_extract(self, url):
 | 
				
			||||||
 | 
					        mobj = re.match(self._VALID_URL, url)
 | 
				
			||||||
 | 
					        username = mobj.group('username')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        profile_page = self._download_webpage(
 | 
				
			||||||
 | 
					            url, username, note='Retrieving profile page')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        video_count = int(self._search_regex(
 | 
				
			||||||
 | 
					            r'public/">Public Videos \(([0-9]+)\)</a></li>', profile_page,
 | 
				
			||||||
 | 
					            'video count'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        PAGE_SIZE = 8
 | 
				
			||||||
 | 
					        urls = []
 | 
				
			||||||
 | 
					        page_count = (video_count + PAGE_SIZE + 1) // PAGE_SIZE
 | 
				
			||||||
 | 
					        for n in range(1, page_count + 1):
 | 
				
			||||||
 | 
					            lpage_url = url + '/public/%d' % n
 | 
				
			||||||
 | 
					            lpage = self._download_webpage(
 | 
				
			||||||
 | 
					                lpage_url, username,
 | 
				
			||||||
 | 
					                note='Downloading page %d/%d' % (n, page_count))
 | 
				
			||||||
 | 
					            urls.extend(
 | 
				
			||||||
 | 
					                re.findall(
 | 
				
			||||||
 | 
					                    r'<p class="video-entry-title">\n\s*<a href="(http://videos.toypics.net/view/[^"]+)">',
 | 
				
			||||||
 | 
					                    lpage))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return {
 | 
				
			||||||
 | 
					            '_type': 'playlist',
 | 
				
			||||||
 | 
					            'id': username,
 | 
				
			||||||
 | 
					            'entries': [{
 | 
				
			||||||
 | 
					                '_type': 'url',
 | 
				
			||||||
 | 
					                'url': url,
 | 
				
			||||||
 | 
					                'ie_key': 'Toypics',
 | 
				
			||||||
 | 
					            } for url in urls]
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user