Move Collegehumor IE into its own file

2025-11-05 05:17:07 -05:00 · 2013-06-23 21:10:21 +02:00
parent 153697660d
commit 7beb36a529
2 changed files with 75 additions and 90 deletions
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -22,6 +22,7 @@ from .extractor.ard import ARDIE
 from .extractor.arte import ArteTvIE
 from .extractor.bliptv import BlipTVIE, BlipTVUserIE
 from .extractor.comedycentral import ComedyCentralIE
 from .extractor.collegehumor import CollegeHumorIE
 from .extractor.dailymotion import DailymotionIE
 from .extractor.depositfiles import DepositFilesIE
 from .extractor.escapist import EscapistIE
@@ -41,96 +42,6 @@ from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, Yo
 from .extractor.zdf import ZDFIE
 class CollegeHumorIE(InfoExtractor):
    """Information extractor for collegehumor.com"""
    _WORKING = False
    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
    IE_NAME = u'collegehumor'
    def report_manifest(self, video_id):
        """Report information extraction."""
        self.to_screen(u'%s: Downloading XML manifest' % video_id)
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')
        info = {
            'id': video_id,
            'uploader': None,
            'upload_date': None,
        }
        self.report_extraction(video_id)
        xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
        try:
            metaXml = compat_urllib_request.urlopen(xmlUrl).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
        mdoc = xml.etree.ElementTree.fromstring(metaXml)
        try:
            videoNode = mdoc.findall('./video')[0]
            info['description'] = videoNode.findall('./description')[0].text
            info['title'] = videoNode.findall('./caption')[0].text
            info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
            manifest_url = videoNode.findall('./file')[0].text
        except IndexError:
            raise ExtractorError(u'Invalid metadata XML file')
        manifest_url += '?hdcore=2.10.3'
        self.report_manifest(video_id)
        try:
            manifestXml = compat_urllib_request.urlopen(manifest_url).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
        adoc = xml.etree.ElementTree.fromstring(manifestXml)
        try:
            media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
            node_id = media_node.attrib['url']
            video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
        except IndexError as err:
            raise ExtractorError(u'Invalid manifest file')
        url_pr = compat_urllib_parse_urlparse(manifest_url)
        url = url_pr.scheme + '://' + url_pr.netloc + '/z' + video_id[:-2] + '/' + node_id + 'Seg1-Frag1'
        info['url'] = url
        info['ext'] = 'f4f'
        return [info]
 class XVideosIE(InfoExtractor):
    """Information extractor for xvideos.com"""
--- a/youtube_dl/extractor/collegehumor.py
+++ b/youtube_dl/extractor/collegehumor.py
@@ -0,0 +1,74 @@
 import re
 import socket
 import xml.etree.ElementTree
 from .common import InfoExtractor
 from ..utils import (
    compat_http_client,
    compat_str,
    compat_urllib_error,
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    ExtractorError,
 )
 class CollegeHumorIE(InfoExtractor):
    _WORKING = False
    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
    def report_manifest(self, video_id):
        """Report information extraction."""
        self.to_screen(u'%s: Downloading XML manifest' % video_id)
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')
        info = {
            'id': video_id,
            'uploader': None,
            'upload_date': None,
        }
        self.report_extraction(video_id)
        xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
        try:
            metaXml = compat_urllib_request.urlopen(xmlUrl).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
        mdoc = xml.etree.ElementTree.fromstring(metaXml)
        try:
            videoNode = mdoc.findall('./video')[0]
            info['description'] = videoNode.findall('./description')[0].text
            info['title'] = videoNode.findall('./caption')[0].text
            info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
            manifest_url = videoNode.findall('./file')[0].text
        except IndexError:
            raise ExtractorError(u'Invalid metadata XML file')
        manifest_url += '?hdcore=2.10.3'
        self.report_manifest(video_id)
        try:
            manifestXml = compat_urllib_request.urlopen(manifest_url).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
        adoc = xml.etree.ElementTree.fromstring(manifestXml)
        try:
            media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
            node_id = media_node.attrib['url']
            video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
        except IndexError as err:
            raise ExtractorError(u'Invalid manifest file')
        url_pr = compat_urllib_parse_urlparse(manifest_url)
        url = url_pr.scheme + '://' + url_pr.netloc + '/z' + video_id[:-2] + '/' + node_id + 'Seg1-Frag1'
        info['url'] = url
        info['ext'] = 'f4f'
        return [info]