mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 03:17:07 -05:00 
			
		
		
		
	Improve geo bypass mechanism
* Rename options so that their prefix matches --geo-verification-proxy * Introduce _GEO_COUNTRIES for extractors * Implement faking IP right away for sites with known geo restriction
This commit is contained in:
		@@ -323,10 +323,15 @@ class InfoExtractor(object):
 | 
			
		||||
    _real_extract() methods and define a _VALID_URL regexp.
 | 
			
		||||
    Probably, they should also be added to the list of extractors.
 | 
			
		||||
 | 
			
		||||
    _BYPASS_GEO attribute may be set to False in order to disable
 | 
			
		||||
    _GEO_BYPASS attribute may be set to False in order to disable
 | 
			
		||||
    geo restriction bypass mechanisms for a particular extractor.
 | 
			
		||||
    Though it won't disable explicit geo restriction bypass based on
 | 
			
		||||
    country code provided with geo_bypass_country.
 | 
			
		||||
    country code provided with geo_bypass_country. (experimental)
 | 
			
		||||
 | 
			
		||||
    _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
 | 
			
		||||
    countries for this extractor. One of these countries will be used by
 | 
			
		||||
    geo restriction bypass mechanism right away in order to bypass
 | 
			
		||||
    geo restriction, of course, if the mechanism is not disabled. (experimental)
 | 
			
		||||
 | 
			
		||||
    Finally, the _WORKING attribute should be set to False for broken IEs
 | 
			
		||||
    in order to warn the users and skip the tests.
 | 
			
		||||
@@ -335,7 +340,8 @@ class InfoExtractor(object):
 | 
			
		||||
    _ready = False
 | 
			
		||||
    _downloader = None
 | 
			
		||||
    _x_forwarded_for_ip = None
 | 
			
		||||
    _BYPASS_GEO = True
 | 
			
		||||
    _GEO_BYPASS = True
 | 
			
		||||
    _GEO_COUNTRIES = None
 | 
			
		||||
    _WORKING = True
 | 
			
		||||
 | 
			
		||||
    def __init__(self, downloader=None):
 | 
			
		||||
@@ -370,14 +376,28 @@ class InfoExtractor(object):
 | 
			
		||||
 | 
			
		||||
    def initialize(self):
 | 
			
		||||
        """Initializes an instance (authentication, etc)."""
 | 
			
		||||
        if not self._x_forwarded_for_ip:
 | 
			
		||||
            country_code = self._downloader.params.get('geo_bypass_country', None)
 | 
			
		||||
            if country_code:
 | 
			
		||||
                self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
 | 
			
		||||
        self.__initialize_geo_bypass()
 | 
			
		||||
        if not self._ready:
 | 
			
		||||
            self._real_initialize()
 | 
			
		||||
            self._ready = True
 | 
			
		||||
 | 
			
		||||
    def __initialize_geo_bypass(self):
        """Choose a fake X-Forwarded-For IP up front, when applicable.

        Does nothing if a fake IP has already been set on this extractor.
        Otherwise the country is taken from the 'geo_bypass_country'
        downloader param; failing that, and provided geo bypass is enabled
        both on the extractor (_GEO_BYPASS) and in the downloader params
        ('geo_bypass'), a random entry of _GEO_COUNTRIES is used.
        """
        if self._x_forwarded_for_ip:
            return

        params = self._downloader.params
        country_code = params.get('geo_bypass_country', None)
        # If there is no explicit country for geo bypass specified and
        # the extractor is known to be geo restricted let's fake IP
        # as X-Forwarded-For right away.
        if (not country_code and
                self._GEO_BYPASS and
                params.get('geo_bypass', True) and
                self._GEO_COUNTRIES):
            country_code = random.choice(self._GEO_COUNTRIES)

        if not country_code:
            return

        self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
        # The retry path in this class checks the generated IP for truthiness
        # before reporting it, so random_ipv4 presumably may return a falsy
        # value; only announce the fake IP when one was actually produced
        # (avoids a misleading "Using fake None IP" debug line).
        if self._x_forwarded_for_ip and params.get('verbose', False):
            self._downloader.to_stdout(
                '[debug] Using fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
 | 
			
		||||
 | 
			
		||||
    def extract(self, url):
 | 
			
		||||
        """Extracts URL information and returns it in list of dicts."""
 | 
			
		||||
        try:
 | 
			
		||||
@@ -389,16 +409,8 @@ class InfoExtractor(object):
 | 
			
		||||
                        ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
 | 
			
		||||
                    return ie_result
 | 
			
		||||
                except GeoRestrictedError as e:
 | 
			
		||||
                    if (not self._downloader.params.get('geo_bypass_country', None) and
 | 
			
		||||
                            self._BYPASS_GEO and
 | 
			
		||||
                            self._downloader.params.get('geo_bypass', True) and
 | 
			
		||||
                            not self._x_forwarded_for_ip and
 | 
			
		||||
                            e.countries):
 | 
			
		||||
                        self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries))
 | 
			
		||||
                        if self._x_forwarded_for_ip:
 | 
			
		||||
                            self.report_warning(
 | 
			
		||||
                                'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
 | 
			
		||||
                            continue
 | 
			
		||||
                    if self.__maybe_fake_ip_and_retry(e.countries):
 | 
			
		||||
                        continue
 | 
			
		||||
                    raise
 | 
			
		||||
        except ExtractorError:
 | 
			
		||||
            raise
 | 
			
		||||
@@ -407,6 +419,19 @@ class InfoExtractor(object):
 | 
			
		||||
        except (KeyError, StopIteration) as e:
 | 
			
		||||
            raise ExtractorError('An extractor error has occurred.', cause=e)
 | 
			
		||||
 | 
			
		||||
    def __maybe_fake_ip_and_retry(self, countries):
        """Try to set up a fake X-Forwarded-For IP after a geo restriction error.

        countries is the collection of country codes to pick from.
        Returns True when a fake IP was stored on the extractor (the caller
        should retry extraction) and False otherwise.
        """
        params = self._downloader.params
        # An explicitly requested bypass country takes precedence.
        if params.get('geo_bypass_country', None):
            return False
        # Bypass switched off for this extractor or via downloader params.
        if not self._GEO_BYPASS:
            return False
        if not params.get('geo_bypass', True):
            return False
        # A fake IP is already in use, or there is nothing to choose from.
        if self._x_forwarded_for_ip or not countries:
            return False

        fake_ip = GeoUtils.random_ipv4(random.choice(countries))
        self._x_forwarded_for_ip = fake_ip
        if not fake_ip:
            return False

        self.report_warning(
            'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
        return True
 | 
			
		||||
 | 
			
		||||
    def set_downloader(self, downloader):
 | 
			
		||||
        """Sets the downloader for this IE."""
 | 
			
		||||
        self._downloader = downloader
 | 
			
		||||
 
 | 
			
		||||
@@ -20,6 +20,7 @@ from ..utils import (
 | 
			
		||||
class DramaFeverBaseIE(AMPIE):
 | 
			
		||||
    _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
 | 
			
		||||
    _NETRC_MACHINE = 'dramafever'
 | 
			
		||||
    _GEO_COUNTRIES = ['US', 'CA']
 | 
			
		||||
 | 
			
		||||
    _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
 | 
			
		||||
 | 
			
		||||
@@ -118,7 +119,7 @@ class DramaFeverIE(DramaFeverBaseIE):
 | 
			
		||||
            if isinstance(e.cause, compat_HTTPError):
 | 
			
		||||
                self.raise_geo_restricted(
 | 
			
		||||
                    msg='Currently unavailable in your country',
 | 
			
		||||
                    countries=['US', 'CA'])
 | 
			
		||||
                    countries=self._GEO_COUNTRIES)
 | 
			
		||||
            raise
 | 
			
		||||
 | 
			
		||||
        series_id, episode_number = video_id.split('.')
 | 
			
		||||
 
 | 
			
		||||
@@ -37,6 +37,7 @@ class GoIE(AdobePassIE):
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
 | 
			
		||||
    _GEO_COUNTRIES = ['US']
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
@@ -104,7 +105,7 @@ class GoIE(AdobePassIE):
 | 
			
		||||
                        for error in errors:
 | 
			
		||||
                            if error.get('code') == 1002:
 | 
			
		||||
                                self.raise_geo_restricted(
 | 
			
		||||
                                    error['message'], countries=['US'])
 | 
			
		||||
                                    error['message'], countries=self._GEO_COUNTRIES)
 | 
			
		||||
                        error_message = ', '.join([error['message'] for error in errors])
 | 
			
		||||
                        raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
 | 
			
		||||
                    asset_url += '?' + entitlement['uplynkData']['sessionKey']
 | 
			
		||||
 
 | 
			
		||||
@@ -24,6 +24,7 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
class ITVIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
 | 
			
		||||
    _GEO_COUNTRIES = ['GB']
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
@@ -101,7 +102,8 @@ class ITVIE(InfoExtractor):
 | 
			
		||||
            fault_code = xpath_text(resp_env, './/faultcode')
 | 
			
		||||
            fault_string = xpath_text(resp_env, './/faultstring')
 | 
			
		||||
            if fault_code == 'InvalidGeoRegion':
 | 
			
		||||
                self.raise_geo_restricted(msg=fault_string, countries=['GB'])
 | 
			
		||||
                self.raise_geo_restricted(
 | 
			
		||||
                    msg=fault_string, countries=self._GEO_COUNTRIES)
 | 
			
		||||
            raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
 | 
			
		||||
        title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
 | 
			
		||||
        video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
 | 
			
		||||
 
 | 
			
		||||
@@ -14,6 +14,7 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class NRKBaseIE(InfoExtractor):
 | 
			
		||||
    _GEO_COUNTRIES = ['NO']
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
 | 
			
		||||
@@ -93,7 +94,8 @@ class NRKBaseIE(InfoExtractor):
 | 
			
		||||
            # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
 | 
			
		||||
            if 'IsGeoBlocked' in message_type:
 | 
			
		||||
                self.raise_geo_restricted(
 | 
			
		||||
                    msg=MESSAGES.get('ProgramIsGeoBlocked'), countries=['NO'])
 | 
			
		||||
                    msg=MESSAGES.get('ProgramIsGeoBlocked'),
 | 
			
		||||
                    countries=self._GEO_COUNTRIES)
 | 
			
		||||
            raise ExtractorError(
 | 
			
		||||
                '%s said: %s' % (self.IE_NAME, MESSAGES.get(
 | 
			
		||||
                    message_type, message_type)),
 | 
			
		||||
 
 | 
			
		||||
@@ -10,6 +10,7 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
class OnDemandKoreaIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
 | 
			
		||||
    _GEO_COUNTRIES = ['US', 'CA']
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
@@ -36,7 +37,7 @@ class OnDemandKoreaIE(InfoExtractor):
 | 
			
		||||
        if 'msg_block_01.png' in webpage:
 | 
			
		||||
            self.raise_geo_restricted(
 | 
			
		||||
                msg='This content is not available in your region',
 | 
			
		||||
                countries=['US', 'CA'])
 | 
			
		||||
                countries=self._GEO_COUNTRIES)
 | 
			
		||||
 | 
			
		||||
        if 'This video is only available to ODK PLUS members.' in webpage:
 | 
			
		||||
            raise ExtractorError(
 | 
			
		||||
 
 | 
			
		||||
@@ -193,6 +193,8 @@ class PBSIE(InfoExtractor):
 | 
			
		||||
        )
 | 
			
		||||
    ''' % '|'.join(list(zip(*_STATIONS))[0])
 | 
			
		||||
 | 
			
		||||
    _GEO_COUNTRIES = ['US']
 | 
			
		||||
 | 
			
		||||
    _TESTS = [
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
 | 
			
		||||
@@ -492,7 +494,8 @@ class PBSIE(InfoExtractor):
 | 
			
		||||
                message = self._ERRORS.get(
 | 
			
		||||
                    redirect_info['http_code'], redirect_info['message'])
 | 
			
		||||
                if redirect_info['http_code'] == 403:
 | 
			
		||||
                    self.raise_geo_restricted(msg=message, countries=['US'])
 | 
			
		||||
                    self.raise_geo_restricted(
 | 
			
		||||
                        msg=message, countries=self._GEO_COUNTRIES)
 | 
			
		||||
                raise ExtractorError(
 | 
			
		||||
                    '%s said: %s' % (self.IE_NAME, message), expected=True)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -14,7 +14,8 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
class SRGSSRIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)'
 | 
			
		||||
    _BYPASS_GEO = False
 | 
			
		||||
    _GEO_BYPASS = False
 | 
			
		||||
    _GEO_COUNTRIES = ['CH']
 | 
			
		||||
 | 
			
		||||
    _ERRORS = {
 | 
			
		||||
        'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.',
 | 
			
		||||
@@ -43,7 +44,8 @@ class SRGSSRIE(InfoExtractor):
 | 
			
		||||
        if media_data.get('block') and media_data['block'] in self._ERRORS:
 | 
			
		||||
            message = self._ERRORS[media_data['block']]
 | 
			
		||||
            if media_data['block'] == 'GEOBLOCK':
 | 
			
		||||
                self.raise_geo_restricted(msg=message, countries=['CH'])
 | 
			
		||||
                self.raise_geo_restricted(
 | 
			
		||||
                    msg=message, countries=self._GEO_COUNTRIES)
 | 
			
		||||
            raise ExtractorError(
 | 
			
		||||
                '%s said: %s' % (self.IE_NAME, message), expected=True)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -13,6 +13,7 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SVTBaseIE(InfoExtractor):
 | 
			
		||||
    _GEO_COUNTRIES = ['SE']
 | 
			
		||||
    def _extract_video(self, video_info, video_id):
 | 
			
		||||
        formats = []
 | 
			
		||||
        for vr in video_info['videoReferences']:
 | 
			
		||||
@@ -39,7 +40,8 @@ class SVTBaseIE(InfoExtractor):
 | 
			
		||||
                })
 | 
			
		||||
        if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
 | 
			
		||||
            self.raise_geo_restricted(
 | 
			
		||||
                'This video is only available in Sweden', countries=['SE'])
 | 
			
		||||
                'This video is only available in Sweden',
 | 
			
		||||
                countries=self._GEO_COUNTRIES)
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        subtitles = {}
 | 
			
		||||
 
 | 
			
		||||
@@ -20,6 +20,7 @@ class Vbox7IE(InfoExtractor):
 | 
			
		||||
                        )
 | 
			
		||||
                        (?P<id>[\da-fA-F]+)
 | 
			
		||||
                    '''
 | 
			
		||||
    _GEO_COUNTRIES = ['BG']
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://vbox7.com/play:0946fff23c',
 | 
			
		||||
        'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
 | 
			
		||||
@@ -78,7 +79,7 @@ class Vbox7IE(InfoExtractor):
 | 
			
		||||
        video_url = video['src']
 | 
			
		||||
 | 
			
		||||
        if '/na.mp4' in video_url:
 | 
			
		||||
            self.raise_geo_restricted(countries=['BG'])
 | 
			
		||||
            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
 | 
			
		||||
 | 
			
		||||
        uploader = video.get('uploader')
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -14,7 +14,7 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
class VGTVIE(XstreamIE):
 | 
			
		||||
    IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
 | 
			
		||||
    _BYPASS_GEO = False
 | 
			
		||||
    _GEO_BYPASS = False
 | 
			
		||||
 | 
			
		||||
    _HOST_TO_APPNAME = {
 | 
			
		||||
        'vgtv.no': 'vgtv',
 | 
			
		||||
@@ -218,7 +218,8 @@ class VGTVIE(XstreamIE):
 | 
			
		||||
            properties = try_get(
 | 
			
		||||
                data, lambda x: x['streamConfiguration']['properties'], list)
 | 
			
		||||
            if properties and 'geoblocked' in properties:
 | 
			
		||||
                raise self.raise_geo_restricted(countries=['NO'])
 | 
			
		||||
                raise self.raise_geo_restricted(
 | 
			
		||||
                    countries=[host.rpartition('.')[-1].partition('/')[0].upper()])
 | 
			
		||||
 | 
			
		||||
        self._sort_formats(info['formats'])
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -27,7 +27,7 @@ class VikiBaseIE(InfoExtractor):
 | 
			
		||||
    _APP_VERSION = '2.2.5.1428709186'
 | 
			
		||||
    _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
 | 
			
		||||
 | 
			
		||||
    _BYPASS_GEO = False
 | 
			
		||||
    _GEO_BYPASS = False
 | 
			
		||||
    _NETRC_MACHINE = 'viki'
 | 
			
		||||
 | 
			
		||||
    _token = None
 | 
			
		||||
 
 | 
			
		||||
@@ -3291,7 +3291,7 @@ class GeoUtils(object):
 | 
			
		||||
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
 | 
			
		||||
        addr_max = addr_min | (0xffffffff >> int(preflen))
 | 
			
		||||
        return compat_str(socket.inet_ntoa(
 | 
			
		||||
            compat_struct_pack('!I', random.randint(addr_min, addr_max))))
 | 
			
		||||
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user