mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 10:27:07 -05:00 
			
		
		
		
	[cspan] Make ‘www’ optional and improve the regex for extracting the id (fixes #2194)
This commit is contained in:
		@@ -10,7 +10,7 @@ from ..utils import (
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class CSpanIE(InfoExtractor):
 | 
					class CSpanIE(InfoExtractor):
 | 
				
			||||||
    _VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'
 | 
					    _VALID_URL = r'http://(?:www\.)?c-spanvideo\.org/program/(?P<name>.*)'
 | 
				
			||||||
    IE_DESC = 'C-SPAN'
 | 
					    IE_DESC = 'C-SPAN'
 | 
				
			||||||
    _TEST = {
 | 
					    _TEST = {
 | 
				
			||||||
        'url': 'http://www.c-spanvideo.org/program/HolderonV',
 | 
					        'url': 'http://www.c-spanvideo.org/program/HolderonV',
 | 
				
			||||||
@@ -24,9 +24,9 @@ class CSpanIE(InfoExtractor):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def _real_extract(self, url):
 | 
					    def _real_extract(self, url):
 | 
				
			||||||
        mobj = re.match(self._VALID_URL, url)
 | 
					        mobj = re.match(self._VALID_URL, url)
 | 
				
			||||||
        prog_name = mobj.group(1)
 | 
					        prog_name = mobj.group('name')
 | 
				
			||||||
        webpage = self._download_webpage(url, prog_name)
 | 
					        webpage = self._download_webpage(url, prog_name)
 | 
				
			||||||
        video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id')
 | 
					        video_id = self._search_regex(r'prog(?:ram)?id=(.*?)&', webpage, 'video id')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        title = self._html_search_regex(
 | 
					        title = self._html_search_regex(
 | 
				
			||||||
            r'<!-- title -->\n\s*<h1[^>]*>(.*?)</h1>', webpage, 'title')
 | 
					            r'<!-- title -->\n\s*<h1[^>]*>(.*?)</h1>', webpage, 'title')
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user