mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-10-31 16:22:22 -04:00 
			
		
		
		
	[cinchcast] Add new extractor (Fixes #4428)
This commit is contained in:
		| @@ -144,6 +144,9 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011') | ||||
|         self.assertEqual(unified_strdate('1968-12-10'), '19681210') | ||||
|         self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128') | ||||
|         self.assertEqual( | ||||
|             unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False), | ||||
|             '20141126') | ||||
|  | ||||
|     def test_find_xpath_attr(self): | ||||
|         testxml = '''<root> | ||||
|   | ||||
| @@ -51,6 +51,7 @@ from .cbsnews import CBSNewsIE | ||||
| from .ceskatelevize import CeskaTelevizeIE | ||||
| from .channel9 import Channel9IE | ||||
| from .chilloutzone import ChilloutzoneIE | ||||
| from .cinchcast import CinchcastIE | ||||
| from .clipfish import ClipfishIE | ||||
| from .cliphunter import CliphunterIE | ||||
| from .clipsyndicate import ClipsyndicateIE | ||||
|   | ||||
							
								
								
									
										53
									
								
								youtube_dl/extractor/cinchcast.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								youtube_dl/extractor/cinchcast.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     unified_strdate, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
|  | ||||
class CinchcastIE(InfoExtractor):
    """Extractor for assets played through the player.cinchcast.com widget.

    The numeric ``assetId`` query parameter of the player URL is used as the
    video id. Metadata and media URLs are read from an MRSS document served
    by blogtalkradio.com for that asset id.
    """

    _VALID_URL = r'https?://player\.cinchcast\.com/.*?assetId=(?P<id>[0-9]+)'
    _TEST = {
        # Actual test is run in generic, look for undergroundwellness
        'url': 'http://player.cinchcast.com/?platformId=1&assetType=single&assetId=7141703',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Build the info dict for a single Cinchcast asset.

        Downloads the MRSS feed for the asset id found in ``url`` and reads
        the title, upload date and media URLs from its first ``<item>``.

        Returns a dict with the keys ``id``, ``title``, ``upload_date`` and
        ``formats``. The title lookup is fatal: ``xpath_text`` is called with
        ``fatal=True``, so a feed without a title aborts the extraction.
        """
        asset_id = self._match_id(url)
        feed = self._download_xml(
            'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId=%s' % asset_id,
            asset_id)

        entry = feed.find('.//item')
        title = xpath_text(entry, './title', fatal=True)

        # The feed writes dates month-first (e.g. "11/26/2014 11:30:00 AM PST"),
        # hence day_first=False.
        raw_date = xpath_text(
            entry, './{http://developer.longtailvideo.com/trac/}date')
        upload_date = unified_strdate(raw_date, day_first=False)

        # duration is present but wrong
        primary = entry.find('./{http://search.yahoo.com/mrss/}content')
        formats = [{
            'format_id': 'main',
            'url': primary.attrib['url'],
        }]

        # The backup URL is optional; when present it is ranked above the
        # main one.
        backup_url = xpath_text(
            entry, './{http://developer.longtailvideo.com/trac/}backupContent')
        if backup_url:
            formats.append({
                'format_id': 'backup',
                'url': backup_url,
                'preference': 2,  # seems to be more reliable
            })
        self._sort_formats(formats)

        return {
            'id': asset_id,
            'title': title,
            'upload_date': upload_date,
            'formats': formats,
        }
| @@ -467,8 +467,17 @@ class GenericIE(InfoExtractor): | ||||
|             'expected_warnings': [ | ||||
|                 'URL could be a direct video link, returning it as such.' | ||||
|             ] | ||||
|         } | ||||
|  | ||||
|         }, | ||||
|         # Cinchcast embed | ||||
|         { | ||||
|             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/', | ||||
|             'info_dict': { | ||||
|                 'id': '7141703', | ||||
|                 'ext': 'mp3', | ||||
|                 'upload_date': '20141126', | ||||
|                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def report_following_redirect(self, new_url): | ||||
| @@ -962,6 +971,13 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'SBS') | ||||
|  | ||||
|         # Look for embedded Cinchcast player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1', | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'Cinchcast') | ||||
|  | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1', | ||||
|             webpage) | ||||
|   | ||||
| @@ -166,7 +166,7 @@ def xpath_text(node, xpath, name=None, fatal=False): | ||||
|         xpath = xpath.encode('ascii') | ||||
|  | ||||
|     n = node.find(xpath) | ||||
|     if n is None: | ||||
|     if n is None or n.text is None: | ||||
|         if fatal: | ||||
|             name = xpath if name is None else name | ||||
|             raise ExtractorError('Could not find XML element %s' % name) | ||||
| @@ -644,17 +644,19 @@ def parse_iso8601(date_str, delimiter='T'): | ||||
|     return calendar.timegm(dt.timetuple()) | ||||
|  | ||||
|  | ||||
| def unified_strdate(date_str): | ||||
| def unified_strdate(date_str, day_first=True): | ||||
|     """Return a string with the date in the format YYYYMMDD""" | ||||
|  | ||||
|     if date_str is None: | ||||
|         return None | ||||
|  | ||||
|     upload_date = None | ||||
|     # Replace commas | ||||
|     date_str = date_str.replace(',', ' ') | ||||
|     # %z (UTC offset) is only supported in python>=3.2 | ||||
|     date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) | ||||
|     # Remove AM/PM + timezone | ||||
|     date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str) | ||||
|  | ||||
|     format_expressions = [ | ||||
|         '%d %B %Y', | ||||
|         '%d %b %Y', | ||||
| @@ -669,7 +671,6 @@ def unified_strdate(date_str): | ||||
|         '%d/%m/%Y', | ||||
|         '%d/%m/%y', | ||||
|         '%Y/%m/%d %H:%M:%S', | ||||
|         '%d/%m/%Y %H:%M:%S', | ||||
|         '%Y-%m-%d %H:%M:%S', | ||||
|         '%Y-%m-%d %H:%M:%S.%f', | ||||
|         '%d.%m.%Y %H:%M', | ||||
| @@ -681,6 +682,14 @@ def unified_strdate(date_str): | ||||
|         '%Y-%m-%dT%H:%M:%S.%f', | ||||
|         '%Y-%m-%dT%H:%M', | ||||
|     ] | ||||
|     if day_first: | ||||
|         format_expressions.extend([ | ||||
|             '%d/%m/%Y %H:%M:%S', | ||||
|         ]) | ||||
|     else: | ||||
|         format_expressions.extend([ | ||||
|             '%m/%d/%Y %H:%M:%S', | ||||
|         ]) | ||||
|     for expression in format_expressions: | ||||
|         try: | ||||
|             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') | ||||
|   | ||||
		Reference in New Issue
	
	Block a user