mirror of
https://gitlab.com/ytdl-org/youtube-dl.git
synced 2026-04-27 00:00:04 -04:00
Compare commits
140 Commits
2018.02.22
...
2018.04.09
| Author | SHA1 | Date | |
|---|---|---|---|
| f7f9757efc | |||
| 880ed89d49 | |||
| 94c3442e6a | |||
| 069937151e | |||
| d3431dcb90 | |||
| 1fc37ca3f1 | |||
| d04ca97616 | |||
| 608c738c7d | |||
| 66b686727b | |||
| 717ea4e14e | |||
| cae5d9705c | |||
| 1c9b1a4494 | |||
| ff826177cc | |||
| 9d15be3a5b | |||
| e2750e1437 | |||
| e944737c59 | |||
| fdfb32a0dd | |||
| 235d828b7b | |||
| 1236ac6b0b | |||
| df146eb282 | |||
| b71bb3ba8b | |||
| fd97fa7bfc | |||
| e8dfecb384 | |||
| 10f9caec04 | |||
| ea6679fbeb | |||
| 3acae1e031 | |||
| 8bd1df3c31 | |||
| 86693c4930 | |||
| d563fb32ba | |||
| e51762be19 | |||
| 03fcde10ce | |||
| 95a1322bc1 | |||
| 0669f8fd8f | |||
| 0b4bbcdcb6 | |||
| 3e78d23b57 | |||
| 190f6c936b | |||
| 02f6ccbce3 | |||
| 5d60b99717 | |||
| 9e6a418015 | |||
| 99c3091850 | |||
| bbd9d8c170 | |||
| c3cfc71a0c | |||
| 671e241bfb | |||
| 29d9594561 | |||
| f0298f653e | |||
| 2ea212628e | |||
| 80aa246094 | |||
| 0ff2c1ecb6 | |||
| 16132cff72 | |||
| 86e1958944 | |||
| b015cb1af3 | |||
| 7d34016fb0 | |||
| b9f5a41207 | |||
| 8b7340a45e | |||
| 1d4a0520ba | |||
| cba5d1b6b3 | |||
| 328ddf56a1 | |||
| 3395958d2b | |||
| a66d1d079a | |||
| c651de39d5 | |||
| d9e2240f7c | |||
| 832f9d5258 | |||
| 21dedcb580 | |||
| 6780154e6b | |||
| 38f59e2793 | |||
| 9a054fcbba | |||
| 6e3f23d912 | |||
| 47a5cb7734 | |||
| e0d198c18d | |||
| 96b8b9abae | |||
| 178ee88319 | |||
| d123960857 | |||
| 3526c3043b | |||
| 8e70c1bfac | |||
| 27b1c73f14 | |||
| 46c6742d4f | |||
| c95dfb0509 | |||
| b8c6badc96 | |||
| b848a4ca1a | |||
| e6e68069f6 | |||
| f3672ac522 | |||
| f226880c6d | |||
| 08250b69c2 | |||
| d116918993 | |||
| 7399ca1f80 | |||
| b4a190fe2a | |||
| cc7f6c720e | |||
| 3a0ceb32e2 | |||
| 7dee417127 | |||
| 5b1d158834 | |||
| a7298f3e99 | |||
| 5d49d879cc | |||
| b5434b5c31 | |||
| 690404a6f8 | |||
| d91dd0ce19 | |||
| 6202f08e1b | |||
| 574e9db2b0 | |||
| 2e25f80d5d | |||
| 64f34528df | |||
| 26ad6bcdfc | |||
| 81dc74966a | |||
| d53b6764d0 | |||
| 62f49dd3b9 | |||
| f9f10268c1 | |||
| f241a97312 | |||
| 86c8cfc555 | |||
| c01db237b5 | |||
| 0093c77032 | |||
| 5616caf852 | |||
| 05a7ffb126 | |||
| 28f21c9501 | |||
| 4c780fbd0a | |||
| 7773a92800 | |||
| b871d7e954 | |||
| 44dc11db61 | |||
| 949faa15e8 | |||
| 0c3e5f4921 | |||
| 266fbd6b73 | |||
| d1b6187012 | |||
| 6ab35f5e16 | |||
| 32ae31847f | |||
| abe8766c35 | |||
| eaa3172672 | |||
| 797c9284d6 | |||
| 8c73ef37b6 | |||
| b5cbe3d652 | |||
| ece12e6348 | |||
| ff274e3c16 | |||
| c106237d56 | |||
| 6e72ea4775 | |||
| d6a0350253 | |||
| ad29ef043e | |||
| f01df14c4f | |||
| 9306b0c8d9 | |||
| f4b7427279 | |||
| 300148b48a | |||
| 2d17c63140 | |||
| f2908d072e | |||
| 5e7841932c | |||
| 870f3bfc63 |
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.02.22*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.02.22**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.04.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.04.09**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2018.02.22
|
||||
[debug] youtube-dl version 2018.04.09
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
@@ -1,3 +1,157 @@
|
||||
version 2018.04.09
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Do not save/restore console title while simulate (#16103)
|
||||
* [extractor/common] Relax JSON-LD context check (#16006)
|
||||
|
||||
Extractors
|
||||
+ [generic] Add support for tube8 embeds
|
||||
+ [generic] Add support for share-videos.se embeds (#16089, #16115)
|
||||
* [odnoklassniki] Extend URL regular expression (#16081)
|
||||
* [steam] Bypass mature content check (#16113)
|
||||
+ [acast] Extract more metadata
|
||||
* [acast] Fix extraction (#16118)
|
||||
* [instagram:user] Fix extraction (#16119)
|
||||
* [drtuber] Fix title extraction (#16107, #16108)
|
||||
* [liveleak] Extend URL regular expression (#16117)
|
||||
+ [openload] Add support for oload.xyz
|
||||
* [openload] Relax stream URL regular expression
|
||||
* [openload] Fix extraction (#16099)
|
||||
+ [svtplay:series] Add support for season URLs
|
||||
+ [svtplay:series] Add support for series (#11130, #16059)
|
||||
|
||||
|
||||
version 2018.04.03
|
||||
|
||||
Extractors
|
||||
+ [tvnow] Add support for shows (#15837)
|
||||
* [dramafever] Fix authentication (#16067)
|
||||
* [afreecatv] Use partial view only when necessary (#14450)
|
||||
+ [afreecatv] Add support for authentication (#14450)
|
||||
+ [nationalgeographic] Add support for new URL schema (#16001, #16054)
|
||||
* [xvideos] Fix thumbnail extraction (#15978, #15979)
|
||||
* [medialaan] Fix vod id (#16038)
|
||||
+ [openload] Add support for oload.site (#16039)
|
||||
* [naver] Fix extraction (#16029)
|
||||
* [dramafever] Partially switch to API v5 (#16026)
|
||||
* [abc:iview] Unescape title and series meta fields (#15994)
|
||||
* [videa] Extend URL regular expression (#16003)
|
||||
|
||||
|
||||
version 2018.03.26.1
|
||||
|
||||
Core
|
||||
+ [downloader/external] Add elapsed time to progress hook (#10876)
|
||||
* [downloader/external,fragment] Fix download finalization when writing file
|
||||
to stdout (#10809, #10876, #15799)
|
||||
|
||||
Extractors
|
||||
* [vrv] Fix extraction on python2 (#15928)
|
||||
* [afreecatv] Update referrer (#15947)
|
||||
+ [24video] Add support for 24video.sexy (#15973)
|
||||
* [crackle] Bypass geo restriction
|
||||
* [crackle] Fix extraction (#15969)
|
||||
+ [lenta] Add support for lenta.ru (#15953)
|
||||
+ [instagram:user] Add pagination (#15934)
|
||||
* [youku] Update ccode (#15939)
|
||||
* [libsyn] Adapt to new page structure
|
||||
|
||||
|
||||
version 2018.03.20
|
||||
|
||||
Core
|
||||
* [extractor/common] Improve thumbnail extraction for HTML5 entries
|
||||
* Generalize XML manifest processing code and improve XSPF parsing
|
||||
+ [extractor/common] Add _download_xml_handle
|
||||
+ [extractor/common] Add support for relative URIs in _parse_xspf (#15794)
|
||||
|
||||
Extractors
|
||||
+ [7plus] Extract series metadata (#15862, #15906)
|
||||
* [9now] Bypass geo restriction (#15920)
|
||||
* [cbs] Skip unavailable assets (#13490, #13506, #15776)
|
||||
+ [canalc2] Add support for HTML5 videos (#15916, #15919)
|
||||
+ [ceskatelevize] Add support for iframe embeds (#15918)
|
||||
+ [prosiebensat1] Add support for galileo.tv (#15894)
|
||||
+ [generic] Add support for xfileshare embeds (#15879)
|
||||
* [bilibili] Switch to v2 playurl API
|
||||
* [bilibili] Fix and improve extraction (#15048, #15430, #15622, #15863)
|
||||
* [heise] Improve extraction (#15496, #15784, #15026)
|
||||
* [instagram] Fix user videos extraction (#15858)
|
||||
|
||||
|
||||
version 2018.03.14
|
||||
|
||||
Extractors
|
||||
* [soundcloud] Update client id (#15866)
|
||||
+ [tennistv] Add support for tennistv.com
|
||||
+ [line] Add support for tv.line.me (#9427)
|
||||
* [xnxx] Fix extraction (#15817)
|
||||
* [njpwworld] Fix authentication (#15815)
|
||||
|
||||
|
||||
version 2018.03.10
|
||||
|
||||
Core
|
||||
* [downloader/hls] Skip uplynk ad fragments (#15748)
|
||||
|
||||
Extractors
|
||||
* [pornhub] Don't override session cookies (#15697)
|
||||
+ [raywenderlich] Add support for videos.raywenderlich.com (#15251)
|
||||
* [funk] Fix extraction and rework extractors (#15792)
|
||||
* [nexx] Restore reverse engineered approach
|
||||
+ [heise] Add support for kaltura embeds (#14961, #15728)
|
||||
+ [tvnow] Extract series metadata (#15774)
|
||||
* [ruutu] Continue formats extraction on NOT-USED URLs (#15775)
|
||||
* [vrtnu] Use redirect URL for building video JSON URL (#15767, #15769)
|
||||
* [vimeo] Modernize login code and improve error messaging
|
||||
* [archiveorg] Fix extraction (#15770, #15772)
|
||||
+ [hidive] Add support for hidive.com (#15494)
|
||||
* [afreecatv] Detect deleted videos
|
||||
* [afreecatv] Fix extraction (#15755)
|
||||
* [vice] Fix extraction and rework extractors (#11101, #13019, #13622, #13778)
|
||||
+ [vidzi] Add support for vidzi.si (#15751)
|
||||
* [npo] Fix typo
|
||||
|
||||
|
||||
version 2018.03.03
|
||||
|
||||
Core
|
||||
+ [utils] Add parse_resolution
|
||||
* Revert respect --prefer-insecure while updating
|
||||
|
||||
Extractors
|
||||
+ [yapfiles] Add support for yapfiles.ru (#15726, #11085)
|
||||
* [spankbang] Fix formats extraction (#15727)
|
||||
* [adn] Fix extraction (#15716)
|
||||
+ [toggle] Extract DASH and ISM formats (#15721)
|
||||
+ [nickelodeon] Add support for nickelodeon.com.tr (#15706)
|
||||
* [npo] Validate and filter format URLs (#15709)
|
||||
|
||||
|
||||
version 2018.02.26
|
||||
|
||||
Extractors
|
||||
* [udemy] Use custom User-Agent (#15571)
|
||||
|
||||
|
||||
version 2018.02.25
|
||||
|
||||
Core
|
||||
* [postprocessor/embedthumbnail] Skip embedding when there aren't any
|
||||
thumbnails (#12573)
|
||||
* [extractor/common] Improve jwplayer subtitles extraction (#15695)
|
||||
|
||||
Extractors
|
||||
+ [vidlii] Add support for vidlii.com (#14472, #14512, #14779)
|
||||
+ [streamango] Capture and output error messages
|
||||
* [streamango] Fix extraction (#14160, #14256)
|
||||
+ [telequebec] Add support for emissions (#14649, #14655)
|
||||
+ [telequebec:live] Add support for live streams (#15688)
|
||||
+ [mailru:music] Add support for mail.ru/music (#15618)
|
||||
* [aenetworks] Switch to akamai HLS formats (#15612)
|
||||
* [ytsearch] Fix flat title extraction (#11260, #15681)
|
||||
|
||||
|
||||
version 2018.02.22
|
||||
|
||||
Core
|
||||
|
||||
@@ -223,7 +223,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
|
||||
## Filesystem Options:
|
||||
-a, --batch-file FILE File containing URLs to download ('-' for
|
||||
stdin)
|
||||
stdin), one URL per line. Lines starting
|
||||
with '#', ';' or ']' are treated as
|
||||
comments and ignored.
|
||||
--id Use only video ID in file name
|
||||
-o, --output TEMPLATE Output filename template, see the "OUTPUT
|
||||
TEMPLATE" for all the info
|
||||
@@ -310,7 +312,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--encoding ENCODING Force the specified encoding (experimental)
|
||||
--no-check-certificate Suppress HTTPS certificate validation
|
||||
--prefer-insecure Use an unencrypted connection to retrieve
|
||||
information whenever possible
|
||||
information about the video. (Currently
|
||||
supported only for YouTube)
|
||||
--user-agent UA Specify a custom user agent
|
||||
--referer URL Specify a custom referer, use if the video
|
||||
access is restricted to one domain
|
||||
|
||||
+15
-2
@@ -298,7 +298,8 @@
|
||||
- **freespeech.org**
|
||||
- **FreshLive**
|
||||
- **Funimation**
|
||||
- **Funk**
|
||||
- **FunkChannel**
|
||||
- **FunkMix**
|
||||
- **FunnyOrDie**
|
||||
- **Fusion**
|
||||
- **Fux**
|
||||
@@ -336,6 +337,7 @@
|
||||
- **HentaiStigma**
|
||||
- **hetklokhuis**
|
||||
- **hgtv.com:show**
|
||||
- **HiDive**
|
||||
- **HistoricFilms**
|
||||
- **history:topic**: History.com Topic
|
||||
- **hitbox**
|
||||
@@ -417,6 +419,7 @@
|
||||
- **Lecture2Go**
|
||||
- **LEGO**
|
||||
- **Lemonde**
|
||||
- **Lenta**
|
||||
- **LePlaylist**
|
||||
- **LetvCloud**: 乐视云
|
||||
- **Libsyn**
|
||||
@@ -425,6 +428,7 @@
|
||||
- **limelight**
|
||||
- **limelight:channel**
|
||||
- **limelight:channel_list**
|
||||
- **LineTV**
|
||||
- **LiTV**
|
||||
- **LiveLeak**
|
||||
- **LiveLeakEmbed**
|
||||
@@ -440,6 +444,8 @@
|
||||
- **m6**
|
||||
- **macgamestore**: MacGameStore trailers
|
||||
- **mailru**: Видео@Mail.Ru
|
||||
- **mailru:music**: Музыка@Mail.Ru
|
||||
- **mailru:music:search**: Музыка@Mail.Ru
|
||||
- **MakersChannel**
|
||||
- **MakerTV**
|
||||
- **mangomolo:live**
|
||||
@@ -672,6 +678,7 @@
|
||||
- **RaiPlay**
|
||||
- **RaiPlayLive**
|
||||
- **RaiPlayPlaylist**
|
||||
- **RayWenderlich**
|
||||
- **RBMARadio**
|
||||
- **RDS**: RDS.ca
|
||||
- **RedBullTV**
|
||||
@@ -797,6 +804,7 @@
|
||||
- **SunPorno**
|
||||
- **SVT**
|
||||
- **SVTPlay**: SVT Play and Öppet arkiv
|
||||
- **SVTSeries**
|
||||
- **SWRMediathek**
|
||||
- **Syfy**
|
||||
- **SztvHu**
|
||||
@@ -820,8 +828,11 @@
|
||||
- **Telegraaf**
|
||||
- **TeleMB**
|
||||
- **TeleQuebec**
|
||||
- **TeleQuebecEmission**
|
||||
- **TeleQuebecLive**
|
||||
- **TeleTask**
|
||||
- **Telewebion**
|
||||
- **TennisTV**
|
||||
- **TF1**
|
||||
- **TFO**
|
||||
- **TheIntercept**
|
||||
@@ -877,6 +888,7 @@
|
||||
- **TVNoe**
|
||||
- **TVNow**
|
||||
- **TVNowList**
|
||||
- **TVNowShow**
|
||||
- **tvp**: Telewizja Polska
|
||||
- **tvp:embed**: Telewizja Polska
|
||||
- **tvp:series**
|
||||
@@ -930,7 +942,6 @@
|
||||
- **vice**
|
||||
- **vice:article**
|
||||
- **vice:show**
|
||||
- **Viceland**
|
||||
- **Vidbit**
|
||||
- **Viddler**
|
||||
- **Videa**
|
||||
@@ -946,6 +957,7 @@
|
||||
- **VideoPress**
|
||||
- **videoweed**: VideoWeed
|
||||
- **Vidio**
|
||||
- **VidLii**
|
||||
- **vidme**
|
||||
- **vidme:user**
|
||||
- **vidme:user:likes**
|
||||
@@ -1050,6 +1062,7 @@
|
||||
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
||||
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
||||
- **yandexmusic:track**: Яндекс.Музыка - Трек
|
||||
- **YapFiles**
|
||||
- **YesJapan**
|
||||
- **yinyuetai:video**: 音悦Tai
|
||||
- **Ynet**
|
||||
|
||||
@@ -694,6 +694,55 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||
self.ie._sort_formats(formats)
|
||||
expect_value(self, formats, expected_formats, None)
|
||||
|
||||
def test_parse_xspf(self):
|
||||
_TEST_CASES = [
|
||||
(
|
||||
'foo_xspf',
|
||||
'https://example.org/src/foo_xspf.xspf',
|
||||
[{
|
||||
'id': 'foo_xspf',
|
||||
'title': 'Pandemonium',
|
||||
'description': 'Visit http://bigbrother404.bandcamp.com',
|
||||
'duration': 202.416,
|
||||
'formats': [{
|
||||
'manifest_url': 'https://example.org/src/foo_xspf.xspf',
|
||||
'url': 'https://example.org/src/cd1/track%201.mp3',
|
||||
}],
|
||||
}, {
|
||||
'id': 'foo_xspf',
|
||||
'title': 'Final Cartridge (Nichico Twelve Remix)',
|
||||
'description': 'Visit http://bigbrother404.bandcamp.com',
|
||||
'duration': 255.857,
|
||||
'formats': [{
|
||||
'manifest_url': 'https://example.org/src/foo_xspf.xspf',
|
||||
'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3',
|
||||
}],
|
||||
}, {
|
||||
'id': 'foo_xspf',
|
||||
'title': 'Rebuilding Nightingale',
|
||||
'description': 'Visit http://bigbrother404.bandcamp.com',
|
||||
'duration': 287.915,
|
||||
'formats': [{
|
||||
'manifest_url': 'https://example.org/src/foo_xspf.xspf',
|
||||
'url': 'https://example.org/src/track3.mp3',
|
||||
}, {
|
||||
'manifest_url': 'https://example.org/src/foo_xspf.xspf',
|
||||
'url': 'https://example.com/track3.mp3',
|
||||
}]
|
||||
}]
|
||||
),
|
||||
]
|
||||
|
||||
for xspf_file, xspf_url, expected_entries in _TEST_CASES:
|
||||
with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
|
||||
mode='r', encoding='utf-8') as f:
|
||||
entries = self.ie._parse_xspf(
|
||||
compat_etree_fromstring(f.read().encode('utf-8')),
|
||||
xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
|
||||
expect_value(self, entries, expected_entries, None)
|
||||
for i in range(len(entries)):
|
||||
expect_dict(self, entries[i], expected_entries[i])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -53,6 +53,7 @@ from youtube_dl.utils import (
|
||||
parse_filesize,
|
||||
parse_count,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
pkcs1pad,
|
||||
read_batch_urls,
|
||||
sanitize_filename,
|
||||
@@ -351,6 +352,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
|
||||
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
|
||||
self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
|
||||
self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
|
||||
|
||||
def test_determine_ext(self):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||
@@ -982,6 +984,16 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_count('1.1kk '), 1100000)
|
||||
self.assertEqual(parse_count('1.1kk views'), 1100000)
|
||||
|
||||
def test_parse_resolution(self):
|
||||
self.assertEqual(parse_resolution(None), {})
|
||||
self.assertEqual(parse_resolution(''), {})
|
||||
self.assertEqual(parse_resolution('1920x1080'), {'width': 1920, 'height': 1080})
|
||||
self.assertEqual(parse_resolution('1920×1080'), {'width': 1920, 'height': 1080})
|
||||
self.assertEqual(parse_resolution('1920 x 1080'), {'width': 1920, 'height': 1080})
|
||||
self.assertEqual(parse_resolution('720p'), {'height': 720})
|
||||
self.assertEqual(parse_resolution('4k'), {'height': 2160})
|
||||
self.assertEqual(parse_resolution('8K'), {'height': 4320})
|
||||
|
||||
def test_version_tuple(self):
|
||||
self.assertEqual(version_tuple('1'), (1,))
|
||||
self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
|
||||
|
||||
Vendored
+34
@@ -0,0 +1,34 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<playlist version="1" xmlns="http://xspf.org/ns/0/">
|
||||
<date>2018-03-09T18:01:43Z</date>
|
||||
<trackList>
|
||||
<track>
|
||||
<location>cd1/track%201.mp3</location>
|
||||
<title>Pandemonium</title>
|
||||
<creator>Foilverb</creator>
|
||||
<annotation>Visit http://bigbrother404.bandcamp.com</annotation>
|
||||
<album>Pandemonium EP</album>
|
||||
<trackNum>1</trackNum>
|
||||
<duration>202416</duration>
|
||||
</track>
|
||||
<track>
|
||||
<location>../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3</location>
|
||||
<title>Final Cartridge (Nichico Twelve Remix)</title>
|
||||
<annotation>Visit http://bigbrother404.bandcamp.com</annotation>
|
||||
<creator>Foilverb</creator>
|
||||
<album>Pandemonium EP</album>
|
||||
<trackNum>2</trackNum>
|
||||
<duration>255857</duration>
|
||||
</track>
|
||||
<track>
|
||||
<location>track3.mp3</location>
|
||||
<location>https://example.com/track3.mp3</location>
|
||||
<title>Rebuilding Nightingale</title>
|
||||
<annotation>Visit http://bigbrother404.bandcamp.com</annotation>
|
||||
<creator>Foilverb</creator>
|
||||
<album>Pandemonium EP</album>
|
||||
<trackNum>3</trackNum>
|
||||
<duration>287915</duration>
|
||||
</track>
|
||||
</trackList>
|
||||
</playlist>
|
||||
@@ -532,6 +532,8 @@ class YoutubeDL(object):
|
||||
def save_console_title(self):
|
||||
if not self.params.get('consoletitle', False):
|
||||
return
|
||||
if self.params.get('simulate', False):
|
||||
return
|
||||
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
||||
# Save the title on stack
|
||||
self._write_string('\033[22;0t', self._screen_file)
|
||||
@@ -539,6 +541,8 @@ class YoutubeDL(object):
|
||||
def restore_console_title(self):
|
||||
if not self.params.get('consoletitle', False):
|
||||
return
|
||||
if self.params.get('simulate', False):
|
||||
return
|
||||
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
||||
# Restore the title from stack
|
||||
self._write_string('\033[23;0t', self._screen_file)
|
||||
|
||||
@@ -438,7 +438,7 @@ def _real_main(argv=None):
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
# Update version
|
||||
if opts.update_self:
|
||||
update_self(ydl.to_screen, opts.verbose, ydl._opener, opts.prefer_insecure)
|
||||
update_self(ydl.to_screen, opts.verbose, ydl._opener)
|
||||
|
||||
# Remove cache dir
|
||||
if opts.rm_cachedir:
|
||||
|
||||
@@ -249,12 +249,13 @@ class FileDownloader(object):
|
||||
if self.params.get('noprogress', False):
|
||||
self.to_screen('[download] Download completed')
|
||||
else:
|
||||
s['_total_bytes_str'] = format_bytes(s['total_bytes'])
|
||||
msg_template = '100%%'
|
||||
if s.get('total_bytes') is not None:
|
||||
s['_total_bytes_str'] = format_bytes(s['total_bytes'])
|
||||
msg_template += ' of %(_total_bytes_str)s'
|
||||
if s.get('elapsed') is not None:
|
||||
s['_elapsed_str'] = self.format_seconds(s['elapsed'])
|
||||
msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
|
||||
else:
|
||||
msg_template = '100%% of %(_total_bytes_str)s'
|
||||
msg_template += ' in %(_elapsed_str)s'
|
||||
self._report_progress_status(
|
||||
msg_template % s, is_last_line=True)
|
||||
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os.path
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import (
|
||||
@@ -30,6 +31,7 @@ class ExternalFD(FileDownloader):
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
try:
|
||||
started = time.time()
|
||||
retval = self._call_downloader(tmpfilename, info_dict)
|
||||
except KeyboardInterrupt:
|
||||
if not info_dict.get('is_live'):
|
||||
@@ -41,15 +43,20 @@ class ExternalFD(FileDownloader):
|
||||
self.to_screen('[%s] Interrupted by user' % self.get_basename())
|
||||
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
status = {
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
'elapsed': time.time() - started,
|
||||
}
|
||||
if filename != '-':
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
status.update({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
})
|
||||
self._hook_progress(status)
|
||||
return True
|
||||
else:
|
||||
self.to_stderr('\n')
|
||||
|
||||
@@ -241,12 +241,16 @@ class FragmentFD(FileDownloader):
|
||||
if os.path.isfile(ytdl_filename):
|
||||
os.remove(ytdl_filename)
|
||||
elapsed = time.time() - ctx['started']
|
||||
self.try_rename(ctx['tmpfilename'], ctx['filename'])
|
||||
fsize = os.path.getsize(encodeFilename(ctx['filename']))
|
||||
|
||||
if ctx['tmpfilename'] == '-':
|
||||
downloaded_bytes = ctx['complete_frags_downloaded_bytes']
|
||||
else:
|
||||
self.try_rename(ctx['tmpfilename'], ctx['filename'])
|
||||
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'downloaded_bytes': downloaded_bytes,
|
||||
'total_bytes': downloaded_bytes,
|
||||
'filename': ctx['filename'],
|
||||
'status': 'finished',
|
||||
'elapsed': elapsed,
|
||||
|
||||
@@ -75,8 +75,9 @@ class HlsFD(FragmentFD):
|
||||
fd.add_progress_hook(ph)
|
||||
return fd.real_download(filename, info_dict)
|
||||
|
||||
def anvato_ad(s):
|
||||
return s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
|
||||
def is_ad_fragment(s):
|
||||
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or
|
||||
s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
|
||||
|
||||
media_frags = 0
|
||||
ad_frags = 0
|
||||
@@ -86,7 +87,7 @@ class HlsFD(FragmentFD):
|
||||
if not line:
|
||||
continue
|
||||
if line.startswith('#'):
|
||||
if anvato_ad(line):
|
||||
if is_ad_fragment(line):
|
||||
ad_frags += 1
|
||||
ad_frag_next = True
|
||||
continue
|
||||
@@ -195,7 +196,7 @@ class HlsFD(FragmentFD):
|
||||
'start': sub_range_start,
|
||||
'end': sub_range_start + int(splitted_byte_range[0]),
|
||||
}
|
||||
elif anvato_ad(line):
|
||||
elif is_ad_fragment(line):
|
||||
ad_frag_next = True
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
@@ -13,6 +13,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
@@ -109,16 +110,17 @@ class ABCIViewIE(InfoExtractor):
|
||||
|
||||
# ABC iview programs are normally available for 14 days only.
|
||||
_TESTS = [{
|
||||
'url': 'http://iview.abc.net.au/programs/call-the-midwife/ZW0898A003S00',
|
||||
'url': 'https://iview.abc.net.au/programs/ben-and-hollys-little-kingdom/ZY9247A021S00',
|
||||
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
|
||||
'info_dict': {
|
||||
'id': 'ZW0898A003S00',
|
||||
'id': 'ZY9247A021S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Series 5 Ep 3',
|
||||
'description': 'md5:e0ef7d4f92055b86c4f33611f180ed79',
|
||||
'upload_date': '20171228',
|
||||
'uploader_id': 'abc1',
|
||||
'timestamp': 1514499187,
|
||||
'title': "Gaston's Visit",
|
||||
'series': "Ben And Holly's Little Kingdom",
|
||||
'description': 'md5:18db170ad71cf161e006a4c688e33155',
|
||||
'upload_date': '20180318',
|
||||
'uploader_id': 'abc4kids',
|
||||
'timestamp': 1521400959,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -169,12 +171,12 @@ class ABCIViewIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': unescapeHTML(title),
|
||||
'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
|
||||
'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage),
|
||||
'duration': int_or_none(video_params.get('eventDuration')),
|
||||
'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
|
||||
'series': video_params.get('seriesTitle'),
|
||||
'series': unescapeHTML(video_params.get('seriesTitle')),
|
||||
'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
|
||||
'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)),
|
||||
'episode': self._html_search_meta('episode_title', webpage, default=None),
|
||||
|
||||
@@ -66,7 +66,7 @@ class AbcNewsIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
|
||||
'info_dict': {
|
||||
'id': '10498713',
|
||||
'id': '10505354',
|
||||
'ext': 'flv',
|
||||
'display_id': 'dramatic-video-rare-death-job-america',
|
||||
'title': 'Occupational Hazards',
|
||||
@@ -79,7 +79,7 @@ class AbcNewsIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
|
||||
'info_dict': {
|
||||
'id': '39125818',
|
||||
'id': '38897857',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
|
||||
'title': 'Justin Timberlake Drops Hints For Secret Single',
|
||||
|
||||
@@ -7,7 +7,9 @@ import functools
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
OnDemandPagedList,
|
||||
)
|
||||
@@ -24,40 +26,58 @@ class ACastIE(InfoExtractor):
|
||||
'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
|
||||
'ext': 'mp3',
|
||||
'title': '"Where Are You?": Taipei 101, Taiwan',
|
||||
'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
|
||||
'timestamp': 1196172000,
|
||||
'upload_date': '20071127',
|
||||
'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
|
||||
'duration': 211,
|
||||
'creator': 'Concierge',
|
||||
'series': 'Condé Nast Traveler Podcast',
|
||||
'episode': '"Where Are You?": Taipei 101, Taiwan',
|
||||
}
|
||||
}, {
|
||||
# test with multiple blings
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'md5': 'e87d5b8516cd04c0d81b6ee1caca28d0',
|
||||
'md5': 'a02393c74f3bdb1801c3ec2695577ce0',
|
||||
'info_dict': {
|
||||
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'ext': 'mp3',
|
||||
'title': '2. Raggarmordet - Röster ur det förflutna',
|
||||
'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
|
||||
'timestamp': 1477346700,
|
||||
'upload_date': '20161024',
|
||||
'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
|
||||
'duration': 2766,
|
||||
'duration': 2766.602563,
|
||||
'creator': 'Anton Berg & Martin Johnson',
|
||||
'series': 'Spår',
|
||||
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel, display_id = re.match(self._VALID_URL, url).groups()
|
||||
s = self._download_json(
|
||||
'https://play-api.acast.com/stitch/%s/%s' % (channel, display_id),
|
||||
display_id)['result']
|
||||
media_url = s['url']
|
||||
cast_data = self._download_json(
|
||||
'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), display_id)
|
||||
e = cast_data['result']['episode']
|
||||
'https://play-api.acast.com/splash/%s/%s' % (channel, display_id),
|
||||
display_id)['result']
|
||||
e = cast_data['episode']
|
||||
title = e['name']
|
||||
return {
|
||||
'id': compat_str(e['id']),
|
||||
'display_id': display_id,
|
||||
'url': e['mediaUrl'],
|
||||
'title': e['name'],
|
||||
'description': e.get('description'),
|
||||
'url': media_url,
|
||||
'title': title,
|
||||
'description': e.get('description') or e.get('summary'),
|
||||
'thumbnail': e.get('image'),
|
||||
'timestamp': unified_timestamp(e.get('publishingDate')),
|
||||
'duration': int_or_none(e.get('duration')),
|
||||
'duration': float_or_none(s.get('duration') or e.get('duration')),
|
||||
'filesize': int_or_none(e.get('contentLength')),
|
||||
'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str),
|
||||
'series': try_get(cast_data, lambda x: x['show']['name'], compat_str),
|
||||
'season_number': int_or_none(e.get('seasonNumber')),
|
||||
'episode': title,
|
||||
'episode_number': int_or_none(e.get('episodeNumber')),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ class ADNIE(InfoExtractor):
|
||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
||||
bytes_to_intlist(b'\x1b\xe0\x29\x61\x38\x94\x24\x00\x12\xbd\xc5\x80\xac\xce\xbe\xb0'),
|
||||
bytes_to_intlist(b'\xc8\x6e\x06\xbc\xbe\xc6\x49\xf5\x88\x0d\xc8\x47\xc4\x27\x0c\x60'),
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
||||
))
|
||||
subtitles_json = self._parse_json(
|
||||
@@ -107,15 +107,18 @@ class ADNIE(InfoExtractor):
|
||||
|
||||
options = player_config.get('options') or {}
|
||||
metas = options.get('metas') or {}
|
||||
title = metas.get('title') or video_info['title']
|
||||
links = player_config.get('links') or {}
|
||||
sub_path = player_config.get('subtitles')
|
||||
error = None
|
||||
if not links:
|
||||
links_url = player_config['linksurl']
|
||||
links_url = player_config.get('linksurl') or options['videoUrl']
|
||||
links_data = self._download_json(urljoin(
|
||||
self._BASE_URL, links_url), video_id)
|
||||
links = links_data.get('links') or {}
|
||||
metas = metas or links_data.get('meta') or {}
|
||||
sub_path = sub_path or links_data.get('subtitles')
|
||||
error = links_data.get('error')
|
||||
title = metas.get('title') or video_info['title']
|
||||
|
||||
formats = []
|
||||
for format_id, qualities in links.items():
|
||||
@@ -146,7 +149,7 @@ class ADNIE(InfoExtractor):
|
||||
'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
|
||||
'thumbnail': video_info.get('image'),
|
||||
'formats': formats,
|
||||
'subtitles': self.extract_subtitles(player_config.get('subtitles'), video_id),
|
||||
'subtitles': self.extract_subtitles(sub_path, video_id),
|
||||
'episode': metas.get('subtitle') or video_info.get('videoTitle'),
|
||||
'series': video_info.get('playlistTitle'),
|
||||
}
|
||||
|
||||
@@ -122,7 +122,8 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'assetTypes': 'high_video_s3'
|
||||
'assetTypes': 'high_video_ak',
|
||||
'switch': 'hls_high_ak',
|
||||
}
|
||||
video_id = self._html_search_meta('aetn:VideoID', webpage)
|
||||
media_url = self._search_regex(
|
||||
|
||||
@@ -9,6 +9,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
@@ -28,6 +29,7 @@ class AfreecaTVIE(InfoExtractor):
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_NETRC_MACHINE = 'afreecatv'
|
||||
_TESTS = [{
|
||||
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
|
||||
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
|
||||
@@ -139,22 +141,22 @@ class AfreecaTVIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# adult video
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/26542731',
|
||||
# PARTIAL_ADULT
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/32028439',
|
||||
'info_dict': {
|
||||
'id': '20171001_F1AE1711_196617479_1',
|
||||
'id': '20180327_27901457_202289533_1',
|
||||
'ext': 'mp4',
|
||||
'title': '[생]서아 초심 찾기 방송 (part 1)',
|
||||
'title': '[생]빨개요♥ (part 1)',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'BJ서아',
|
||||
'uploader': '[SA]서아',
|
||||
'uploader_id': 'bjdyrksu',
|
||||
'upload_date': '20171001',
|
||||
'duration': 3600,
|
||||
'age_limit': 18,
|
||||
'upload_date': '20180327',
|
||||
'duration': 3601,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['adult content'],
|
||||
}, {
|
||||
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
||||
'only_matching': True,
|
||||
@@ -172,11 +174,60 @@ class AfreecaTVIE(InfoExtractor):
|
||||
video_key['part'] = int(m.group('part'))
|
||||
return video_key
|
||||
|
||||
def _real_initialize(self):
    """Initialization hook: delegate to ``_login()``."""
    self._login()
|
||||
|
||||
def _login(self):
    """Log in to AfreecaTV when credentials are available.

    Returns immediately when ``_get_login_info()`` yields no username.
    Otherwise the login form is POSTed to ``LoginAction.php`` and the
    ``RESULT`` field of the JSON reply is checked; any value other than 1
    is treated as a failed login.

    Raises:
        ExtractorError: (``expected=True``) when ``RESULT`` is not 1. The
            message is looked up by result code, with a generic fallback
            for codes that are not listed.
    """
    username, password = self._get_login_info()
    if username is None:
        return

    response = self._download_json(
        'https://login.afreecatv.com/app/LoginAction.php', None,
        'Logging in', data=urlencode_postdata({
            'szWork': 'login',
            'szType': 'json',
            'szUid': username,
            'szPassword': password,
            'isSaveId': 'false',
            'szScriptVar': 'oLoginRet',
            'szAction': '',
        }))

    result = int_or_none(response.get('RESULT'))
    if result == 1:
        return

    # RESULT code -> text shown to the user.  Several codes map to a URL
    # rather than a sentence; the value is reported verbatim either way.
    error_messages = {
        -4: 'Your account has been suspended due to a violation of our terms and policies.',
        -5: 'https://member.afreecatv.com/app/user_delete_progress.php',
        -6: 'https://login.afreecatv.com/membership/changeMember.php',
        -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
        -9: 'https://member.afreecatv.com/app/pop_login_block.php',
        -11: 'https://login.afreecatv.com/afreeca/second_login.php',
        -12: 'https://member.afreecatv.com/app/user_security.php',
        0: 'The username does not exist or you have entered the wrong password.',
        -1: 'The username does not exist or you have entered the wrong password.',
        -3: 'You have entered your username/password incorrectly.',
        -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
        -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
        -32008: 'You have failed to log in. Please contact our Help Center.',
    }
    error = error_messages.get(result, 'You have failed to log in.')
    raise ExtractorError(
        'Unable to login: %s said: %s' % (self.IE_NAME, error),
        expected=True)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if re.search(r'alert\(["\']This video has been deleted', webpage):
|
||||
raise ExtractorError(
|
||||
'Video %s has been deleted' % video_id, expected=True)
|
||||
|
||||
station_id = self._search_regex(
|
||||
r'nStationNo\s*=\s*(\d+)', webpage, 'station')
|
||||
bbs_id = self._search_regex(
|
||||
@@ -184,26 +235,46 @@ class AfreecaTVIE(InfoExtractor):
|
||||
video_id = self._search_regex(
|
||||
r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
|
||||
|
||||
video_xml = self._download_xml(
|
||||
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
|
||||
video_id, headers={
|
||||
'Referer': 'http://vod.afreecatv.com/embed.php',
|
||||
}, query={
|
||||
partial_view = False
|
||||
for _ in range(2):
|
||||
query = {
|
||||
'nTitleNo': video_id,
|
||||
'nStationNo': station_id,
|
||||
'nBbsNo': bbs_id,
|
||||
'partialView': 'SKIP_ADULT',
|
||||
})
|
||||
}
|
||||
if partial_view:
|
||||
query['partialView'] = 'SKIP_ADULT'
|
||||
video_xml = self._download_xml(
|
||||
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
|
||||
video_id, 'Downloading video info XML%s'
|
||||
% (' (skipping adult)' if partial_view else ''),
|
||||
video_id, headers={
|
||||
'Referer': url,
|
||||
}, query=query)
|
||||
|
||||
flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
|
||||
if flag and flag != 'SUCCEED':
|
||||
flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
|
||||
if flag and flag == 'SUCCEED':
|
||||
break
|
||||
if flag == 'PARTIAL_ADULT':
|
||||
self._downloader.report_warning(
|
||||
'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
|
||||
'Only content suitable for all ages will be downloaded. '
|
||||
'Provide account credentials if you wish to download restricted content.')
|
||||
partial_view = True
|
||||
continue
|
||||
elif flag == 'ADULT':
|
||||
error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
|
||||
else:
|
||||
error = flag
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, flag), expected=True)
|
||||
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unable to download video info')
|
||||
|
||||
video_element = video_xml.findall(compat_xpath('./track/video'))[1]
|
||||
video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
|
||||
if video_element is None or video_element.text is None:
|
||||
raise ExtractorError('Specified AfreecaTV video does not exist',
|
||||
expected=True)
|
||||
raise ExtractorError(
|
||||
'Video %s video does not exist' % video_id, expected=True)
|
||||
|
||||
video_url = video_element.text.strip()
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://archive.org/embed/' + video_id, video_id)
|
||||
jwplayer_playlist = self._parse_json(self._search_regex(
|
||||
r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\);",
|
||||
r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
|
||||
webpage, 'jwplayer playlist'), video_id)
|
||||
info = self._parse_jwplayer_data(
|
||||
{'playlist': jwplayer_playlist}, video_id, base_url=url)
|
||||
|
||||
+22
-46
@@ -24,57 +24,30 @@ class ARDMediathekIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114',
|
||||
# available till 26.07.2022
|
||||
'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
|
||||
'info_dict': {
|
||||
'id': '29582122',
|
||||
'id': '44726822',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ich liebe das Leben trotzdem',
|
||||
'description': 'md5:45e4c225c72b27993314b31a84a5261c',
|
||||
'duration': 4557,
|
||||
'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
|
||||
'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
|
||||
'duration': 1740,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
|
||||
'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
|
||||
'info_dict': {
|
||||
'id': '29522730',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tatort: Scheinwelten - Hörfassung (Video tgl. ab 20 Uhr)',
|
||||
'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
|
||||
'duration': 5252,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||
'md5': '219d94d8980b4f538c7fcb0865eb7f2c',
|
||||
'info_dict': {
|
||||
'id': '28488308',
|
||||
'ext': 'mp3',
|
||||
'title': 'Tod eines Fußballers',
|
||||
'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
|
||||
'duration': 3240,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
|
||||
'md5': '4e8f00631aac0395fee17368ac0e9867',
|
||||
'info_dict': {
|
||||
'id': '30796318',
|
||||
'ext': 'mp3',
|
||||
'title': 'Vor dem Fest',
|
||||
'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb',
|
||||
'duration': 3287,
|
||||
},
|
||||
'skip': 'Video is no longer available',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||
@@ -252,20 +225,23 @@ class ARDMediathekIE(InfoExtractor):
|
||||
|
||||
class ARDIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
||||
'md5': 'd216c3a86493f9322545e045ddc3eb35',
|
||||
_TESTS = [{
|
||||
# available till 14.02.2019
|
||||
'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
|
||||
'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
|
||||
'info_dict': {
|
||||
'display_id': 'die-story-im-ersten-mission-unter-falscher-flagge',
|
||||
'id': '100',
|
||||
'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
|
||||
'id': '102',
|
||||
'ext': 'mp4',
|
||||
'duration': 2600,
|
||||
'title': 'Die Story im Ersten: Mission unter falscher Flagge',
|
||||
'upload_date': '20140804',
|
||||
'duration': 4435.0,
|
||||
'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
|
||||
'upload_date': '20180214',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
@@ -27,14 +27,14 @@ class BiliBiliIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
|
||||
'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
|
||||
'info_dict': {
|
||||
'id': '1074402',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||
'duration': 308.315,
|
||||
'timestamp': 1398012660,
|
||||
'duration': 308.067,
|
||||
'timestamp': 1398012678,
|
||||
'upload_date': '20140420',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'uploader': '菊子桑',
|
||||
@@ -59,17 +59,38 @@ class BiliBiliIE(InfoExtractor):
|
||||
'url': 'http://www.bilibili.com/video/av8903802/',
|
||||
'info_dict': {
|
||||
'id': '8903802',
|
||||
'ext': 'mp4',
|
||||
'title': '阿滴英文|英文歌分享#6 "Closer',
|
||||
'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
|
||||
'uploader': '阿滴英文',
|
||||
'uploader_id': '65880958',
|
||||
'timestamp': 1488382620,
|
||||
'upload_date': '20170301',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Test metadata only
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '8903802_part1',
|
||||
'ext': 'flv',
|
||||
'title': '阿滴英文|英文歌分享#6 "Closer',
|
||||
'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
|
||||
'uploader': '阿滴英文',
|
||||
'uploader_id': '65880958',
|
||||
'timestamp': 1488382634,
|
||||
'upload_date': '20170301',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Test metadata only
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '8903802_part2',
|
||||
'ext': 'flv',
|
||||
'title': '阿滴英文|英文歌分享#6 "Closer',
|
||||
'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
|
||||
'uploader': '阿滴英文',
|
||||
'uploader_id': '65880958',
|
||||
'timestamp': 1488382634,
|
||||
'upload_date': '20170301',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Test metadata only
|
||||
},
|
||||
}]
|
||||
}]
|
||||
|
||||
_APP_KEY = '84956560bc028eb7'
|
||||
@@ -92,8 +113,12 @@ class BiliBiliIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if 'anime/' not in url:
|
||||
cid = compat_parse_qs(self._search_regex(
|
||||
cid = self._search_regex(
|
||||
r'cid(?:["\']:|=)(\d+)', webpage, 'cid',
|
||||
default=None
|
||||
) or compat_parse_qs(self._search_regex(
|
||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
|
||||
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||
webpage, 'player parameters'))['cid'][0]
|
||||
else:
|
||||
@@ -114,53 +139,66 @@ class BiliBiliIE(InfoExtractor):
|
||||
self._report_error(js)
|
||||
cid = js['result']['cid']
|
||||
|
||||
payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
|
||||
sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
|
||||
|
||||
headers = {
|
||||
'Referer': url
|
||||
}
|
||||
headers.update(self.geo_verification_headers())
|
||||
|
||||
video_info = self._download_json(
|
||||
'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
|
||||
video_id, note='Downloading video info page',
|
||||
headers=headers)
|
||||
|
||||
if 'durl' not in video_info:
|
||||
self._report_error(video_info)
|
||||
|
||||
entries = []
|
||||
|
||||
for idx, durl in enumerate(video_info['durl']):
|
||||
formats = [{
|
||||
'url': durl['url'],
|
||||
'filesize': int_or_none(durl['size']),
|
||||
}]
|
||||
for backup_url in durl.get('backup_url', []):
|
||||
formats.append({
|
||||
'url': backup_url,
|
||||
# backup URLs have lower priorities
|
||||
'preference': -2 if 'hd.mp4' in backup_url else -3,
|
||||
RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
|
||||
for num, rendition in enumerate(RENDITIONS, start=1):
|
||||
payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
|
||||
sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
|
||||
|
||||
video_info = self._download_json(
|
||||
'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
|
||||
video_id, note='Downloading video info page',
|
||||
headers=headers, fatal=num == len(RENDITIONS))
|
||||
|
||||
if not video_info:
|
||||
continue
|
||||
|
||||
if 'durl' not in video_info:
|
||||
if num < len(RENDITIONS):
|
||||
continue
|
||||
self._report_error(video_info)
|
||||
|
||||
for idx, durl in enumerate(video_info['durl']):
|
||||
formats = [{
|
||||
'url': durl['url'],
|
||||
'filesize': int_or_none(durl['size']),
|
||||
}]
|
||||
for backup_url in durl.get('backup_url', []):
|
||||
formats.append({
|
||||
'url': backup_url,
|
||||
# backup URLs have lower priorities
|
||||
'preference': -2 if 'hd.mp4' in backup_url else -3,
|
||||
})
|
||||
|
||||
for a_format in formats:
|
||||
a_format.setdefault('http_headers', {}).update({
|
||||
'Referer': url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': '%s_part%s' % (video_id, idx),
|
||||
'duration': float_or_none(durl.get('length'), 1000),
|
||||
'formats': formats,
|
||||
})
|
||||
break
|
||||
|
||||
for a_format in formats:
|
||||
a_format.setdefault('http_headers', {}).update({
|
||||
'Referer': url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': '%s_part%s' % (video_id, idx),
|
||||
'duration': float_or_none(durl.get('length'), 1000),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
title = self._html_search_regex('<h1[^>]*>([^<]+)</h1>', webpage, 'title')
|
||||
title = self._html_search_regex(
|
||||
('<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||
'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
|
||||
group='title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
|
||||
default=None) or self._html_search_meta(
|
||||
'uploadDate', webpage, 'timestamp', default=None))
|
||||
thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
|
||||
|
||||
# TODO 'view_count' requires deobfuscating Javascript
|
||||
@@ -174,13 +212,16 @@ class BiliBiliIE(InfoExtractor):
|
||||
}
|
||||
|
||||
uploader_mobj = re.search(
|
||||
r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
|
||||
r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>(?P<name>[^<]+)',
|
||||
webpage)
|
||||
if uploader_mobj:
|
||||
info.update({
|
||||
'uploader': uploader_mobj.group('name'),
|
||||
'uploader_id': uploader_mobj.group('id'),
|
||||
})
|
||||
if not info.get('uploader'):
|
||||
info['uploader'] = self._html_search_meta(
|
||||
'author', webpage, 'uploader', default=None)
|
||||
|
||||
for entry in entries:
|
||||
entry.update(info)
|
||||
|
||||
@@ -31,6 +31,10 @@ class Canalc2IE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://www.canalc2.tv/video/%s' % video_id, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.+?)</h3>',
|
||||
webpage, 'title')
|
||||
|
||||
formats = []
|
||||
for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage):
|
||||
if video_url.startswith('rtmp://'):
|
||||
@@ -49,17 +53,21 @@ class Canalc2IE(InfoExtractor):
|
||||
'url': video_url,
|
||||
'format_id': 'http',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.*?)</h3>', webpage, 'title')
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'id=["\']video_duree["\'][^>]*>([^<]+)',
|
||||
webpage, 'duration', fatal=False))
|
||||
if formats:
|
||||
info = {
|
||||
'formats': formats,
|
||||
}
|
||||
else:
|
||||
info = self._parse_html5_media_entries(url, webpage, url)[0]
|
||||
|
||||
return {
|
||||
self._sort_formats(info['formats'])
|
||||
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
'duration': parse_duration(self._search_regex(
|
||||
r'id=["\']video_duree["\'][^>]*>([^<]+)',
|
||||
webpage, 'duration', fatal=False)),
|
||||
})
|
||||
return info
|
||||
|
||||
@@ -246,7 +246,7 @@ class VrtNUIE(GigyaBaseIE):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?ms)<h1 class="content__heading">(.+?)</h1>',
|
||||
@@ -276,7 +276,7 @@ class VrtNUIE(GigyaBaseIE):
|
||||
webpage, 'release_date', default=None))
|
||||
|
||||
# If there's a ? or a # in the URL, remove them and everything after
|
||||
clean_url = url.split('?')[0].split('#')[0].strip('/')
|
||||
clean_url = urlh.geturl().split('?')[0].split('#')[0].strip('/')
|
||||
securevideo_url = clean_url + '.mssecurevideo.json'
|
||||
|
||||
try:
|
||||
|
||||
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from .theplatform import ThePlatformFeedIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
find_xpath_attr,
|
||||
xpath_element,
|
||||
@@ -61,6 +62,7 @@ class CBSIE(CBSBaseIE):
|
||||
asset_types = []
|
||||
subtitles = {}
|
||||
formats = []
|
||||
last_e = None
|
||||
for item in items_data.findall('.//item'):
|
||||
asset_type = xpath_text(item, 'assetType')
|
||||
if not asset_type or asset_type in asset_types:
|
||||
@@ -74,11 +76,17 @@ class CBSIE(CBSBaseIE):
|
||||
query['formats'] = 'MPEG4,M3U'
|
||||
elif asset_type in ('RTMP', 'WIFI', '3G'):
|
||||
query['formats'] = 'MPEG4,FLV'
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query(tp_release_url, query), content_id,
|
||||
'Downloading %s SMIL data' % asset_type)
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query(tp_release_url, query), content_id,
|
||||
'Downloading %s SMIL data' % asset_type)
|
||||
except ExtractorError as e:
|
||||
last_e = e
|
||||
continue
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
if last_e and not formats:
|
||||
raise last_e
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = self._extract_theplatform_metadata(tp_path, content_id)
|
||||
|
||||
@@ -13,6 +13,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
USER_AGENTS,
|
||||
)
|
||||
@@ -265,6 +266,10 @@ class CeskaTelevizePoradyIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# iframe embed
|
||||
'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -272,8 +277,11 @@ class CeskaTelevizePoradyIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data_url = unescapeHTML(self._search_regex(
|
||||
r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'iframe player url', group='url'))
|
||||
data_url = update_url_query(unescapeHTML(self._search_regex(
|
||||
(r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
|
||||
webpage, 'iframe player url', group='url')), query={
|
||||
'autoStart': 'true',
|
||||
})
|
||||
|
||||
return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
|
||||
|
||||
@@ -644,19 +644,31 @@ class InfoExtractor(object):
|
||||
content, _ = res
|
||||
return content
|
||||
|
||||
def _download_xml_handle(
        self, url_or_request, video_id, note='Downloading XML',
        errnote='Unable to download XML', transform_source=None,
        fatal=True, encoding=None, data=None, headers={}, query={}):
    """Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle)

    The page is fetched through ``_download_webpage_handle`` and its text
    is passed to ``_parse_xml`` together with ``transform_source`` and
    ``fatal``.  When the download step returns ``False``, ``False`` is
    returned instead of a tuple.

    ``headers`` and ``query`` are only forwarded, never mutated, by this
    method.
    """
    download_result = self._download_webpage_handle(
        url_or_request, video_id, note, errnote, fatal=fatal,
        encoding=encoding, data=data, headers=headers, query=query)
    if download_result is False:
        return False

    xml_string, urlh = download_result
    xml_doc = self._parse_xml(
        xml_string, video_id, transform_source=transform_source,
        fatal=fatal)
    return xml_doc, urlh
|
||||
|
||||
def _download_xml(self, url_or_request, video_id,
|
||||
note='Downloading XML', errnote='Unable to download XML',
|
||||
transform_source=None, fatal=True, encoding=None,
|
||||
data=None, headers={}, query={}):
|
||||
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||
xml_string = self._download_webpage(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal,
|
||||
encoding=encoding, data=data, headers=headers, query=query)
|
||||
if xml_string is False:
|
||||
return xml_string
|
||||
return self._parse_xml(
|
||||
xml_string, video_id, transform_source=transform_source,
|
||||
fatal=fatal)
|
||||
res = self._download_xml_handle(
|
||||
url_or_request, video_id, note=note, errnote=errnote,
|
||||
transform_source=transform_source, fatal=fatal, encoding=encoding,
|
||||
data=data, headers=headers, query=query)
|
||||
return res if res is False else res[0]
|
||||
|
||||
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
|
||||
if transform_source:
|
||||
@@ -1013,7 +1025,7 @@ class InfoExtractor(object):
|
||||
})
|
||||
|
||||
for e in json_ld:
|
||||
if e.get('@context') == 'http://schema.org':
|
||||
if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')):
|
||||
item_type = e.get('@type')
|
||||
if expected_type is not None and expected_type != item_type:
|
||||
return info
|
||||
@@ -1694,22 +1706,24 @@ class InfoExtractor(object):
|
||||
})
|
||||
return subtitles
|
||||
|
||||
def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True):
|
||||
def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
|
||||
xspf = self._download_xml(
|
||||
playlist_url, playlist_id, 'Downloading xpsf playlist',
|
||||
xspf_url, playlist_id, 'Downloading xpsf playlist',
|
||||
'Unable to download xspf manifest', fatal=fatal)
|
||||
if xspf is False:
|
||||
return []
|
||||
return self._parse_xspf(xspf, playlist_id)
|
||||
return self._parse_xspf(
|
||||
xspf, playlist_id, xspf_url=xspf_url,
|
||||
xspf_base_url=base_url(xspf_url))
|
||||
|
||||
def _parse_xspf(self, playlist, playlist_id):
|
||||
def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):
|
||||
NS_MAP = {
|
||||
'xspf': 'http://xspf.org/ns/0/',
|
||||
's1': 'http://static.streamone.nl/player/ns/0',
|
||||
}
|
||||
|
||||
entries = []
|
||||
for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
|
||||
for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
|
||||
title = xpath_text(
|
||||
track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
|
||||
description = xpath_text(
|
||||
@@ -1719,12 +1733,18 @@ class InfoExtractor(object):
|
||||
duration = float_or_none(
|
||||
xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
|
||||
|
||||
formats = [{
|
||||
'url': location.text,
|
||||
'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
|
||||
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
|
||||
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
|
||||
} for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
|
||||
formats = []
|
||||
for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)):
|
||||
format_url = urljoin(xspf_base_url, location.text)
|
||||
if not format_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'manifest_url': xspf_url,
|
||||
'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
|
||||
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
|
||||
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
@@ -1738,18 +1758,18 @@ class InfoExtractor(object):
|
||||
return entries
|
||||
|
||||
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
|
||||
res = self._download_webpage_handle(
|
||||
res = self._download_xml_handle(
|
||||
mpd_url, video_id,
|
||||
note=note or 'Downloading MPD manifest',
|
||||
errnote=errnote or 'Failed to download MPD manifest',
|
||||
fatal=fatal)
|
||||
if res is False:
|
||||
return []
|
||||
mpd, urlh = res
|
||||
mpd_doc, urlh = res
|
||||
mpd_base_url = base_url(urlh.geturl())
|
||||
|
||||
return self._parse_mpd_formats(
|
||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
|
||||
mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url,
|
||||
formats_dict=formats_dict, mpd_url=mpd_url)
|
||||
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
|
||||
@@ -2023,17 +2043,16 @@ class InfoExtractor(object):
|
||||
return formats
|
||||
|
||||
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
|
||||
res = self._download_webpage_handle(
|
||||
res = self._download_xml_handle(
|
||||
ism_url, video_id,
|
||||
note=note or 'Downloading ISM manifest',
|
||||
errnote=errnote or 'Failed to download ISM manifest',
|
||||
fatal=fatal)
|
||||
if res is False:
|
||||
return []
|
||||
ism, urlh = res
|
||||
ism_doc, urlh = res
|
||||
|
||||
return self._parse_ism_formats(
|
||||
compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
|
||||
return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
|
||||
|
||||
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
|
||||
"""
|
||||
@@ -2131,8 +2150,8 @@ class InfoExtractor(object):
|
||||
return formats
|
||||
|
||||
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None):
|
||||
def absolute_url(video_url):
|
||||
return compat_urlparse.urljoin(base_url, video_url)
|
||||
def absolute_url(item_url):
|
||||
return urljoin(base_url, item_url)
|
||||
|
||||
def parse_content_type(content_type):
|
||||
if not content_type:
|
||||
@@ -2189,7 +2208,7 @@ class InfoExtractor(object):
|
||||
if src:
|
||||
_, formats = _media_formats(src, media_type)
|
||||
media_info['formats'].extend(formats)
|
||||
media_info['thumbnail'] = media_attributes.get('poster')
|
||||
media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
|
||||
if media_content:
|
||||
for source_tag in re.findall(r'<source[^>]+>', media_content):
|
||||
source_attributes = extract_attributes(source_tag)
|
||||
@@ -2353,7 +2372,10 @@ class InfoExtractor(object):
|
||||
for track in tracks:
|
||||
if not isinstance(track, dict):
|
||||
continue
|
||||
if track.get('kind') != 'captions':
|
||||
track_kind = track.get('kind')
|
||||
if not track_kind or not isinstance(track_kind, compat_str):
|
||||
continue
|
||||
if track_kind.lower() not in ('captions', 'subtitles'):
|
||||
continue
|
||||
track_url = urljoin(base_url, track.get('file'))
|
||||
if not track_url:
|
||||
|
||||
+140
-117
@@ -1,31 +1,45 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals, division
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
class CrackleIE(InfoExtractor):
|
||||
_GEO_COUNTRIES = ['US']
|
||||
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934',
|
||||
# geo restricted to CA
|
||||
'url': 'https://www.crackle.com/andromeda/2502343',
|
||||
'info_dict': {
|
||||
'id': '2498934',
|
||||
'id': '2502343',
|
||||
'ext': 'mp4',
|
||||
'title': 'Everybody Respects A Bloody Nose',
|
||||
'description': 'Jerry is kaffeeklatsching in L.A. with funnyman J.B. Smoove (Saturday Night Live, Real Husbands of Hollywood). They’re headed for brew at 10 Speed Coffee in a 1964 Studebaker Avanti.',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 906,
|
||||
'series': 'Comedians In Cars Getting Coffee',
|
||||
'season_number': 8,
|
||||
'episode_number': 4,
|
||||
'subtitles': {
|
||||
'en-US': [
|
||||
{'ext': 'vtt'},
|
||||
{'ext': 'tt'},
|
||||
]
|
||||
},
|
||||
'title': 'Under The Night',
|
||||
'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a',
|
||||
'duration': 2583,
|
||||
'view_count': int,
|
||||
'average_rating': 0,
|
||||
'age_limit': 14,
|
||||
'genre': 'Action, Sci-Fi',
|
||||
'creator': 'Allan Kroeker',
|
||||
'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe',
|
||||
'release_year': 2000,
|
||||
'series': 'Andromeda',
|
||||
'episode': 'Under The Night',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -33,109 +47,118 @@ class CrackleIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
|
||||
_THUMBNAIL_RES = [
|
||||
(120, 90),
|
||||
(208, 156),
|
||||
(220, 124),
|
||||
(220, 220),
|
||||
(240, 180),
|
||||
(250, 141),
|
||||
(315, 236),
|
||||
(320, 180),
|
||||
(360, 203),
|
||||
(400, 300),
|
||||
(421, 316),
|
||||
(460, 330),
|
||||
(460, 460),
|
||||
(462, 260),
|
||||
(480, 270),
|
||||
(587, 330),
|
||||
(640, 480),
|
||||
(700, 330),
|
||||
(700, 394),
|
||||
(854, 480),
|
||||
(1024, 1024),
|
||||
(1920, 1080),
|
||||
]
|
||||
|
||||
# extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
|
||||
_MEDIA_FILE_SLOTS = {
|
||||
'c544.flv': {
|
||||
'width': 544,
|
||||
'height': 306,
|
||||
},
|
||||
'360p.mp4': {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
},
|
||||
'480p.mp4': {
|
||||
'width': 852,
|
||||
'height': 478,
|
||||
},
|
||||
'480p_1mbps.mp4': {
|
||||
'width': 852,
|
||||
'height': 478,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
config_doc = self._download_xml(
|
||||
'http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx?site=16',
|
||||
video_id, 'Downloading config')
|
||||
country_code = self._downloader.params.get('geo_bypass_country', None)
|
||||
countries = [country_code] if country_code else (
|
||||
'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI')
|
||||
|
||||
item = self._download_xml(
|
||||
'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
|
||||
video_id, headers=self.geo_verification_headers()).find('i')
|
||||
title = item.attrib['t']
|
||||
last_e = None
|
||||
|
||||
subtitles = {}
|
||||
formats = self._extract_m3u8_formats(
|
||||
'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id),
|
||||
video_id, 'mp4', m3u8_id='hls', fatal=None)
|
||||
thumbnails = []
|
||||
path = item.attrib.get('p')
|
||||
if path:
|
||||
for width, height in self._THUMBNAIL_RES:
|
||||
res = '%dx%d' % (width, height)
|
||||
thumbnails.append({
|
||||
'id': res,
|
||||
'url': 'http://images-us-am.crackle.com/%stnl_%s.jpg' % (path, res),
|
||||
'width': width,
|
||||
'height': height,
|
||||
'resolution': res,
|
||||
})
|
||||
http_base_url = 'http://ahttp.crackle.com/' + path
|
||||
for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
|
||||
formats.append({
|
||||
'url': http_base_url + mfs_path,
|
||||
'format_id': 'http-' + mfs_path.split('.')[0],
|
||||
'width': mfs_info['width'],
|
||||
'height': mfs_info['height'],
|
||||
})
|
||||
for cc in item.findall('cc'):
|
||||
locale = cc.attrib.get('l')
|
||||
v = cc.attrib.get('v')
|
||||
if locale and v:
|
||||
if locale not in subtitles:
|
||||
subtitles[locale] = []
|
||||
for url_ext, ext in (('vtt', 'vtt'), ('xml', 'tt')):
|
||||
subtitles.setdefault(locale, []).append({
|
||||
'url': '%s/%s%s_%s.%s' % (config_doc.attrib['strSubtitleServer'], path, locale, v, url_ext),
|
||||
'ext': ext,
|
||||
})
|
||||
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
|
||||
for country in countries:
|
||||
try:
|
||||
media = self._download_json(
|
||||
'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s'
|
||||
% (video_id, country), video_id,
|
||||
'Downloading media JSON as %s' % country,
|
||||
'Unable to download media JSON', query={
|
||||
'disableProtocols': 'true',
|
||||
'format': 'json'
|
||||
})
|
||||
except ExtractorError as e:
|
||||
# 401 means geo restriction, trying next country
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
last_e = e
|
||||
continue
|
||||
raise
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': item.attrib.get('d'),
|
||||
'duration': int(item.attrib.get('r'), 16) / 1000 if item.attrib.get('r') else None,
|
||||
'series': item.attrib.get('sn'),
|
||||
'season_number': int_or_none(item.attrib.get('se')),
|
||||
'episode_number': int_or_none(item.attrib.get('ep')),
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
media_urls = media.get('MediaURLs')
|
||||
if not media_urls or not isinstance(media_urls, list):
|
||||
continue
|
||||
|
||||
title = media['Title']
|
||||
|
||||
formats = []
|
||||
for e in media['MediaURLs']:
|
||||
if e.get('UseDRM') is True:
|
||||
continue
|
||||
format_url = e.get('Path')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = media.get('Description')
|
||||
duration = int_or_none(media.get(
|
||||
'DurationInSeconds')) or parse_duration(media.get('Duration'))
|
||||
view_count = int_or_none(media.get('CountViews'))
|
||||
average_rating = float_or_none(media.get('UserRating'))
|
||||
age_limit = parse_age_limit(media.get('Rating'))
|
||||
genre = media.get('Genre')
|
||||
release_year = int_or_none(media.get('ReleaseYear'))
|
||||
creator = media.get('Directors')
|
||||
artist = media.get('Cast')
|
||||
|
||||
if media.get('MediaTypeDisplayValue') == 'Full Episode':
|
||||
series = media.get('ShowName')
|
||||
episode = title
|
||||
season_number = int_or_none(media.get('Season'))
|
||||
episode_number = int_or_none(media.get('Episode'))
|
||||
else:
|
||||
series = episode = season_number = episode_number = None
|
||||
|
||||
subtitles = {}
|
||||
cc_files = media.get('ClosedCaptionFiles')
|
||||
if isinstance(cc_files, list):
|
||||
for cc_file in cc_files:
|
||||
if not isinstance(cc_file, dict):
|
||||
continue
|
||||
cc_url = cc_file.get('Path')
|
||||
if not cc_url or not isinstance(cc_url, compat_str):
|
||||
continue
|
||||
lang = cc_file.get('Locale') or 'en'
|
||||
subtitles.setdefault(lang, []).append({'url': cc_url})
|
||||
|
||||
thumbnails = []
|
||||
images = media.get('Images')
|
||||
if isinstance(images, list):
|
||||
for image_key, image_url in images.items():
|
||||
mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
|
||||
if not mobj:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': image_url,
|
||||
'width': int(mobj.group(1)),
|
||||
'height': int(mobj.group(2)),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'average_rating': average_rating,
|
||||
'age_limit': age_limit,
|
||||
'genre': genre,
|
||||
'creator': creator,
|
||||
'artist': artist,
|
||||
'release_year': release_year,
|
||||
'series': series,
|
||||
'episode': episode,
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
raise last_e
|
||||
|
||||
@@ -2,26 +2,26 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import json
|
||||
|
||||
from .amp import AMPIE
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
sanitized_Request,
|
||||
urlencode_postdata
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class DramaFeverBaseIE(AMPIE):
|
||||
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
|
||||
class DramaFeverBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'dramafever'
|
||||
_GEO_COUNTRIES = ['US', 'CA']
|
||||
|
||||
_CONSUMER_SECRET = 'DA59dtVXYLxajktV'
|
||||
|
||||
@@ -38,8 +38,8 @@ class DramaFeverBaseIE(AMPIE):
|
||||
'consumer secret', default=self._CONSUMER_SECRET)
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
self._consumer_secret = self._get_consumer_secret()
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
@@ -51,37 +51,49 @@ class DramaFeverBaseIE(AMPIE):
|
||||
'password': password,
|
||||
}
|
||||
|
||||
request = sanitized_Request(
|
||||
self._LOGIN_URL, urlencode_postdata(login_form))
|
||||
response = self._download_webpage(
|
||||
request, None, 'Logging in')
|
||||
try:
|
||||
response = self._download_json(
|
||||
'https://www.dramafever.com/api/users/login', None, 'Logging in',
|
||||
data=json.dumps(login_form).encode('utf-8'), headers={
|
||||
'x-consumer-key': self._consumer_secret,
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403, 404):
|
||||
response = self._parse_json(
|
||||
e.cause.read().decode('utf-8'), None)
|
||||
else:
|
||||
raise
|
||||
|
||||
if all(logout_pattern not in response
|
||||
for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
|
||||
error = self._html_search_regex(
|
||||
r'(?s)<h\d[^>]+\bclass="hidden-xs prompt"[^>]*>(.+?)</h\d',
|
||||
response, 'error message', default=None)
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
# Successful login
|
||||
if response.get('result') or response.get('guid') or response.get('user_guid'):
|
||||
return
|
||||
|
||||
errors = response.get('errors')
|
||||
if errors and isinstance(errors, list):
|
||||
error = errors[0]
|
||||
message = error.get('message') or error['reason']
|
||||
raise ExtractorError('Unable to login: %s' % message, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
class DramaFeverIE(DramaFeverBaseIE):
|
||||
IE_NAME = 'dramafever'
|
||||
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
|
||||
'url': 'https://www.dramafever.com/drama/4274/1/Heirs/',
|
||||
'info_dict': {
|
||||
'id': '4512.1',
|
||||
'ext': 'flv',
|
||||
'title': 'Cooking with Shin',
|
||||
'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
|
||||
'id': '4274.1',
|
||||
'ext': 'wvm',
|
||||
'title': 'Heirs - Episode 1',
|
||||
'description': 'md5:362a24ba18209f6276e032a651c50bc2',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 3783,
|
||||
'timestamp': 1381354993,
|
||||
'upload_date': '20131009',
|
||||
'series': 'Heirs',
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'timestamp': 1404336058,
|
||||
'upload_date': '20140702',
|
||||
'duration': 344,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -110,50 +122,95 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _call_api(self, path, video_id, note, fatal=False):
|
||||
return self._download_json(
|
||||
'https://www.dramafever.com/api/5/' + path,
|
||||
video_id, note=note, headers={
|
||||
'x-consumer-key': self._consumer_secret,
|
||||
}, fatal=fatal)
|
||||
|
||||
def _get_subtitles(self, video_id):
|
||||
subtitles = {}
|
||||
subs = self._call_api(
|
||||
'video/%s/subtitles/webvtt/' % video_id, video_id,
|
||||
'Downloading subtitles JSON', fatal=False)
|
||||
if not subs or not isinstance(subs, list):
|
||||
return subtitles
|
||||
for sub in subs:
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
sub_url = sub.get('url')
|
||||
if not sub_url or not isinstance(sub_url, compat_str):
|
||||
continue
|
||||
subtitles.setdefault(
|
||||
sub.get('code') or sub.get('language') or 'en', []).append({
|
||||
'url': sub_url
|
||||
})
|
||||
return subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url).replace('/', '.')
|
||||
|
||||
try:
|
||||
info = self._extract_feed_info(
|
||||
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
self.raise_geo_restricted(
|
||||
msg='Currently unavailable in your country',
|
||||
countries=self._GEO_COUNTRIES)
|
||||
raise
|
||||
|
||||
# title is postfixed with video id for some reason, removing
|
||||
if info.get('title'):
|
||||
info['title'] = remove_end(info['title'], video_id).strip()
|
||||
|
||||
series_id, episode_number = video_id.split('.')
|
||||
episode_info = self._download_json(
|
||||
# We only need a single episode info, so restricting page size to one episode
|
||||
# and dealing with page number as with episode number
|
||||
r'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_number=%s&page_size=1'
|
||||
% (self._consumer_secret, series_id, episode_number),
|
||||
video_id, 'Downloading episode info JSON', fatal=False)
|
||||
if episode_info:
|
||||
value = episode_info.get('value')
|
||||
if isinstance(value, list):
|
||||
for v in value:
|
||||
if v.get('type') == 'Episode':
|
||||
subfile = v.get('subfile') or v.get('new_subfile')
|
||||
if subfile and subfile != 'http://www.dramafever.com/st/':
|
||||
info.setdefault('subtitles', {}).setdefault('English', []).append({
|
||||
'ext': 'srt',
|
||||
'url': subfile,
|
||||
})
|
||||
episode_number = int_or_none(v.get('number'))
|
||||
episode_fallback = 'Episode'
|
||||
if episode_number:
|
||||
episode_fallback += ' %d' % episode_number
|
||||
info['episode'] = v.get('title') or episode_fallback
|
||||
info['episode_number'] = episode_number
|
||||
break
|
||||
|
||||
return info
|
||||
video = self._call_api(
|
||||
'series/%s/episodes/%s/' % (series_id, episode_number), video_id,
|
||||
'Downloading video JSON')
|
||||
|
||||
formats = []
|
||||
download_assets = video.get('download_assets')
|
||||
if download_assets and isinstance(download_assets, dict):
|
||||
for format_id, format_dict in download_assets.items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = format_dict.get('url')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
continue
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
'filesize': int_or_none(video.get('filesize')),
|
||||
})
|
||||
|
||||
stream = self._call_api(
|
||||
'video/%s/stream/' % video_id, video_id, 'Downloading stream JSON',
|
||||
fatal=False)
|
||||
if stream:
|
||||
stream_url = stream.get('stream_url')
|
||||
if stream_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = video.get('title') or 'Episode %s' % episode_number
|
||||
description = video.get('description')
|
||||
thumbnail = video.get('thumbnail')
|
||||
timestamp = unified_timestamp(video.get('release_date'))
|
||||
duration = parse_duration(video.get('duration'))
|
||||
age_limit = parse_age_limit(video.get('tv_rating'))
|
||||
series = video.get('series_title')
|
||||
season_number = int_or_none(video.get('season'))
|
||||
|
||||
if series:
|
||||
title = '%s - %s' % (series, title)
|
||||
|
||||
subtitles = self.extract_subtitles(video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'age_limit': age_limit,
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
'episode_number': int_or_none(episode_number),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class DramaFeverSeriesIE(DramaFeverBaseIE):
|
||||
|
||||
@@ -66,7 +66,9 @@ class DrTuberIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
(r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
|
||||
(r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
|
||||
r'<title>([^<]+)\s*@\s+DrTuber',
|
||||
r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
|
||||
r'<p[^>]+class="title_substrate">([^<]+)</p>',
|
||||
r'<title>([^<]+) - \d+'),
|
||||
webpage, 'title')
|
||||
|
||||
@@ -385,7 +385,10 @@ from .freesound import FreesoundIE
|
||||
from .freespeech import FreespeechIE
|
||||
from .freshlive import FreshLiveIE
|
||||
from .funimation import FunimationIE
|
||||
from .funk import FunkIE
|
||||
from .funk import (
|
||||
FunkMixIE,
|
||||
FunkChannelIE,
|
||||
)
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .fusion import FusionIE
|
||||
from .fxnetworks import FXNetworksIE
|
||||
@@ -429,6 +432,7 @@ from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hgtv import HGTVComShowIE
|
||||
from .hidive import HiDiveIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hitrecord import HitRecordIE
|
||||
@@ -528,13 +532,14 @@ from .lcp import (
|
||||
)
|
||||
from .learnr import LearnrIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lego import LEGOIE
|
||||
from .lemonde import LemondeIE
|
||||
from .leeco import (
|
||||
LeIE,
|
||||
LePlaylistIE,
|
||||
LetvCloudIE,
|
||||
)
|
||||
from .lego import LEGOIE
|
||||
from .lemonde import LemondeIE
|
||||
from .lenta import LentaIE
|
||||
from .libraryofcongress import LibraryOfCongressIE
|
||||
from .libsyn import LibsynIE
|
||||
from .lifenews import (
|
||||
@@ -546,6 +551,7 @@ from .limelight import (
|
||||
LimelightChannelIE,
|
||||
LimelightChannelListIE,
|
||||
)
|
||||
from .line import LineTVIE
|
||||
from .litv import LiTVIE
|
||||
from .liveleak import (
|
||||
LiveLeakIE,
|
||||
@@ -566,7 +572,11 @@ from .lynda import (
|
||||
)
|
||||
from .m6 import M6IE
|
||||
from .macgamestore import MacGameStoreIE
|
||||
from .mailru import MailRuIE
|
||||
from .mailru import (
|
||||
MailRuIE,
|
||||
MailRuMusicIE,
|
||||
MailRuMusicSearchIE,
|
||||
)
|
||||
from .makerschannel import MakersChannelIE
|
||||
from .makertv import MakerTVIE
|
||||
from .mangomolo import (
|
||||
@@ -867,6 +877,7 @@ from .rai import (
|
||||
RaiPlayPlaylistIE,
|
||||
RaiIE,
|
||||
)
|
||||
from .raywenderlich import RayWenderlichIE
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .rds import RDSIE
|
||||
from .redbulltv import RedBullTVIE
|
||||
@@ -1020,6 +1031,7 @@ from .sunporno import SunPornoIE
|
||||
from .svt import (
|
||||
SVTIE,
|
||||
SVTPlayIE,
|
||||
SVTSeriesIE,
|
||||
)
|
||||
from .swrmediathek import SWRMediathekIE
|
||||
from .syfy import SyfyIE
|
||||
@@ -1045,9 +1057,14 @@ from .telebruxelles import TeleBruxellesIE
|
||||
from .telecinco import TelecincoIE
|
||||
from .telegraaf import TelegraafIE
|
||||
from .telemb import TeleMBIE
|
||||
from .telequebec import TeleQuebecIE
|
||||
from .telequebec import (
|
||||
TeleQuebecIE,
|
||||
TeleQuebecEmissionIE,
|
||||
TeleQuebecLiveIE,
|
||||
)
|
||||
from .teletask import TeleTaskIE
|
||||
from .telewebion import TelewebionIE
|
||||
from .tennistv import TennisTVIE
|
||||
from .testurl import TestURLIE
|
||||
from .tf1 import TF1IE
|
||||
from .tfo import TFOIE
|
||||
@@ -1120,6 +1137,7 @@ from .tvnoe import TVNoeIE
|
||||
from .tvnow import (
|
||||
TVNowIE,
|
||||
TVNowListIE,
|
||||
TVNowShowIE,
|
||||
)
|
||||
from .tvp import (
|
||||
TVPEmbedIE,
|
||||
@@ -1202,7 +1220,6 @@ from .vice import (
|
||||
ViceArticleIE,
|
||||
ViceShowIE,
|
||||
)
|
||||
from .viceland import VicelandIE
|
||||
from .vidbit import VidbitIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videa import VideaIE
|
||||
@@ -1217,6 +1234,7 @@ from .videomore import (
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .videopress import VideoPressIE
|
||||
from .vidio import VidioIE
|
||||
from .vidlii import VidLiiIE
|
||||
from .vidme import (
|
||||
VidmeIE,
|
||||
VidmeUserIE,
|
||||
@@ -1360,6 +1378,7 @@ from .yandexmusic import (
|
||||
YandexMusicPlaylistIE,
|
||||
)
|
||||
from .yandexdisk import YandexDiskIE
|
||||
from .yapfiles import YapFilesIE
|
||||
from .yesjapan import YesJapanIE
|
||||
from .yinyuetai import YinYueTaiIE
|
||||
from .ynet import YnetIE
|
||||
|
||||
@@ -1,43 +1,102 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .nexx import NexxIE
|
||||
from ..utils import extract_attributes
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class FunkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?funk\.net/(?:mix|channel)/(?:[^/]+/)*(?P<id>[^?/#]+)'
|
||||
class FunkBaseIE(InfoExtractor):
|
||||
def _make_url_result(self, video):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'nexx:741:%s' % video['sourceId'],
|
||||
'ie_key': NexxIE.ie_key(),
|
||||
'id': video['sourceId'],
|
||||
'title': video.get('title'),
|
||||
'description': video.get('description'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'season_number': int_or_none(video.get('seasonNr')),
|
||||
'episode_number': int_or_none(video.get('episodeNr')),
|
||||
}
|
||||
|
||||
|
||||
class FunkMixIE(FunkBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/0/59d517e741dca10001252574/',
|
||||
'md5': '4d40974481fa3475f8bccfd20c5361f8',
|
||||
'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten',
|
||||
'md5': '8edf617c2f2b7c9847dfda313f199009',
|
||||
'info_dict': {
|
||||
'id': '716599',
|
||||
'id': '123748',
|
||||
'ext': 'mp4',
|
||||
'title': 'Neue Rechte Welle',
|
||||
'description': 'md5:a30a53f740ffb6bfd535314c2cc5fb69',
|
||||
'timestamp': 1501337639,
|
||||
'upload_date': '20170729',
|
||||
'title': '"Die realste Kifferdoku aller Zeiten"',
|
||||
'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d',
|
||||
'timestamp': 1490274721,
|
||||
'upload_date': '20170323',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
mix_id = mobj.group('id')
|
||||
alias = mobj.group('alias')
|
||||
|
||||
lists = self._download_json(
|
||||
'https://www.funk.net/api/v3.1/curation/curatedLists/',
|
||||
mix_id, headers={
|
||||
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbC12Mi4wIiwic2NvcGUiOiJzdGF0aWMtY29udGVudC1hcGksY3VyYXRpb24tc2VydmljZSxzZWFyY2gtYXBpIn0.SGCC1IXHLtZYoo8PvRKlU2gXH1su8YSu47sB3S4iXBI',
|
||||
'Referer': url,
|
||||
}, query={
|
||||
'size': 100,
|
||||
})['result']['lists']
|
||||
|
||||
metas = next(
|
||||
l for l in lists
|
||||
if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas']
|
||||
video = next(
|
||||
meta['videoDataDelegate']
|
||||
for meta in metas if meta.get('alias') == alias)
|
||||
|
||||
return self._make_url_result(video)
|
||||
|
||||
|
||||
class FunkChannelIE(FunkBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2',
|
||||
'info_dict': {
|
||||
'id': '1155821',
|
||||
'ext': 'mp4',
|
||||
'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
|
||||
'description': 'md5:a691d0413ef4835588c5b03ded670c1f',
|
||||
'timestamp': 1514507395,
|
||||
'upload_date': '20171229',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/0/59d52049999264000182e79d/',
|
||||
'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
channel_id = mobj.group('id')
|
||||
alias = mobj.group('alias')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
results = self._download_json(
|
||||
'https://www.funk.net/api/v3.0/content/videos/filter', channel_id,
|
||||
headers={
|
||||
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc',
|
||||
'Referer': url,
|
||||
}, query={
|
||||
'channelId': channel_id,
|
||||
'size': 100,
|
||||
})['result']
|
||||
|
||||
domain_id = NexxIE._extract_domain_id(webpage) or '741'
|
||||
nexx_id = extract_attributes(self._search_regex(
|
||||
r'(<div[^>]id=["\']mediaplayer-funk[^>]+>)',
|
||||
webpage, 'media player'))['data-id']
|
||||
video = next(r for r in results if r.get('alias') == alias)
|
||||
|
||||
return self.url_result(
|
||||
'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(),
|
||||
video_id=nexx_id)
|
||||
return self._make_url_result(video)
|
||||
|
||||
@@ -58,6 +58,7 @@ from .xhamster import XHamsterEmbedIE
|
||||
from .tnaflix import TNAFlixNetworkEmbedIE
|
||||
from .drtuber import DrTuberIE
|
||||
from .redtube import RedTubeIE
|
||||
from .tube8 import Tube8IE
|
||||
from .vimeo import VimeoIE
|
||||
from .dailymotion import DailymotionIE
|
||||
from .dailymail import DailyMailIE
|
||||
@@ -102,6 +103,9 @@ from .channel9 import Channel9IE
|
||||
from .vshare import VShareIE
|
||||
from .mediasite import MediasiteIE
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .yapfiles import YapFilesIE
|
||||
from .vice import ViceIE
|
||||
from .xfileshare import XFileShareIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -1267,24 +1271,6 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['Kaltura'],
|
||||
},
|
||||
# EaglePlatform embed (generic URL)
|
||||
{
|
||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
|
||||
'info_dict': {
|
||||
'id': '227304',
|
||||
'ext': 'mp4',
|
||||
'title': 'Навальный вышел на свободу',
|
||||
'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 87,
|
||||
'view_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# referrer protected EaglePlatform embed
|
||||
{
|
||||
'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
|
||||
@@ -1970,7 +1956,29 @@ class GenericIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
|
||||
'info_dict': {
|
||||
'id': 'vMDE4NzI1Mjgt690b',
|
||||
'ext': 'mp4',
|
||||
'title': 'Котята',
|
||||
},
|
||||
'add_ie': [YapFilesIE.ie_key()],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://share-videos.se/auto/video/83645793?uid=13',
|
||||
'md5': 'b68d276de422ab07ee1d49388103f457',
|
||||
'info_dict': {
|
||||
'id': '83645793',
|
||||
'title': 'Lock up and get excited',
|
||||
'ext': 'mp4'
|
||||
},
|
||||
'skip': 'TODO: fix nested playlists processing in tests',
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@@ -2217,7 +2225,11 @@ class GenericIE(InfoExtractor):
|
||||
self._sort_formats(smil['formats'])
|
||||
return smil
|
||||
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
||||
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
||||
return self.playlist_result(
|
||||
self._parse_xspf(
|
||||
doc, video_id, xspf_url=url,
|
||||
xspf_base_url=compat_str(full_response.geturl())),
|
||||
video_id)
|
||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||
info_dict['formats'] = self._parse_mpd_formats(
|
||||
doc,
|
||||
@@ -2545,6 +2557,11 @@ class GenericIE(InfoExtractor):
|
||||
if redtube_urls:
|
||||
return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
|
||||
|
||||
# Look for embedded Tube8 player
|
||||
tube8_urls = Tube8IE._extract_urls(webpage)
|
||||
if tube8_urls:
|
||||
return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
|
||||
|
||||
# Look for embedded Tvigle player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
|
||||
@@ -2947,6 +2964,28 @@ class GenericIE(InfoExtractor):
|
||||
springboardplatform_urls, video_id, video_title,
|
||||
ie=SpringboardPlatformIE.ie_key())
|
||||
|
||||
yapfiles_urls = YapFilesIE._extract_urls(webpage)
|
||||
if yapfiles_urls:
|
||||
return self.playlist_from_matches(
|
||||
yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key())
|
||||
|
||||
vice_urls = ViceIE._extract_urls(webpage)
|
||||
if vice_urls:
|
||||
return self.playlist_from_matches(
|
||||
vice_urls, video_id, video_title, ie=ViceIE.ie_key())
|
||||
|
||||
xfileshare_urls = XFileShareIE._extract_urls(webpage)
|
||||
if xfileshare_urls:
|
||||
return self.playlist_from_matches(
|
||||
xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
|
||||
|
||||
sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
|
||||
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
|
||||
webpage)]
|
||||
if sharevideos_urls:
|
||||
return self.playlist_from_matches(
|
||||
sharevideos_urls, video_id, video_title)
|
||||
|
||||
def merge_dicts(dict1, dict2):
|
||||
merged = {}
|
||||
for k, v in dict1.items():
|
||||
|
||||
@@ -2,11 +2,14 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
NO_DEFAULT,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
@@ -14,18 +17,19 @@ from ..utils import (
|
||||
class HeiseIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html'
|
||||
_TESTS = [{
|
||||
# kaltura embed
|
||||
'url': 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html',
|
||||
'md5': 'ffed432483e922e88545ad9f2f15d30e',
|
||||
'info_dict': {
|
||||
'id': '2404147',
|
||||
'id': '1_kkrq94sm',
|
||||
'ext': 'mp4',
|
||||
'title': "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone",
|
||||
'format_id': 'mp4_720p',
|
||||
'timestamp': 1411812600,
|
||||
'upload_date': '20140927',
|
||||
'timestamp': 1512734959,
|
||||
'upload_date': '20171208',
|
||||
'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
|
||||
'thumbnail': r're:^https?://.*/gallery/$',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# YouTube embed
|
||||
'url': 'http://www.heise.de/newsticker/meldung/Netflix-In-20-Jahren-vom-Videoverleih-zum-TV-Revolutionaer-3814130.html',
|
||||
@@ -42,6 +46,32 @@ class HeiseIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.heise.de/video/artikel/nachgehakt-Wie-sichert-das-c-t-Tool-Restric-tor-Windows-10-ab-3700244.html',
|
||||
'info_dict': {
|
||||
'id': '1_ntrmio2s',
|
||||
'ext': 'mp4',
|
||||
'title': "nachgehakt: Wie sichert das c't-Tool Restric'tor Windows 10 ab?",
|
||||
'description': 'md5:47e8ffb6c46d85c92c310a512d6db271',
|
||||
'timestamp': 1512470717,
|
||||
'upload_date': '20171205',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.heise.de/ct/artikel/c-t-uplink-20-8-Staubsaugerroboter-Xiaomi-Vacuum-2-AR-Brille-Meta-2-und-Android-rooten-3959893.html',
|
||||
'info_dict': {
|
||||
'id': '1_59mk80sf',
|
||||
'ext': 'mp4',
|
||||
'title': "c't uplink 20.8: Staubsaugerroboter Xiaomi Vacuum 2, AR-Brille Meta 2 und Android rooten",
|
||||
'description': 'md5:f50fe044d3371ec73a8f79fcebd74afc',
|
||||
'timestamp': 1517567237,
|
||||
'upload_date': '20180202',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
|
||||
'only_matching': True,
|
||||
@@ -57,19 +87,45 @@ class HeiseIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_meta('fulltitle', webpage, default=None)
|
||||
if not title or title == "c't":
|
||||
title = self._search_regex(
|
||||
r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
|
||||
webpage, 'title')
|
||||
def extract_title(default=NO_DEFAULT):
|
||||
title = self._html_search_meta(
|
||||
('fulltitle', 'title'), webpage, default=None)
|
||||
if not title or title == "c't":
|
||||
title = self._search_regex(
|
||||
r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
|
||||
webpage, 'title', default=None)
|
||||
if not title:
|
||||
title = self._html_search_regex(
|
||||
r'<h1[^>]+\bclass=["\']article_page_title[^>]+>(.+?)<',
|
||||
webpage, 'title', default=default)
|
||||
return title
|
||||
|
||||
title = extract_title(default=None)
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'description', webpage)
|
||||
|
||||
kaltura_url = KalturaIE._extract_url(webpage)
|
||||
if kaltura_url:
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(kaltura_url, {'source_url': url}),
|
||||
'ie_key': KalturaIE.ie_key(),
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
||||
|
||||
yt_urls = YoutubeIE._extract_urls(webpage)
|
||||
if yt_urls:
|
||||
return self.playlist_from_matches(yt_urls, video_id, title, ie=YoutubeIE.ie_key())
|
||||
return self.playlist_from_matches(
|
||||
yt_urls, video_id, title, ie=YoutubeIE.ie_key())
|
||||
|
||||
title = extract_title()
|
||||
|
||||
container_id = self._search_regex(
|
||||
r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
|
||||
webpage, 'container ID')
|
||||
|
||||
sequenz_id = self._search_regex(
|
||||
r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
|
||||
webpage, 'sequenz ID')
|
||||
@@ -95,10 +151,6 @@ class HeiseIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'description', webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
|
||||
@@ -0,0 +1,96 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class HiDiveIE(InfoExtractor):
    # Extractor for HIDIVE stream pages of the form
    # hidive.com/stream/<title>/<key>; both URL parts are needed by the
    # settings endpoint queried in _real_extract.
    _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P<title>[^/]+)/(?P<key>[^/?#&]+)'
    # Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
    # so disabling geo bypass completely
    _GEO_BYPASS = False

    _TESTS = [{
        'url': 'https://www.hidive.com/stream/the-comic-artist-and-his-assistants/s01e001',
        'info_dict': {
            'id': 'the-comic-artist-and-his-assistants/s01e001',
            'ext': 'mp4',
            'title': 'the-comic-artist-and-his-assistants/s01e001',
            'series': 'the-comic-artist-and-his-assistants',
            'season_number': 1,
            'episode_number': 1,
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for one episode.

        POSTs the title and key taken from the URL to the play settings
        endpoint, turns every rendition's HLS playlist into formats and
        collects the closed-caption files as subtitles.

        Raises ExtractorError when the settings response carries a
        restriction reason other than 'None' (a region restriction is
        reported through raise_geo_restricted instead).
        """
        mobj = re.match(self._VALID_URL, url)
        title, key = mobj.group('title', 'key')
        video_id = '%s/%s' % (title, key)

        settings = self._download_json(
            'https://www.hidive.com/play/settings', video_id,
            data=urlencode_postdata({
                'Title': title,
                'Key': key,
            }))

        restriction = settings.get('restrictionReason')
        if restriction == 'RegionRestricted':
            self.raise_geo_restricted()

        # The literal string 'None' is compared against explicitly, so it is
        # treated the same as an absent restriction.
        if restriction and restriction != 'None':
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, restriction), expected=True)

        formats = []
        subtitles = {}
        for rendition_id, rendition in settings['renditions'].items():
            bitrates = rendition.get('bitrates')
            if not isinstance(bitrates, dict):
                continue
            m3u8_url = bitrates.get('hls')
            if not isinstance(m3u8_url, compat_str):
                continue
            # fatal=False: one broken playlist must not abort the others
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='%s-hls' % rendition_id, fatal=False))
            cc_files = rendition.get('ccFiles')
            if not isinstance(cc_files, list):
                continue
            for cc_file in cc_files:
                # Each entry is expected to be a list with the language at
                # index 0 and the file URL at index 2
                if not isinstance(cc_file, list) or len(cc_file) < 3:
                    continue
                cc_lang = cc_file[0]
                cc_url = cc_file[2]
                if not isinstance(cc_lang, compat_str) or not isinstance(
                        cc_url, compat_str):
                    continue
                subtitles.setdefault(cc_lang, []).append({
                    'url': cc_url,
                })
        # Formats were gathered from several renditions in dict order; sort
        # them so the returned list follows the worst-to-best convention the
        # other extractors rely on.
        self._sort_formats(formats)

        # Season and episode numbers are parsed out of the key part of the
        # URL (e.g. 's01e001'); both stay None when the pattern is absent.
        season_number = int_or_none(self._search_regex(
            r's(\d+)', key, 'season number', default=None))
        episode_number = int_or_none(self._search_regex(
            r'e(\d+)', key, 'episode number', default=None))

        return {
            'id': video_id,
            'title': video_id,
            'subtitles': subtitles,
            'formats': formats,
            'series': title,
            'season_number': season_number,
            'episode_number': episode_number,
        }
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -238,36 +239,36 @@ class InstagramUserIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _entries(self, uploader_id):
|
||||
query = {
|
||||
'__a': 1,
|
||||
}
|
||||
|
||||
def get_count(kind):
|
||||
def get_count(suffix):
|
||||
return int_or_none(try_get(
|
||||
node, lambda x: x['%ss' % kind]['count']))
|
||||
node, lambda x: x['edge_media_' + suffix]['count']))
|
||||
|
||||
self._set_cookie('instagram.com', 'ig_pr', '1')
|
||||
|
||||
cursor = ''
|
||||
for page_num in itertools.count(1):
|
||||
page = self._download_json(
|
||||
'https://instagram.com/%s/' % uploader_id, uploader_id,
|
||||
note='Downloading page %d' % page_num,
|
||||
fatal=False, query=query)
|
||||
if not page:
|
||||
media = self._download_json(
|
||||
'https://www.instagram.com/graphql/query/', uploader_id,
|
||||
'Downloading JSON page %d' % page_num, query={
|
||||
'query_hash': '472f257a40c653c64c666ce877d59d2b',
|
||||
'variables': json.dumps({
|
||||
'id': uploader_id,
|
||||
'first': 100,
|
||||
'after': cursor,
|
||||
})
|
||||
})['data']['user']['edge_owner_to_timeline_media']
|
||||
|
||||
edges = media.get('edges')
|
||||
if not edges or not isinstance(edges, list):
|
||||
break
|
||||
|
||||
nodes = try_get(page, lambda x: x['user']['media']['nodes'], list)
|
||||
if not nodes:
|
||||
break
|
||||
|
||||
max_id = None
|
||||
|
||||
for node in nodes:
|
||||
node_id = node.get('id')
|
||||
if node_id:
|
||||
max_id = node_id
|
||||
|
||||
for edge in edges:
|
||||
node = edge.get('node')
|
||||
if not node or not isinstance(node, dict):
|
||||
continue
|
||||
if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True:
|
||||
continue
|
||||
video_id = node.get('code')
|
||||
video_id = node.get('shortcode')
|
||||
if not video_id:
|
||||
continue
|
||||
|
||||
@@ -276,14 +277,14 @@ class InstagramUserIE(InfoExtractor):
|
||||
ie=InstagramIE.ie_key(), video_id=video_id)
|
||||
|
||||
description = try_get(
|
||||
node, [lambda x: x['caption'], lambda x: x['text']['id']],
|
||||
node, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||
compat_str)
|
||||
thumbnail = node.get('thumbnail_src') or node.get('display_src')
|
||||
timestamp = int_or_none(node.get('date'))
|
||||
timestamp = int_or_none(node.get('taken_at_timestamp'))
|
||||
|
||||
comment_count = get_count('comment')
|
||||
like_count = get_count('like')
|
||||
view_count = int_or_none(node.get('video_views'))
|
||||
comment_count = get_count('to_comment')
|
||||
like_count = get_count('preview_like')
|
||||
view_count = int_or_none(node.get('video_view_count'))
|
||||
|
||||
info.update({
|
||||
'description': description,
|
||||
@@ -296,12 +297,23 @@ class InstagramUserIE(InfoExtractor):
|
||||
|
||||
yield info
|
||||
|
||||
if not max_id:
|
||||
page_info = media.get('page_info')
|
||||
if not page_info or not isinstance(page_info, dict):
|
||||
break
|
||||
|
||||
query['max_id'] = max_id
|
||||
has_next_page = page_info.get('has_next_page')
|
||||
if not has_next_page:
|
||||
break
|
||||
|
||||
cursor = page_info.get('end_cursor')
|
||||
if not cursor or not isinstance(cursor, compat_str):
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
uploader_id = self._match_id(url)
|
||||
username = self._match_id(url)
|
||||
uploader_id = self._download_json(
|
||||
'https://instagram.com/%s/' % username, username, query={
|
||||
'__a': 1,
|
||||
})['graphql']['user']['id']
|
||||
return self.playlist_result(
|
||||
self._entries(uploader_id), uploader_id, uploader_id)
|
||||
self._entries(uploader_id), username, username)
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class LentaIE(InfoExtractor):
    # Extractor for lenta.ru article pages; the trailing slug of the
    # dated article path is used as the display id.
    _VALID_URL = r'https?://(?:www\.)?lenta\.ru/[^/]+/\d+/\d+/\d+/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://lenta.ru/news/2018/03/22/savshenko_go/',
        'info_dict': {
            'id': '964400',
            'ext': 'mp4',
            'title': 'Надежду Савченко задержали',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 61,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # EaglePlatform iframe embed
        'url': 'http://lenta.ru/news/2015/03/06/navalny/',
        'info_dict': {
            'id': '227304',
            'ext': 'mp4',
            'title': 'Навальный вышел на свободу',
            'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 87,
            'view_count': int,
            'age_limit': 0,
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Delegate the page to EaglePlatform or to the generic extractor.

        When the page source contains a numeric ``vid`` value, it is
        handed to the EaglePlatform extractor; otherwise the URL itself
        is passed on to the Generic extractor.
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        eagle_id = self._search_regex(
            r'vid\s*:\s*["\']?(\d+)', page, 'eagleplatform id',
            default=None)

        if not eagle_id:
            # No inline player id found: let the generic extractor look
            # for embeds.
            return self.url_result(url, ie='Generic')

        eagle_url = 'eagleplatform:lentaru.media.eagleplatform.com:%s' % eagle_id
        return self.url_result(
            eagle_url, ie='EaglePlatform', video_id=eagle_id)
|
||||
@@ -1,24 +1,28 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class LibsynIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
|
||||
'md5': '443360ee1b58007bc3dcf09b41d093bb',
|
||||
'url': 'http://html5-player.libsyn.com/embed/episode/id/6385796/',
|
||||
'md5': '2a55e75496c790cdeb058e7e6c087746',
|
||||
'info_dict': {
|
||||
'id': '3377616',
|
||||
'id': '6385796',
|
||||
'ext': 'mp3',
|
||||
'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
|
||||
'description': 'md5:601cb790edd05908957dae8aaa866465',
|
||||
'upload_date': '20150220',
|
||||
'title': "Champion Minded - Developing a Growth Mindset",
|
||||
'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
|
||||
'upload_date': '20180320',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
},
|
||||
}, {
|
||||
@@ -39,31 +43,45 @@ class LibsynIE(InfoExtractor):
|
||||
url = m.group('mainurl')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats = [{
|
||||
'url': media_url,
|
||||
} for media_url in set(re.findall(r'var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]
|
||||
|
||||
podcast_title = self._search_regex(
|
||||
r'<h2>([^<]+)</h2>', webpage, 'podcast title', default=None)
|
||||
r'<h3>([^<]+)</h3>', webpage, 'podcast title', default=None)
|
||||
if podcast_title:
|
||||
podcast_title = podcast_title.strip()
|
||||
episode_title = self._search_regex(
|
||||
r'(?:<div class="episode-title">|<h3>)([^<]+)</', webpage, 'episode title')
|
||||
r'(?:<div class="episode-title">|<h4>)([^<]+)</', webpage, 'episode title')
|
||||
if episode_title:
|
||||
episode_title = episode_title.strip()
|
||||
|
||||
title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'<div id="info_text_body">(.+?)</div>', webpage,
|
||||
r'<p\s+id="info_text_body">(.+?)</p>', webpage,
|
||||
'description', default=None)
|
||||
thumbnail = self._search_regex(
|
||||
r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
if description:
|
||||
# Strip non-breaking and normal spaces
|
||||
description = description.replace('\u00A0', ' ').strip()
|
||||
release_date = unified_strdate(self._search_regex(
|
||||
r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
|
||||
|
||||
data_json = self._search_regex(r'var\s+playlistItem\s*=\s*(\{.*?\});\n', webpage, 'JSON data block')
|
||||
data = json.loads(data_json)
|
||||
|
||||
formats = [{
|
||||
'url': data['media_url'],
|
||||
'format_id': 'main',
|
||||
}, {
|
||||
'url': data['media_url_libsyn'],
|
||||
'format_id': 'libsyn',
|
||||
}]
|
||||
thumbnail = data.get('thumbnail_url')
|
||||
duration = parse_duration(data.get('duration'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': release_date,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class LineTVIE(InfoExtractor):
    # Extractor for tv.line.me video pages; the numeric id and the
    # "epN-M" segment of the URL together form the video id.
    _VALID_URL = r'https?://tv\.line\.me/v/(?P<id>\d+)_[^/]+-(?P<segment>ep\d+-\d+)'

    _TESTS = [{
        'url': 'https://tv.line.me/v/793123_goodbye-mrblack-ep1-1/list/69246',
        'info_dict': {
            'id': '793123_ep1-1',
            'ext': 'mp4',
            'title': 'Goodbye Mr.Black | EP.1-1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 998.509,
            'view_count': int,
        },
    }, {
        'url': 'https://tv.line.me/v/2587507_%E6%B4%BE%E9%81%A3%E5%A5%B3%E9%86%ABx-ep1-02/list/185245',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict from the page's player parameters.

        The player parameters found in the webpage are sent to the video
        info API; its first stream yields the HLS formats and its video
        list yields the direct HTTP formats.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = '%s_%s' % mobj.group('id', 'segment')

        webpage = self._download_webpage(url, video_id)

        raw_params = self._search_regex(
            r'naver\.WebPlayer\(({[^}]+})\)', webpage, 'player parameters')
        player_params = self._parse_json(
            raw_params, video_id, transform_source=js_to_json)

        api_query = {
            'videoId': player_params['videoId'],
            'key': player_params['key'],
        }
        video_info = self._download_json(
            'https://global-nvapis.line.me/linetv/rmcnmv/vod_play_videoInfo.json',
            video_id, query=api_query)

        # Only the first stream is used; its key value is appended as a
        # query string to the playlist URL and to every HLS format URL.
        first_stream = video_info['streams'][0]
        extra_query = '?__gda__=' + first_stream['key']['value']
        formats = self._extract_m3u8_formats(
            first_stream['source'] + extra_query, video_id, ext='mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')
        for hls_format in formats:
            hls_format['url'] = hls_format['url'] + extra_query

        # Direct HTTP renditions; the first non-empty duration wins.
        duration = None
        for rendition in video_info.get('videos', {}).get('list', []):
            dimensions = rendition.get('encodingOption', {})
            bitrate = rendition['bitrate']
            abr = bitrate['audio']
            vbr = bitrate['video']
            total_bitrate = abr + vbr
            http_format = {
                'url': rendition['source'],
                'format_id': 'http-%d' % int(total_bitrate),
                'height': dimensions.get('height'),
                'width': dimensions.get('width'),
                'abr': abr,
                'vbr': vbr,
                'filesize': rendition.get('size'),
            }
            formats.append(http_format)
            if duration is None and rendition.get('duration'):
                duration = rendition['duration']

        self._sort_formats(formats)

        # A lowest-ranked format without a width is marked as having no
        # video codec.
        worst_format = formats[0]
        if not worst_format.get('width'):
            worst_format['vcodec'] = 'none'

        title = self._og_search_title(webpage)

        # like_count requires an additional API request https://tv.line.me/api/likeit/getCount

        thumbnails = []
        for thumbnail in video_info.get('thumbnails', {}).get('list', []):
            thumbnails.append({'url': thumbnail['source']})

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            # Query string without the leading '?'; presumably consumed by
            # the HLS downloader for segment URLs - TODO confirm.
            'extra_param_to_segment_url': extra_query[1:],
            'duration': duration,
            'thumbnails': thumbnails,
            'view_count': video_info.get('meta', {}).get('count'),
        }
|
||||
@@ -7,7 +7,7 @@ from ..utils import int_or_none
|
||||
|
||||
|
||||
class LiveLeakIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.liveleak.com/view?i=757_1364311680',
|
||||
'md5': '0813c2430bea7a46bf13acf3406992f4',
|
||||
@@ -79,6 +79,9 @@ class LiveLeakIE(InfoExtractor):
|
||||
'title': 'Fuel Depot in China Explosion caught on video',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -1,12 +1,17 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
remove_end,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -157,3 +162,153 @@ class MailRuIE(InfoExtractor):
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class MailRuMusicSearchBaseIE(InfoExtractor):
    # Shared helpers for the my.mail.ru music extractors: one search API
    # call and the conversion of a track record into an info dict.

    def _search(self, query, url, audio_id, limit=100, offset=0):
        """Run one music search request and return its first dict element.

        query is the search text, url is sent as the Referer header,
        audio_id is used as the id of the download, and limit/offset
        select the page of results. The JSON response is iterated and
        the first element that is a dict is returned.
        """
        page_note = 'Downloading songs JSON page %d' % (offset // limit + 1)
        request_headers = {
            'Referer': url,
            'X-Requested-With': 'XMLHttpRequest',
        }
        search_params = json.dumps({
            'music': {
                'limit': limit,
                'offset': offset,
            },
        })
        request_query = {
            'xemail': '',
            'ajax_call': '1',
            'func_name': 'music.search',
            'mna': '',
            'mnb': '',
            'arg_query': query,
            'arg_extended': '1',
            'arg_search_params': search_params,
            'arg_limit': limit,
            'arg_offset': offset,
        }
        response = self._download_json(
            'https://my.mail.ru/cgi-bin/my/ajax', audio_id, page_note,
            headers=request_headers, query=request_query)
        return next(item for item in response if isinstance(item, dict))

    @staticmethod
    def _extract_track(t, fatal=True):
        """Convert one track record into an info dict.

        With fatal=True a missing 'URL' or 'File' key raises KeyError;
        with fatal=False the keys are looked up leniently. In both modes
        None is returned when either value is empty.
        """
        def required(key):
            return t[key] if fatal else t.get(key)

        audio_url = required('URL')
        if not audio_url:
            return

        audio_id = required('File')
        if not audio_id:
            return

        track = t.get('Name') or t.get('Name_Text_HTML')
        artist = t.get('Author') or t.get('Author_Text_HTML')

        # Title preference: "artist - track", then the bare track name,
        # then the file id.
        if track and artist:
            title = '%s - %s' % (artist, track)
        elif track:
            title = track
        else:
            title = audio_id

        return {
            'extractor_key': MailRuMusicIE.ie_key(),
            'id': audio_id,
            'title': title,
            'thumbnail': t.get('AlbumCoverURL') or t.get('FiledAlbumCover'),
            'uploader': t.get('OwnerName') or t.get('OwnerName_Text_HTML'),
            'uploader_id': t.get('UploaderID'),
            'duration': int_or_none(t.get('DurationInSeconds')) or parse_duration(
                t.get('Duration') or t.get('DurationStr')),
            'view_count': int_or_none(t.get('PlayCount') or t.get('PlayCount_hr')),
            'vcodec': 'none',
            'abr': int_or_none(t.get('BitRate')),
            'track': track,
            'artist': artist,
            'album': t.get('Album'),
            'url': audio_url,
        }
|
||||
|
||||
|
||||
class MailRuMusicIE(MailRuMusicSearchBaseIE):
    # Extractor for a single song page; the hexadecimal suffix of the
    # URL is the audio id.
    IE_NAME = 'mailru:music'
    IE_DESC = 'Музыка@Mail.Ru'
    _VALID_URL = r'https?://my\.mail\.ru/music/songs/[^/?#&]+-(?P<id>[\da-f]+)'
    _TESTS = [{
        'url': 'https://my.mail.ru/music/songs/%D0%BC8%D0%BB8%D1%82%D1%85-l-a-h-luciferian-aesthetics-of-herrschaft-single-2017-4e31f7125d0dfaef505d947642366893',
        'md5': '0f8c22ef8c5d665b13ac709e63025610',
        'info_dict': {
            'id': '4e31f7125d0dfaef505d947642366893',
            'ext': 'mp3',
            'title': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017 - М8Л8ТХ',
            'uploader': 'Игорь Мудрый',
            'uploader_id': '1459196328',
            'duration': 280,
            'view_count': int,
            'vcodec': 'none',
            'abr': 320,
            'track': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017',
            'artist': 'М8Л8ТХ',
        },
    }]

    def _real_extract(self, url):
        """Resolve a song page to its track record via the search API.

        The page's Open Graph title is used as the search text; the
        result whose 'File' equals the id from the URL is converted with
        _extract_track and its title is replaced by the page title.
        """
        audio_id = self._match_id(url)
        page = self._download_webpage(url, audio_id)
        title = self._og_search_title(page)

        candidates = self._search(title, url, audio_id)['MusicData']
        matching = next(
            item for item in candidates if item.get('File') == audio_id)

        info = self._extract_track(matching)
        info['title'] = title
        return info
|
||||
|
||||
|
||||
class MailRuMusicSearchIE(MailRuMusicSearchBaseIE):
    # Extractor for music search result pages; the URL-decoded path
    # component after /music/search/ is the search text and playlist id.
    IE_NAME = 'mailru:music:search'
    IE_DESC = 'Музыка@Mail.Ru'
    _VALID_URL = r'https?://my\.mail\.ru/music/search/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://my.mail.ru/music/search/black%20shadow',
        'info_dict': {
            'id': 'black shadow',
        },
        'playlist_mincount': 532,
    }]

    def _real_extract(self, url):
        """Collect every track of a search into one playlist.

        Result pages of LIMIT tracks are requested until a page has no
        'MusicData' list, or until the next offset would reach the total
        reported by the response. When no integer total is reported,
        paging continues until an empty page comes back.
        """
        query = compat_urllib_parse_unquote(self._match_id(url))

        entries = []

        LIMIT = 100

        for page_num in itertools.count():
            offset = page_num * LIMIT
            search = self._search(query, url, query, LIMIT, offset)

            music_data = search.get('MusicData')
            if not music_data or not isinstance(music_data, list):
                break

            for t in music_data:
                # fatal=False: records lacking a URL or file id are skipped
                track = self._extract_track(t, fatal=False)
                if track:
                    entries.append(track)

            total = try_get(
                search, lambda x: x['Results']['music']['Total'], int)

            # Compare the offset of the NEXT page with the total, so that no
            # further request is made once every reported result has been
            # covered.
            if total is not None and offset + LIMIT >= total:
                break

        return self.playlist_result(entries, query)
|
||||
|
||||
@@ -141,6 +141,7 @@ class MedialaanIE(GigyaBaseIE):
|
||||
|
||||
vod_id = config.get('vodId') or self._search_regex(
|
||||
(r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
|
||||
r'"vodId"\s*:\s*"(.+?)"',
|
||||
r'<[^>]+id=["\']vod-(\d+)'),
|
||||
webpage, 'video_id', default=None)
|
||||
|
||||
|
||||
@@ -68,11 +68,11 @@ class NationalGeographicVideoIE(InfoExtractor):
|
||||
|
||||
class NationalGeographicIE(ThePlatformIE, AdobePassIE):
|
||||
IE_NAME = 'natgeo'
|
||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:wild/)?[^/]+/)?(?:videos|episodes)/(?P<id>[^/?]+)'
|
||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:(?:wild/)?[^/]+/)?(?:videos|episodes)|u)/(?P<id>[^/?]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
|
||||
'url': 'http://channel.nationalgeographic.com/u/kdi9Ld0PN2molUUIMSBGxoeDhD729KRjQcnxtetilWPMevo8ZwUBIDuPR0Q3D2LVaTsk0MPRkRWDB8ZhqWVeyoxfsZZm36yRp1j-zPfsHEyI_EgAeFY/',
|
||||
'md5': '518c9aa655686cf81493af5cc21e2a04',
|
||||
'info_dict': {
|
||||
'id': 'vKInpacll2pC',
|
||||
@@ -86,7 +86,7 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
|
||||
'add_ie': ['ThePlatform'],
|
||||
},
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
|
||||
'url': 'http://channel.nationalgeographic.com/u/kdvOstqYaBY-vSBPyYgAZRUL4sWUJ5XUUPEhc7ISyBHqoIO4_dzfY3K6EjHIC0hmFXoQ7Cpzm6RkET7S3oMlm6CFnrQwSUwo/',
|
||||
'md5': 'c4912f656b4cbe58f3e000c489360989',
|
||||
'info_dict': {
|
||||
'id': 'Pok5lWCkiEFA',
|
||||
@@ -106,6 +106,14 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -43,9 +41,14 @@ class NaverIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
m_id = re.search(r'var rmcPlayer = new nhn\.rmcnmv\.RMCVideoPlayer\("(.+?)", "(.+?)"',
|
||||
webpage)
|
||||
if m_id is None:
|
||||
vid = self._search_regex(
|
||||
r'videoId["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||
'video id', fatal=None, group='value')
|
||||
in_key = self._search_regex(
|
||||
r'inKey["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||
'key', default=None, group='value')
|
||||
|
||||
if not vid or not in_key:
|
||||
error = self._html_search_regex(
|
||||
r'(?s)<div class="(?:nation_error|nation_box|error_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
|
||||
webpage, 'error', default=None)
|
||||
@@ -53,9 +56,9 @@ class NaverIE(InfoExtractor):
|
||||
raise ExtractorError(error, expected=True)
|
||||
raise ExtractorError('couldn\'t extract vid and key')
|
||||
video_data = self._download_json(
|
||||
'http://play.rmcnmv.naver.com/vod/play/v2.0/' + m_id.group(1),
|
||||
'http://play.rmcnmv.naver.com/vod/play/v2.0/' + vid,
|
||||
video_id, query={
|
||||
'key': m_id.group(2),
|
||||
'key': in_key,
|
||||
})
|
||||
meta = video_data['meta']
|
||||
title = meta['subject']
|
||||
|
||||
@@ -1,22 +1,27 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class NexxIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
https?://api\.nexx(?:\.cloud|cdn\.com)/v3/\d+/videos/byid/|
|
||||
nexx:(?:\d+:)?|
|
||||
https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/|
|
||||
nexx:(?:(?P<domain_id_s>\d+):)?|
|
||||
https?://arc\.nexx\.cloud/api/video/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
@@ -57,6 +62,21 @@ class NexxIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# does not work via arc
|
||||
'url': 'nexx:741:1269984',
|
||||
'md5': 'c714b5b238b2958dc8d5642addba6886',
|
||||
'info_dict': {
|
||||
'id': '1269984',
|
||||
'ext': 'mp4',
|
||||
'title': '1 TAG ohne KLO... wortwörtlich! 😑',
|
||||
'alt_title': '1 TAG ohne KLO... wortwörtlich! 😑',
|
||||
'description': 'md5:4604539793c49eda9443ab5c5b1d612f',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 607,
|
||||
'timestamp': 1518614955,
|
||||
'upload_date': '20180214',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
|
||||
'only_matching': True,
|
||||
@@ -103,12 +123,99 @@ class NexxIE(InfoExtractor):
|
||||
def _extract_url(webpage):
|
||||
return NexxIE._extract_urls(webpage)[0]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
def _handle_error(self, response):
    """Raise an expected ExtractorError unless the response status is 2xx.

    The status is looked up at response['metadata']['status']; a falsy
    lookup result falls back to 200, i.e. is treated as success. On failure
    the message is built from response['metadata']['errorhint'].
    """
    status = int_or_none(
        try_get(response, lambda x: x['metadata']['status']) or 200)
    if not 200 <= status < 300:
        raise ExtractorError(
            '%s said: %s' % (self.IE_NAME, response['metadata']['errorhint']),
            expected=True)
|
||||
|
||||
video = self._download_json(
|
||||
def _call_api(self, domain_id, path, video_id, data=None, headers=None):
    """Call the nexx.cloud v3 API and return the 'result' part of the reply.

    domain_id and path are interpolated into the API URL, video_id is
    passed through to _download_json, data is sent form-encoded and
    headers are extra request headers. The reply is checked with
    _handle_error before its 'result' entry is returned.
    """
    # Work on a private copy: the previous `headers={}` default was mutated
    # in place, which leaked state between calls through the shared default
    # dict and also modified any dict handed in by the caller.
    headers = dict(headers or {})
    headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
    result = self._download_json(
        'https://api.nexx.cloud/v3/%s/%s' % (domain_id, path), video_id,
        'Downloading %s JSON' % path, data=urlencode_postdata(data),
        headers=headers)
    self._handle_error(result)
    return result['result']
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video = None
|
||||
|
||||
response = self._download_json(
|
||||
'https://arc.nexx.cloud/api/video/%s.json' % video_id,
|
||||
video_id)['result']
|
||||
video_id, fatal=False)
|
||||
if response and isinstance(response, dict):
|
||||
result = response.get('result')
|
||||
if result and isinstance(result, dict):
|
||||
video = result
|
||||
|
||||
# not all videos work via arc, e.g. nexx:741:1269984
|
||||
if not video:
|
||||
# Reverse engineered from JS code (see getDeviceID function)
|
||||
device_id = '%d:%d:%d%d' % (
|
||||
random.randint(1, 4), int(time.time()),
|
||||
random.randint(1e4, 99999), random.randint(1, 9))
|
||||
|
||||
result = self._call_api(domain_id, 'session/init', video_id, data={
|
||||
'nxp_devh': device_id,
|
||||
'nxp_userh': '',
|
||||
'precid': '0',
|
||||
'playlicense': '0',
|
||||
'screenx': '1920',
|
||||
'screeny': '1080',
|
||||
'playerversion': '6.0.00',
|
||||
'gateway': 'html5',
|
||||
'adGateway': '',
|
||||
'explicitlanguage': 'en-US',
|
||||
'addTextTemplates': '1',
|
||||
'addDomainData': '1',
|
||||
'addAdModel': '1',
|
||||
}, headers={
|
||||
'X-Request-Enable-Auth-Fallback': '1',
|
||||
})
|
||||
|
||||
cid = result['general']['cid']
|
||||
|
||||
# As described in [1] X-Request-Token generation algorithm is
|
||||
# as follows:
|
||||
# md5( operation + domain_id + domain_secret )
|
||||
# where domain_secret is a static value that will be given by nexx.tv
|
||||
# as per [1]. Here is how this "secret" is generated (reversed
|
||||
# from _play.api.init function, search for clienttoken). So it's
|
||||
# actually not static and not that much of a secret.
|
||||
# 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf
|
||||
secret = result['device']['clienttoken'][int(device_id[0]):]
|
||||
secret = secret[0:len(secret) - int(device_id[-1])]
|
||||
|
||||
op = 'byid'
|
||||
|
||||
# Reversed from JS code for _play.api.call function (search for
|
||||
# X-Request-Token)
|
||||
request_token = hashlib.md5(
|
||||
''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()
|
||||
|
||||
video = self._call_api(
|
||||
domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
|
||||
'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description',
|
||||
'addInteractionOptions': '1',
|
||||
'addStatusDetails': '1',
|
||||
'addStreamDetails': '1',
|
||||
'addCaptions': '1',
|
||||
'addScenes': '1',
|
||||
'addHotSpots': '1',
|
||||
'addBumpers': '1',
|
||||
'captionFormat': 'data',
|
||||
}, headers={
|
||||
'X-Request-CID': cid,
|
||||
'X-Request-Token': request_token,
|
||||
})
|
||||
|
||||
general = video['general']
|
||||
title = general['title']
|
||||
|
||||
@@ -198,7 +198,7 @@ class NickNightIE(NickDeIE):
|
||||
|
||||
class NickRuIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'nickelodeonru'
|
||||
_VALID_URL = r'https?://(?:www\.)nickelodeon\.(?:ru|fr|es|pt|ro|hu)/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)nickelodeon\.(?:ru|fr|es|pt|ro|hu|com\.tr)/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nickelodeon.ru/shows/henrydanger/videos/episodes/3-sezon-15-seriya-licenziya-na-polyot/pmomfb#playlist/7airc6',
|
||||
'only_matching': True,
|
||||
@@ -220,6 +220,9 @@ class NickRuIE(MTVServicesInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.nickelodeon.hu/musorok/spongyabob-kockanadrag/videok/episodes/buborekfujas-az-elszakadt-nadrag/q57iob#playlist/k6te4y',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.nickelodeon.com.tr/programlar/sunger-bob/videolar/kayip-yatak/mgqbjy',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -4,15 +4,17 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
ExtractorError,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class NineNowIE(InfoExtractor):
|
||||
IE_NAME = '9now.com.au'
|
||||
_VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P<id>[^/?#]+)'
|
||||
_GEO_COUNTRIES = ['AU']
|
||||
_TESTS = [{
|
||||
# clip
|
||||
'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc',
|
||||
@@ -75,7 +77,9 @@ class NineNowIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
'url': smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
{'geo_countries': self._GEO_COUNTRIES}),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': common_data.get('description'),
|
||||
|
||||
@@ -43,7 +43,8 @@ class NJPWWorldIE(InfoExtractor):
|
||||
webpage, urlh = self._download_webpage_handle(
|
||||
'https://njpwworld.com/auth/login', None,
|
||||
note='Logging in', errnote='Unable to login',
|
||||
data=urlencode_postdata({'login_id': username, 'pw': password}))
|
||||
data=urlencode_postdata({'login_id': username, 'pw': password}),
|
||||
headers={'Referer': 'https://njpwworld.com/auth'})
|
||||
# /auth/login will return 302 for successful logins
|
||||
if urlh.geturl() == 'https://njpwworld.com/auth/login':
|
||||
self.report_warning('unable to login')
|
||||
|
||||
@@ -195,6 +195,10 @@ class NPOIE(NPOBaseIE):
|
||||
formats = []
|
||||
urls = set()
|
||||
|
||||
def is_legal_url(format_url):
    # A URL is usable when it is non-empty, has not been collected in
    # `urls` yet and starts with "//", "http://" or "https://".
    # Return values mirror a short-circuiting `and` chain: the falsy URL
    # itself, False for a duplicate, otherwise the re.match result.
    if not format_url:
        return format_url
    if format_url in urls:
        return False
    return re.match(r'^(?:https?:)?//', format_url)
|
||||
|
||||
QUALITY_LABELS = ('Laag', 'Normaal', 'Hoog')
|
||||
QUALITY_FORMATS = ('adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std')
|
||||
|
||||
@@ -208,7 +212,7 @@ class NPOIE(NPOBaseIE):
|
||||
})['items'][0]
|
||||
for num, item in enumerate(items):
|
||||
item_url = item.get('url')
|
||||
if not item_url or item_url in urls:
|
||||
if not is_legal_url(item_url):
|
||||
continue
|
||||
urls.add(item_url)
|
||||
format_id = self._search_regex(
|
||||
@@ -229,7 +233,7 @@ class NPOIE(NPOBaseIE):
|
||||
quality = quality_from_format_id(format_id)
|
||||
f_id = format_id
|
||||
else:
|
||||
quality, f_id = None
|
||||
quality, f_id = [None] * 2
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': f_id,
|
||||
@@ -279,7 +283,7 @@ class NPOIE(NPOBaseIE):
|
||||
if not is_live:
|
||||
for num, stream in enumerate(metadata.get('streams', [])):
|
||||
stream_url = stream.get('url')
|
||||
if not stream_url or stream_url in urls:
|
||||
if not is_legal_url(stream_url):
|
||||
continue
|
||||
urls.add(stream_url)
|
||||
# smooth streaming is not supported
|
||||
|
||||
@@ -19,7 +19,18 @@ from ..utils import (
|
||||
|
||||
|
||||
class OdnoklassnikiIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer|live)/(?P<id>[\d-]+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:(?:www|m|mobile)\.)?
|
||||
(?:odnoklassniki|ok)\.ru/
|
||||
(?:
|
||||
video(?:embed)?/|
|
||||
web-api/video/moviePlayer/|
|
||||
live/|
|
||||
dk\?.*?st\.mvId=
|
||||
)
|
||||
(?P<id>[\d-]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
# metadata in JSON
|
||||
'url': 'http://ok.ru/video/20079905452',
|
||||
@@ -101,6 +112,9 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.ok.ru/live/484531969818',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -243,7 +243,7 @@ class PhantomJSwrapper(object):
|
||||
|
||||
|
||||
class OpenloadIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://openload.co/f/kUEfGclsU9o',
|
||||
@@ -298,6 +298,9 @@ class OpenloadIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://oload.stream/f/KnG-kKZdcfY',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://oload.xyz/f/WwRBpzW8Wtk',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
||||
@@ -334,10 +337,11 @@ class OpenloadIE(InfoExtractor):
|
||||
|
||||
decoded_id = (get_element_by_id('streamurl', webpage) or
|
||||
get_element_by_id('streamuri', webpage) or
|
||||
get_element_by_id('streamurj', webpage))
|
||||
|
||||
if not decoded_id:
|
||||
raise ExtractorError('Can\'t find stream URL', video_id=video_id)
|
||||
get_element_by_id('streamurj', webpage) or
|
||||
self._search_regex(
|
||||
(r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
|
||||
r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)'), webpage,
|
||||
'stream URL'))
|
||||
|
||||
video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
|
||||
|
||||
|
||||
@@ -115,12 +115,13 @@ class PornHubIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
self._set_cookie('pornhub.com', 'age_verified', '1')
|
||||
|
||||
def dl_webpage(platform):
|
||||
self._set_cookie('pornhub.com', 'platform', platform)
|
||||
return self._download_webpage(
|
||||
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
|
||||
video_id, headers={
|
||||
'Cookie': 'age_verified=1; platform=%s' % platform,
|
||||
})
|
||||
video_id)
|
||||
|
||||
webpage = dl_webpage('pc')
|
||||
|
||||
|
||||
@@ -133,7 +133,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
(?:
|
||||
prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
|
||||
)\.(?:de|at|ch)|
|
||||
ran\.de|fem\.com|advopedia\.de
|
||||
ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
|
||||
)
|
||||
/(?P<id>.+)
|
||||
'''
|
||||
@@ -326,6 +326,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# geo restricted to Germany
|
||||
'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
|
||||
'only_matching': True,
|
||||
@@ -343,7 +348,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
r'"clip_id"\s*:\s+"(\d+)"',
|
||||
r'clipid: "(\d+)"',
|
||||
r'clip[iI]d=(\d+)',
|
||||
r'clip[iI]d\s*=\s*["\'](\d+)',
|
||||
r'clip[iI][dD]\s*=\s*["\'](\d+)',
|
||||
r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
|
||||
r'proMamsId"\s*:\s*"(\d+)',
|
||||
r'proMamsId"\s*:\s*"(\d+)',
|
||||
|
||||
@@ -0,0 +1,102 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class RayWenderlichIE(InfoExtractor):
    # Extractor for videos.raywenderlich.com course lessons. A lesson URL
    # yields either the single lesson (delegated to Vimeo) or a playlist of
    # all lessons linked from the page, depending on --no-playlist.
    _VALID_URL = r'https?://videos\.raywenderlich\.com/courses/(?P<course_id>[^/]+)/lessons/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
        'info_dict': {
            'id': '248377018',
            'ext': 'mp4',
            'title': 'Testing In iOS Episode 1: Introduction',
            'duration': 133,
            'uploader': 'Ray Wenderlich',
            'uploader_id': 'user3304672',
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
        'add_ie': [VimeoIE.ie_key()],
        'expected_warnings': ['HTTP Error 403: Forbidden'],
    }, {
        'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
        'info_dict': {
            'title': 'Testing in iOS',
            'id': '105-testing-in-ios',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 29,
    }]

    def _real_extract(self, url):
        """Return a Vimeo url_result for one lesson or a course playlist.

        A single lesson is returned when --no-playlist is set or when the
        URL carries the smuggled 'force_video' flag (set on the playlist
        entries built below). Raises an expected ExtractorError when the
        page shows the subscriber-only marker.
        """
        url, smuggled_data = unsmuggle_url(url, {})

        mobj = re.match(self._VALID_URL, url)
        course_id, lesson_id = mobj.group('course_id', 'id')
        video_id = '%s/%s' % (course_id, lesson_id)

        webpage = self._download_webpage(url, video_id)

        no_playlist = self._downloader.params.get('noplaylist')
        if no_playlist or smuggled_data.get('force_video', False):
            if no_playlist:
                self.to_screen(
                    'Downloading just video %s because of --no-playlist'
                    % video_id)
            if '>Subscribe to unlock' in webpage:
                raise ExtractorError(
                    'This content is only available for subscribers',
                    expected=True)
            vimeo_id = self._search_regex(
                r'data-vimeo-id=["\'](\d+)', webpage, 'video id')
            return self.url_result(
                VimeoIE._smuggle_referrer(
                    'https://player.vimeo.com/video/%s' % vimeo_id, url),
                ie=VimeoIE.ie_key(), video_id=vimeo_id)

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download video'
            % course_id)

        # Start with the requested lesson, then add every lesson linked
        # from the page; the set removes duplicates.
        lesson_ids = set((lesson_id, ))
        for lesson in re.findall(
                r'(<a[^>]+\bclass=["\']lesson-link[^>]+>)', webpage):
            attrs = extract_attributes(lesson)
            if not attrs:
                continue
            lesson_url = attrs.get('href')
            if not lesson_url:
                continue
            linked_id = self._search_regex(
                r'/lessons/(\d+)', lesson_url, 'lesson id', default=None)
            if not linked_id:
                continue
            lesson_ids.add(linked_id)

        # IDs are digit strings (both regexes capture \d+), so order them
        # numerically; a plain string sort would yield 1, 10, 11, ..., 2.
        entries = [
            self.url_result(
                smuggle_url(urljoin(url, entry_id), {'force_video': True}),
                ie=RayWenderlichIE.ie_key())
            for entry_id in sorted(lesson_ids, key=int)]

        title = self._search_regex(
            r'class=["\']course-title[^>]+>([^<]+)', webpage, 'course title',
            default=None)

        return self.playlist_result(entries, course_id, title)
|
||||
@@ -53,6 +53,12 @@ class RuutuIE(InfoExtractor):
|
||||
'age_limit': 0,
|
||||
},
|
||||
},
|
||||
# Episode where <SourceFile> is "NOT-USED", but has other
|
||||
# downloadable sources available.
|
||||
{
|
||||
'url': 'http://www.ruutu.fi/video/3193728',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -72,7 +78,7 @@ class RuutuIE(InfoExtractor):
|
||||
video_url = child.text
|
||||
if (not video_url or video_url in processed_urls or
|
||||
any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
|
||||
return
|
||||
continue
|
||||
processed_urls.append(video_url)
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
|
||||
@@ -4,22 +4,30 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from ..utils import update_url_query
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
try_get,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class SevenPlusIE(BrightcoveNewIE):
|
||||
IE_NAME = '7plus'
|
||||
_VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://7plus.com.au/BEAT?episode-id=BEAT-001',
|
||||
'url': 'https://7plus.com.au/MTYS?episode-id=MTYS7-003',
|
||||
'info_dict': {
|
||||
'id': 'BEAT-001',
|
||||
'id': 'MTYS7-003',
|
||||
'ext': 'mp4',
|
||||
'title': 'S1 E1 - Help / Lucy In The Sky With Diamonds',
|
||||
'description': 'md5:37718bea20a8eedaca7f7361af566131',
|
||||
'title': 'S7 E3 - Wind Surf',
|
||||
'description': 'md5:29c6a69f21accda7601278f81b46483d',
|
||||
'uploader_id': '5303576322001',
|
||||
'upload_date': '20171031',
|
||||
'timestamp': 1509440068,
|
||||
'upload_date': '20171201',
|
||||
'timestamp': 1512106377,
|
||||
'series': 'Mighty Ships',
|
||||
'season_number': 7,
|
||||
'episode_number': 3,
|
||||
'episode': 'Wind Surf',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
@@ -63,5 +71,14 @@ class SevenPlusIE(BrightcoveNewIE):
|
||||
value = item.get(src_key)
|
||||
if value:
|
||||
info[dst_key] = value
|
||||
info['series'] = try_get(
|
||||
item, lambda x: x['seriesLogo']['name'], compat_str)
|
||||
mobj = re.search(r'^S(\d+)\s+E(\d+)\s+-\s+(.+)$', info['title'])
|
||||
if mobj:
|
||||
info.update({
|
||||
'season_number': int(mobj.group(1)),
|
||||
'episode_number': int(mobj.group(2)),
|
||||
'episode': mobj.group(3),
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
@@ -157,7 +157,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
_CLIENT_ID = 'DQskPX1pntALRzMp4HSxya3Mc0AO66Ro'
|
||||
_CLIENT_ID = 'LvWovRaJZlWCHql0bISuum8Bd2KX79mb'
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
|
||||
@@ -3,7 +3,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
parse_resolution,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class SpankBangIE(InfoExtractor):
|
||||
@@ -15,7 +20,7 @@ class SpankBangIE(InfoExtractor):
|
||||
'id': '3vvn',
|
||||
'ext': 'mp4',
|
||||
'title': 'fantasy solo',
|
||||
'description': 'Watch fantasy solo free HD porn video - 05 minutes - Babe,Masturbation,Solo,Toy - dillion harper masturbates on a bed free adult movies sexy clips.',
|
||||
'description': 'dillion harper masturbates on a bed',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'silly2587',
|
||||
'age_limit': 18,
|
||||
@@ -32,36 +37,49 @@ class SpankBangIE(InfoExtractor):
|
||||
# mobile page
|
||||
'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# 4k
|
||||
'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(url, video_id, headers={
|
||||
'Cookie': 'country=US'
|
||||
})
|
||||
|
||||
if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
|
||||
raise ExtractorError(
|
||||
'Video %s is not available' % video_id, expected=True)
|
||||
|
||||
stream_key = self._html_search_regex(
|
||||
r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
|
||||
webpage, 'stream key')
|
||||
|
||||
formats = [{
|
||||
'url': 'http://spankbang.com/_%s/%s/title/%sp__mp4' % (video_id, stream_key, height),
|
||||
'ext': 'mp4',
|
||||
'format_id': '%sp' % height,
|
||||
'height': int(height),
|
||||
} for height in re.findall(r'<(?:span|li|p)[^>]+[qb]_(\d+)p', webpage)]
|
||||
self._check_formats(formats, video_id)
|
||||
formats = []
|
||||
for mobj in re.finditer(
|
||||
r'stream_url_(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
|
||||
webpage):
|
||||
format_id, format_url = mobj.group('id', 'url')
|
||||
f = parse_resolution(format_id)
|
||||
f.update({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
|
||||
description = self._og_search_description(webpage)
|
||||
description = self._search_regex(
|
||||
r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)',
|
||||
webpage, 'description', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
uploader = self._search_regex(
|
||||
r'class="user"[^>]*><img[^>]+>([^<]+)',
|
||||
webpage, 'uploader', default=None)
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)',
|
||||
webpage, 'duration', fatal=False))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'([\d,.]+)\s+plays', webpage, 'view count', fatal=False))
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
@@ -71,6 +89,8 @@ class SpankBangIE(InfoExtractor):
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
||||
@@ -75,6 +75,9 @@ class SteamIE(InfoExtractor):
|
||||
gameID = m.group('gameID')
|
||||
playlist_id = gameID
|
||||
videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id
|
||||
|
||||
self._set_cookie('steampowered.com', 'mature_content', '1')
|
||||
|
||||
webpage = self._download_webpage(videourl, playlist_id)
|
||||
|
||||
if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
|
||||
|
||||
@@ -4,8 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_chr
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
)
|
||||
@@ -32,12 +34,34 @@ class StreamangoIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'gone',
|
||||
}, {
|
||||
'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
def decrypt_src(encoded, val):
    # Decode an obfuscated source string: characters outside
    # [A-Za-z0-9+/=] are dropped, the rest is read in groups of four
    # indices into a custom 65-character alphabet and repacked into up to
    # three characters per group. Only the first character of each group
    # is XORed with `val`.
    ALPHABET = '=/+9876543210zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA'
    cleaned = re.sub(r'[^A-Za-z0-9+/=]', '', encoded)
    pieces = []
    for start in range(0, len(cleaned), 4):
        # Index each position explicitly (no slicing) so that a trailing
        # incomplete group still raises IndexError.
        first, second, third, fourth = [
            ALPHABET.index(cleaned[start + offset]) for offset in range(4)]
        pieces.append(compat_chr(((first << 0x2) | (second >> 0x4)) ^ val))
        # Index 0x40 suppresses the corresponding output character
        # (presumably the padding marker of the original JS routine).
        if third != 0x40:
            pieces.append(
                compat_chr(((second & 0xf) << 0x4) | (third >> 0x2)))
        if fourth != 0x40:
            pieces.append(compat_chr(((third & 0x3) << 0x6) | fourth))
    return ''.join(pieces)
|
||||
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
@@ -46,13 +70,26 @@ class StreamangoIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
|
||||
video = self._parse_json(
|
||||
format_, video_id, transform_source=js_to_json, fatal=False)
|
||||
if not video:
|
||||
mobj = re.search(r'(src\s*:\s*[^(]+\(([^)]*)\)[\s,]*)', format_)
|
||||
if mobj is None:
|
||||
continue
|
||||
src = video.get('src')
|
||||
|
||||
format_ = format_.replace(mobj.group(0), '')
|
||||
|
||||
video = self._parse_json(
|
||||
format_, video_id, transform_source=js_to_json,
|
||||
fatal=False) or {}
|
||||
|
||||
mobj = re.search(
|
||||
r'([\'"])(?P<src>(?:(?!\1).)+)\1\s*,\s*(?P<val>\d+)',
|
||||
mobj.group(1))
|
||||
if mobj is None:
|
||||
continue
|
||||
|
||||
src = decrypt_src(mobj.group('src'), int_or_none(mobj.group('val')))
|
||||
if not src:
|
||||
continue
|
||||
|
||||
ext = determine_ext(src, default_ext=None)
|
||||
if video.get('type') == 'application/dash+xml' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
@@ -65,6 +102,16 @@ class StreamangoIE(InfoExtractor):
|
||||
'height': int_or_none(video.get('height')),
|
||||
'tbr': int_or_none(video.get('bitrate')),
|
||||
})
|
||||
|
||||
if not formats:
|
||||
error = self._search_regex(
|
||||
r'<p[^>]+\bclass=["\']lead[^>]+>(.+?)</p>', webpage,
|
||||
'error', default=None)
|
||||
if not error and '>Sorry' in webpage:
|
||||
error = 'Video %s is not available' % video_id
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
||||
@@ -4,11 +4,17 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
@@ -122,7 +128,11 @@ class SVTIE(SVTBaseIE):
|
||||
return info_dict
|
||||
|
||||
|
||||
class SVTPlayIE(SVTBaseIE):
|
||||
class SVTPlayBaseIE(SVTBaseIE):
    # Matches the `root["__svtplay"] = {...};` assignment embedded in a
    # page (either quote style, any number of leading underscores) and
    # captures the object literal in the named group 'json'.
    _SVTPLAY_RE = (
        r'root\s*\[\s*(["\'])_*svtplay\1\s*\]'
        r'\s*=\s*(?P<json>{.+?})\s*;\s*\n')
|
||||
|
||||
|
||||
class SVTPlayIE(SVTPlayBaseIE):
|
||||
IE_DESC = 'SVT Play and Öppet arkiv'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
@@ -157,8 +167,8 @@ class SVTPlayIE(SVTBaseIE):
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'root\["__svtplay"\]\s*=\s*([^;]+);',
|
||||
webpage, 'embedded data', default='{}'),
|
||||
self._SVTPLAY_RE, webpage, 'embedded data', default='{}',
|
||||
group='json'),
|
||||
video_id, fatal=False)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
@@ -189,3 +199,84 @@ class SVTPlayIE(SVTBaseIE):
|
||||
r'\s*\|\s*.+?$', '',
|
||||
info_dict.get('episode') or self._og_search_title(webpage))
|
||||
return info_dict
|
||||
|
||||
|
||||
class SVTSeriesIE(SVTPlayBaseIE):
    # Playlist extractor for svtplay.se series pages; an optional `tab`
    # query parameter restricts the playlist to a single season.
    _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://www.svtplay.se/rederiet',
        'info_dict': {
            'id': 'rederiet',
            'title': 'Rederiet',
            'description': 'md5:505d491a58f4fcf6eb418ecab947e69e',
        },
        'playlist_mincount': 318,
    }, {
        'url': 'https://www.svtplay.se/rederiet?tab=sasong2',
        'info_dict': {
            'id': 'rederiet-sasong2',
            'title': 'Rederiet - Säsong 2',
            'description': 'md5:505d491a58f4fcf6eb418ecab947e69e',
        },
        'playlist_count': 12,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that SVTIE or SVTPlayIE already handle."""
        if SVTIE.suitable(url) or SVTPlayIE.suitable(url):
            return False
        return super(SVTSeriesIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build a playlist of SVTPlay entries for a series or one season.

        The playlist id is the series slug, suffixed with '-<tab>' when a
        season tab is selected. Title and description come from the
        embedded page data's 'metaData' object when present.
        """
        series_id = self._match_id(url)

        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        season_slug = qs.get('tab', [None])[0]

        if season_slug:
            series_id += '-%s' % season_slug

        webpage = self._download_webpage(
            url, series_id, 'Downloading series page')

        root = self._parse_json(
            self._search_regex(
                self._SVTPLAY_RE, webpage, 'content', group='json'),
            series_id)

        season_name = None

        entries = []
        for season in root['relatedVideoContent']['relatedVideosAccordion']:
            if not isinstance(season, dict):
                continue
            if season_slug:
                # Only the season selected through ?tab= is wanted.
                if season.get('slug') != season_slug:
                    continue
                season_name = season.get('name')
            videos = season.get('videos')
            if not isinstance(videos, list):
                continue
            for video in videos:
                # Skip malformed entries instead of failing on .get();
                # this mirrors the type checks applied to seasons above.
                if not isinstance(video, dict):
                    continue
                content_url = video.get('contentUrl')
                if not content_url or not isinstance(content_url, compat_str):
                    continue
                entries.append(
                    self.url_result(
                        urljoin(url, content_url),
                        ie=SVTPlayIE.ie_key(),
                        video_title=video.get('title')
                    ))

        metadata = root.get('metaData')
        if not isinstance(metadata, dict):
            metadata = {}

        title = metadata.get('title')
        # Fall back to the raw slug when the season carries no name.
        season_name = season_name or season_slug

        if title and season_name:
            title = '%s - %s' % (title, season_name)
        elif season_slug:
            title = season_slug

        return self.playlist_result(
            entries, series_id, title, metadata.get('description'))
|
||||
|
||||
@@ -10,19 +10,33 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class TeleQuebecIE(InfoExtractor):
|
||||
class TeleQuebecBaseIE(InfoExtractor):
    # Base class providing the Limelight hand-off used by Tele-Quebec
    # extractors.
    @staticmethod
    def _limelight_result(media_id):
        """Return a url_transparent result pointing at a Limelight media id.

        The 'limelight:media:<media_id>' URL is smuggled with
        geo_countries set to ['CA'] and routed to the LimelightMedia
        extractor via 'ie_key'.
        """
        limelight_url = smuggle_url(
            'limelight:media:' + media_id, {'geo_countries': ['CA']})
        return {
            '_type': 'url_transparent',
            'url': limelight_url,
            'ie_key': 'LimelightMedia',
        }
|
||||
|
||||
|
||||
class TeleQuebecIE(TeleQuebecBaseIE):
|
||||
_VALID_URL = r'https?://zonevideo\.telequebec\.tv/media/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://zonevideo.telequebec.tv/media/20984/le-couronnement-de-new-york/couronnement-de-new-york',
|
||||
'md5': 'fe95a0957e5707b1b01f5013e725c90f',
|
||||
# available till 01.01.2023
|
||||
'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane',
|
||||
'info_dict': {
|
||||
'id': '20984',
|
||||
'id': '577116881b4b439084e6b1cf4ef8b1b3',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le couronnement de New York',
|
||||
'description': 'md5:f5b3d27a689ec6c1486132b2d687d432',
|
||||
'upload_date': '20170201',
|
||||
'timestamp': 1485972222,
|
||||
}
|
||||
'title': 'Un petit choc et puis repart!',
|
||||
'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374',
|
||||
'upload_date': '20180222',
|
||||
'timestamp': 1519326631,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# no description
|
||||
'url': 'http://zonevideo.telequebec.tv/media/30261',
|
||||
@@ -31,19 +45,107 @@ class TeleQuebecIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_id = self._match_id(url)
|
||||
|
||||
media_data = self._download_json(
|
||||
'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id,
|
||||
media_id)['media']
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': media_id,
|
||||
'url': smuggle_url(
|
||||
'limelight:media:' + media_data['streamInfo']['sourceId'],
|
||||
{'geo_countries': ['CA']}),
|
||||
'title': media_data['title'],
|
||||
|
||||
info = self._limelight_result(media_data['streamInfo']['sourceId'])
|
||||
info.update({
|
||||
'title': media_data.get('title'),
|
||||
'description': try_get(
|
||||
media_data, lambda x: x['descriptions'][0]['text'], compat_str),
|
||||
'duration': int_or_none(
|
||||
media_data.get('durationInMilliseconds'), 1000),
|
||||
'ie_key': 'LimelightMedia',
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class TeleQuebecEmissionIE(TeleQuebecBaseIE):
    """Extractor for telequebec.tv show pages that embed a Limelight media id."""

    # Matches either <sub>.telequebec.tv/emissions/<id> or
    # (www.)telequebec.tv/<id>; the id runs up to the first ?, # or &.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            [^/]+\.telequebec\.tv/emissions/|
                            (?:www\.)?telequebec\.tv/
                        )
                        (?P<id>[^?#&]+)
                    '''
    _TESTS = [{
        'url': 'http://lindicemcsween.telequebec.tv/emissions/100430013/des-soins-esthetiques-a-377-d-interets-annuels-ca-vous-tente',
        'info_dict': {
            'id': '66648a6aef914fe3badda25e81a4d50a',
            'ext': 'mp4',
            'title': "Des soins esthétiques à 377 % d'intérêts annuels, ça vous tente?",
            'description': 'md5:369e0d55d0083f1fc9b71ffb640ea014',
            'upload_date': '20171024',
            'timestamp': 1508862118,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://bancpublic.telequebec.tv/emissions/emission-49/31986/jeunes-meres-sous-pression',
        'only_matching': True,
    }, {
        'url': 'http://www.telequebec.tv/masha-et-michka/epi059masha-et-michka-3-053-078',
        'only_matching': True,
    }, {
        'url': 'http://www.telequebec.tv/documentaire/bebes-sur-mesure/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page, locate its Limelight id and delegate to it.

        Title and description come from the page's Open Graph data and are
        both optional (``default=None``).
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The page is expected to carry: mediaUID: 'Limelight_<32 chars>'
        limelight_id = self._search_regex(
            r'mediaUID\s*:\s*["\'][Ll]imelight_(?P<id>[a-z0-9]{32})', webpage,
            'limelight id')

        result = self._limelight_result(limelight_id)
        result['title'] = self._og_search_title(webpage, default=None)
        result['description'] = self._og_search_description(
            webpage, default=None)
        return result
|
||||
|
||||
|
||||
class TeleQuebecLiveIE(InfoExtractor):
    """Extractor for the zonevideo.telequebec.tv live stream page."""

    _VALID_URL = r'https?://zonevideo\.telequebec\.tv/(?P<id>endirect)'
    _TEST = {
        'url': 'http://zonevideo.telequebec.tv/endirect/',
        'info_dict': {
            'id': 'endirect',
            'ext': 'mp4',
            'title': 're:^Télé-Québec - En direct [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Resolve the live HLS manifest and return the live stream info.

        The manifest URL is read from the player script when that script can
        be downloaded and contains an ``m3U8Url`` entry; otherwise a
        hard-coded manifest URL is used.
        """
        video_id = self._match_id(url)

        # Non-fatal download: a failure just means we use the fallback URL.
        player_js = self._download_webpage(
            'https://player.telequebec.tv/Tq_VideoPlayer.js', video_id,
            fatal=False)

        discovered_url = None
        if player_js:
            discovered_url = self._search_regex(
                r'm3U8Url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', player_js,
                'm3u8 url', default=None, group='url')

        manifest_url = (
            discovered_url
            or 'https://teleqmmd.mmdlive.lldns.net/teleqmmd/f386e3b206814e1f8c8c1c71c0f8e748/manifest.m3u8')

        formats = self._extract_m3u8_formats(
            manifest_url, video_id, 'mp4', m3u8_id='hls')
        self._sort_formats(formats)

        live_title = self._live_title('Télé-Québec - En direct')
        return {
            'id': video_id,
            'title': live_title,
            'is_live': True,
            'formats': formats,
        }
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class TennisTVIE(InfoExtractor):
    """Extractor for tennistv.com video pages.

    A login is performed during initialization; the resulting session token
    is sent with the per-video entitlement check.
    """

    _VALID_URL = r'https?://(?:www\.)?tennistv\.com/videos/(?P<id>[-a-z0-9]+)'
    _TEST = {
        'url': 'https://www.tennistv.com/videos/indian-wells-2018-verdasco-fritz',
        'info_dict': {
            'id': 'indian-wells-2018-verdasco-fritz',
            'ext': 'mp4',
            'title': 'Fernando Verdasco v Taylor Fritz',
            'description': 're:^After his stunning victory.{174}$',
            'thumbnail': 'https://atp-prod.akamaized.net/api/images/v1/images/112831/landscape/1242/0',
            'timestamp': 1521017381,
            'upload_date': '20180314',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires email and password of a subscribed account',
    }
    _NETRC_MACHINE = 'tennistv'

    def _login(self):
        """Log in and store the session token in ``self._session_token``.

        Raises:
            ExtractorError: (expected) when no credentials are configured or
                when the login response carries a non-empty error code.
        """
        username, password = self._get_login_info()
        if not username or not password:
            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)

        login_form = {
            'Email': username,
            'Password': password,
        }
        login_json = json.dumps(login_form).encode('utf-8')
        headers = {
            'content-type': 'application/json',
            'Referer': 'https://www.tennistv.com/login',
            'Origin': 'https://www.tennistv.com',
        }

        login_result = self._download_json(
            'https://www.tennistv.com/api/users/v1/login', None,
            note='Logging in',
            errnote='Login failed (wrong password?)',
            headers=headers,
            data=login_json)

        # Tolerate a missing or null error object instead of failing with a
        # KeyError/TypeError that would hide the real outcome of the login.
        error = login_result.get('error') or {}
        if error.get('errorCode'):
            # A rejected login is a user-side problem, not an extractor bug,
            # hence expected=True.
            raise ExtractorError(
                'Login failed, %s said: %r' % (self.IE_NAME, error.get('errorMessage')),
                expected=True)

        if login_result.get('entitlement') != 'SUBSCRIBED':
            self.report_warning('%s may not be subscribed to %s.' % (username, self.IE_NAME))

        # The token is required for every later request, so keep it mandatory.
        self._session_token = login_result['sessionToken']

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Extract formats and metadata for a single video page.

        The stream URL and the title are mandatory; all other metadata is
        optional and resolves to None when absent from the API response.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        internal_id = self._search_regex(r'video=([0-9]+)', webpage, 'internal video id')

        headers = {
            'Origin': 'https://www.tennistv.com',
            'authorization': 'ATP %s' % self._session_token,
            'content-type': 'application/json',
            'Referer': url,
        }
        check_data = {
            'videoID': internal_id,
            'VideoUrlType': 'HLSV3',
        }
        check_json = json.dumps(check_data).encode('utf-8')
        check_result = self._download_json(
            'https://www.tennistv.com/api/users/v1/entitlementchecknondiva',
            video_id, note='Checking video authorization', headers=headers, data=check_json)
        formats = self._extract_m3u8_formats(check_result['contentUrl'], video_id, ext='mp4')
        self._sort_formats(formats)

        vdata_url = 'https://www.tennistv.com/api/channels/v1/de/none/video/%s' % video_id
        vdata = self._download_json(vdata_url, video_id)

        # Sub-objects may be missing or null; fall back to empty dicts so the
        # optional lookups below cannot abort the extraction.
        video = vdata.get('video') or {}
        display_text = vdata.get('displayText') or {}
        seo = vdata.get('seo') or {}

        # The title is a mandatory field: fail loudly if it is not there.
        title = video['title']

        return {
            'id': video_id,
            'title': title,
            'description': display_text.get('description'),
            'formats': formats,
            'thumbnail': video.get('thumbnailUrl'),
            'timestamp': unified_timestamp(vdata.get('timestamp')),
            'series': vdata.get('tour'),
            'season': display_text.get('venue'),
            'episode': seo.get('round'),
        }
|
||||
@@ -132,7 +132,7 @@ class ToggleIE(InfoExtractor):
|
||||
formats = []
|
||||
for video_file in info.get('Files', []):
|
||||
video_url, vid_format = video_file.get('URL'), video_file.get('Format')
|
||||
if not video_url or not vid_format:
|
||||
if not video_url or video_url == 'NA' or not vid_format:
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
vid_format = vid_format.replace(' ', '')
|
||||
@@ -143,6 +143,18 @@ class ToggleIE(InfoExtractor):
|
||||
note='Downloading %s m3u8 information' % vid_format,
|
||||
errnote='Failed to download %s m3u8 information' % vid_format,
|
||||
fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url, video_id, mpd_id=vid_format,
|
||||
note='Downloading %s MPD manifest' % vid_format,
|
||||
errnote='Failed to download %s MPD manifest' % vid_format,
|
||||
fatal=False))
|
||||
elif ext == 'ism':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
video_url, video_id, ism_id=vid_format,
|
||||
note='Downloading %s ISM manifest' % vid_format,
|
||||
errnote='Failed to download %s ISM manifest' % vid_format,
|
||||
fatal=False))
|
||||
elif ext in ('mp4', 'wvm'):
|
||||
# wvm are drm-protected files
|
||||
formats.append({
|
||||
|
||||
@@ -31,6 +31,12 @@ class Tube8IE(KeezMoviesIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)',
|
||||
webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage, info = self._extract_info(url)
|
||||
|
||||
|
||||
+140
-36
@@ -7,8 +7,10 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
parse_duration,
|
||||
try_get,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
@@ -16,8 +18,9 @@ from ..utils import (
|
||||
class TVNowBaseIE(InfoExtractor):
|
||||
_VIDEO_FIELDS = (
|
||||
'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
|
||||
'broadcastStartDate', 'isDrm', 'duration', 'manifest.dashclear',
|
||||
'format.defaultImage169Format', 'format.defaultImage169Logo')
|
||||
'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
|
||||
'manifest.dashclear', 'format.title', 'format.defaultImage169Format',
|
||||
'format.defaultImage169Logo')
|
||||
|
||||
def _call_api(self, path, video_id, query):
|
||||
return self._download_json(
|
||||
@@ -56,61 +59,84 @@ class TVNowBaseIE(InfoExtractor):
|
||||
duration = parse_duration(info.get('duration'))
|
||||
|
||||
f = info.get('format', {})
|
||||
|
||||
thumbnails = [{
|
||||
'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
|
||||
}]
|
||||
thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
|
||||
if thumbnail:
|
||||
thumbnails.append({
|
||||
'url': thumbnail,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'series': f.get('title'),
|
||||
'season_number': int_or_none(info.get('season')),
|
||||
'episode_number': int_or_none(info.get('episode')),
|
||||
'episode': title,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class TVNowIE(TVNowBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
|
||||
(?P<show_id>[^/]+)/
|
||||
(?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
# rtl
|
||||
'url': 'https://www.tvnow.de/rtl/alarm-fuer-cobra-11/freier-fall/player?return=/rtl',
|
||||
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
|
||||
'info_dict': {
|
||||
'id': '385314',
|
||||
'display_id': 'alarm-fuer-cobra-11/freier-fall',
|
||||
'id': '331082',
|
||||
'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
|
||||
'ext': 'mp4',
|
||||
'title': 'Freier Fall',
|
||||
'description': 'md5:8c2d8f727261adf7e0dc18366124ca02',
|
||||
'title': 'Der neue Porsche 911 GT 3',
|
||||
'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1512677700,
|
||||
'upload_date': '20171207',
|
||||
'duration': 2862.0,
|
||||
'timestamp': 1495994400,
|
||||
'upload_date': '20170528',
|
||||
'duration': 5283,
|
||||
'series': 'GRIP - Das Motormagazin',
|
||||
'season_number': 14,
|
||||
'episode_number': 405,
|
||||
'episode': 'Der neue Porsche 911 GT 3',
|
||||
},
|
||||
}, {
|
||||
# rtl2
|
||||
'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
|
||||
'only_matching': 'True',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# rtlnitro
|
||||
'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
|
||||
'only_matching': 'True',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# superrtl
|
||||
'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
|
||||
'only_matching': 'True',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# ntv
|
||||
'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
|
||||
'only_matching': 'True',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# vox
|
||||
'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
|
||||
'only_matching': 'True',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# rtlplus
|
||||
'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
|
||||
'only_matching': 'True',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -124,8 +150,30 @@ class TVNowIE(TVNowBaseIE):
|
||||
return self._extract_video(info, display_id)
|
||||
|
||||
|
||||
class TVNowListIE(TVNowBaseIE):
|
||||
_VALID_URL = r'(?P<base_url>https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/)list/(?P<id>[^?/#&]+)$'
|
||||
class TVNowListBaseIE(TVNowBaseIE):
    """Base for the TVNow playlist extractors.

    Subclasses supply ``_SHOW_FIELDS`` and ``_SEASON_FIELDS``;
    ``_VIDEO_FIELDS`` is read from the class hierarchy as well.
    """

    # Verbose pattern capturing the show root URL (base_url) and the show
    # slug (show_id).
    _SHOW_VALID_URL = r'''(?x)
                    (?P<base_url>
                        https?://
                            (?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
                            (?P<show_id>[^/]+)
                    )
                    '''

    def _extract_list_info(self, display_id, show_id):
        """Query the ``formats/seo`` API endpoint for a show.

        The requested field list is the show fields, followed by the season
        fields prefixed with ``formatTabs.``, followed by the video fields
        prefixed with ``formatTabs.formatTabPages.container.movies.``.
        """
        show_fields = list(self._SHOW_FIELDS)
        season_fields = [
            'formatTabs.%s' % name for name in self._SEASON_FIELDS]
        video_fields = [
            'formatTabs.formatTabPages.container.movies.%s' % name
            for name in self._VIDEO_FIELDS]

        query = {
            'fields': ','.join(show_fields + season_fields + video_fields),
            'name': show_id + '.php'
        }
        return self._call_api('formats/seo', display_id, query=query)
|
||||
|
||||
|
||||
class TVNowListIE(TVNowListBaseIE):
|
||||
_VALID_URL = r'%s/(?:list|jahr)/(?P<id>[^?\#&]+)' % TVNowListBaseIE._SHOW_VALID_URL
|
||||
|
||||
_SHOW_FIELDS = ('title', )
|
||||
_SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
|
||||
@@ -138,38 +186,94 @@ class TVNowListIE(TVNowBaseIE):
|
||||
'title': '30 Minuten Deutschland - Aktuell',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}, {
|
||||
'url': 'https://www.tvnow.de/vox/ab-ins-beet/list/staffel-14',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/2018/3',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
def suitable(cls, url):
    """Reject URLs that TVNowIE accepts; otherwise defer to the default check."""
    if TVNowIE.suitable(url):
        return False
    return super(TVNowListIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
base_url, show_id, season_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
fields = []
|
||||
fields.extend(self._SHOW_FIELDS)
|
||||
fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
|
||||
fields.extend(
|
||||
'formatTabs.formatTabPages.container.movies.%s' % field
|
||||
for field in self._VIDEO_FIELDS)
|
||||
|
||||
list_info = self._call_api(
|
||||
'formats/seo', season_id, query={
|
||||
'fields': ','.join(fields),
|
||||
'name': show_id + '.php'
|
||||
})
|
||||
list_info = self._extract_list_info(season_id, show_id)
|
||||
|
||||
season = next(
|
||||
season for season in list_info['formatTabs']['items']
|
||||
if season.get('seoheadline') == season_id)
|
||||
|
||||
title = '%s - %s' % (list_info['title'], season['headline'])
|
||||
title = list_info.get('title')
|
||||
headline = season.get('headline')
|
||||
if title and headline:
|
||||
title = '%s - %s' % (title, headline)
|
||||
else:
|
||||
title = headline or title
|
||||
|
||||
entries = []
|
||||
for container in season['formatTabPages']['items']:
|
||||
for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []:
|
||||
items = try_get(
|
||||
container, lambda x: x['container']['movies']['items'],
|
||||
list) or []
|
||||
for info in items:
|
||||
seo_url = info.get('seoUrl')
|
||||
if not seo_url:
|
||||
continue
|
||||
video_id = info.get('id')
|
||||
entries.append(self.url_result(
|
||||
base_url + seo_url + '/player', 'TVNow', info.get('id')))
|
||||
'%s/%s/player' % (base_url, seo_url), TVNowIE.ie_key(),
|
||||
compat_str(video_id) if video_id else None))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, compat_str(season.get('id') or season_id), title)
|
||||
|
||||
|
||||
class TVNowShowIE(TVNowListBaseIE):
    """Playlist extractor for a whole TVNow show: one entry per season tab."""

    _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL

    _SHOW_FIELDS = ('id', 'title', )
    _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
    # No per-video fields are requested at show level.
    _VIDEO_FIELDS = ()

    _TESTS = [{
        'url': 'https://www.tvnow.at/vox/ab-ins-beet',
        'info_dict': {
            'id': 'ab-ins-beet',
            'title': 'Ab ins Beet!',
        },
        'playlist_mincount': 7,
    }, {
        'url': 'https://www.tvnow.at/vox/ab-ins-beet/list',
        'only_matching': True,
    }, {
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Only claim URLs that neither TVNowIE nor TVNowListIE accept."""
        if TVNowIE.suitable(url) or TVNowListIE.suitable(url):
            return False
        return super(TVNowShowIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist whose entries are the show's season list URLs.

        Seasons without a ``seoheadline`` are skipped.
        """
        base_url, show_id = re.match(self._VALID_URL, url).groups()

        list_info = self._extract_list_info(show_id, show_id)

        entries = []
        for season_info in list_info['formatTabs']['items']:
            seo_headline = season_info.get('seoheadline')
            if not seo_headline:
                continue
            raw_season_id = season_info.get('id')
            season_id = compat_str(raw_season_id) if raw_season_id else None
            season_list_url = '%s/list/%s' % (base_url, seo_headline)
            entries.append(self.url_result(
                season_list_url, TVNowListIE.ie_key(), season_id,
                season_info.get('headline')))

        return self.playlist_result(entries, show_id, list_info.get('title'))
|
||||
|
||||
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
class TwentyFourVideoIE(InfoExtractor):
|
||||
IE_NAME = '24video'
|
||||
_VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sex|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sexy?|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.24video.net/video/view/1044982',
|
||||
|
||||
@@ -5,6 +5,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_kwargs,
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
@@ -114,6 +115,11 @@ class UdemyIE(InfoExtractor):
|
||||
error_str += ' - %s' % error_data.get('formErrors')
|
||||
raise ExtractorError(error_str, expected=True)
|
||||
|
||||
def _download_webpage(self, *args, **kwargs):
    """Download a webpage with a fixed Safari User-Agent header.

    The ``User-Agent`` entry of the ``headers`` keyword argument is always
    overwritten; a ``headers`` dict is created when none was passed.
    """
    request_headers = kwargs.setdefault('headers', {})
    request_headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4'
    parent = super(UdemyIE, self)
    return parent._download_webpage(*args, **compat_kwargs(kwargs))
|
||||
|
||||
def _download_json(self, url_or_request, *args, **kwargs):
|
||||
headers = {
|
||||
'X-Udemy-Snail-Case': 'true',
|
||||
|
||||
+176
-110
@@ -5,113 +5,52 @@ import re
|
||||
import time
|
||||
import hashlib
|
||||
import json
|
||||
import random
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from .youtube import YoutubeIE
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
str_or_none,
|
||||
parse_duration,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class ViceBaseIE(AdobePassIE):
|
||||
def _extract_preplay_video(self, url, locale, webpage):
|
||||
watch_hub_data = extract_attributes(self._search_regex(
|
||||
r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub'))
|
||||
video_id = watch_hub_data['vms-id']
|
||||
title = watch_hub_data['video-title']
|
||||
|
||||
query = {}
|
||||
is_locked = watch_hub_data.get('video-locked') == '1'
|
||||
if is_locked:
|
||||
resource = self._get_mvpd_resource(
|
||||
'VICELAND', title, video_id,
|
||||
watch_hub_data.get('video-rating'))
|
||||
query['tvetoken'] = self._extract_mvpd_auth(
|
||||
url, video_id, 'VICELAND', resource)
|
||||
|
||||
# signature generation algorithm is reverse engineered from signatureGenerator in
|
||||
# webpack:///../shared/~/vice-player/dist/js/vice-player.js in
|
||||
# https://www.viceland.com/assets/common/js/web.vendor.bundle.js
|
||||
exp = int(time.time()) + 14400
|
||||
query.update({
|
||||
'exp': exp,
|
||||
'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
|
||||
})
|
||||
|
||||
try:
|
||||
host = 'www.viceland' if is_locked else self._PREPLAY_HOST
|
||||
preplay = self._download_json(
|
||||
'https://%s.com/%s/preplay/%s' % (host, locale, video_id),
|
||||
video_id, query=query)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
error = json.loads(e.cause.read().decode())
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, error['details']), expected=True)
|
||||
raise
|
||||
|
||||
video_data = preplay['video']
|
||||
base = video_data['base']
|
||||
uplynk_preplay_url = preplay['preplayURL']
|
||||
episode = video_data.get('episode', {})
|
||||
channel = video_data.get('channel', {})
|
||||
|
||||
subtitles = {}
|
||||
cc_url = preplay.get('ccURL')
|
||||
if cc_url:
|
||||
subtitles['en'] = [{
|
||||
'url': cc_url,
|
||||
}]
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': uplynk_preplay_url,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': base.get('body') or base.get('display_body'),
|
||||
'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'),
|
||||
'duration': int_or_none(video_data.get('video_duration')) or parse_duration(watch_hub_data.get('video-duration')),
|
||||
'timestamp': int_or_none(video_data.get('created_at'), 1000),
|
||||
'age_limit': parse_age_limit(video_data.get('video_rating')),
|
||||
'series': video_data.get('show_title') or watch_hub_data.get('show-title'),
|
||||
'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')),
|
||||
'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
|
||||
'season_number': int_or_none(watch_hub_data.get('season')),
|
||||
'season_id': str_or_none(episode.get('season_id')),
|
||||
'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'),
|
||||
'uploader_id': str_or_none(channel.get('id')),
|
||||
'subtitles': subtitles,
|
||||
'ie_key': 'UplynkPreplay',
|
||||
}
|
||||
|
||||
|
||||
class ViceIE(ViceBaseIE):
|
||||
class ViceIE(AdobePassIE):
|
||||
IE_NAME = 'vice'
|
||||
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:(?P<locale>[^/]+)/)?videos?/(?P<id>[^/?#&]+)'
|
||||
|
||||
_VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?viceland)\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
|
||||
'md5': '7d3ae2f9ba5f196cdd9f9efd43657ac2',
|
||||
'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7',
|
||||
'info_dict': {
|
||||
'id': 'N2bzkydjraWDGwnt8jAttCF6Y0PDv4Zj',
|
||||
'ext': 'flv',
|
||||
'title': 'Monkey Labs of Holland',
|
||||
'description': 'md5:92b3c7dcbfe477f772dd4afa496c9149',
|
||||
'id': '5e647f0125e145c9aef2069412c0cbde',
|
||||
'ext': 'mp4',
|
||||
'title': '10 Questions You Always Wanted To Ask: Pet Cremator',
|
||||
'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5',
|
||||
'uploader': 'vice',
|
||||
'uploader_id': '57a204088cb727dec794c67b',
|
||||
'timestamp': 1489664942,
|
||||
'upload_date': '20170316',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['UplynkPreplay'],
|
||||
}, {
|
||||
# geo restricted to US
|
||||
'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
|
||||
'info_dict': {
|
||||
'id': '5816510690b70e6c5fd39a56',
|
||||
'id': '930c0ad1f47141cc955087eecaddb0e2',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Waypoint',
|
||||
'uploader': 'waypoint',
|
||||
'title': 'The Signal From Tölva',
|
||||
'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
|
||||
'uploader_id': '57f7d621e05ca860fa9ccaf9',
|
||||
@@ -139,27 +78,131 @@ class ViceIE(ViceBaseIE):
|
||||
'params': {
|
||||
# AES-encrypted m3u8
|
||||
'skip_download': True,
|
||||
'proxy': '127.0.0.1:8118',
|
||||
},
|
||||
'add_ie': ['UplynkPreplay'],
|
||||
}, {
|
||||
'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vms.vice.com/en_us/video/preplay/58c69e38a55424f1227dc3f7',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PREPLAY_HOST = 'video.vice'
|
||||
_PREPLAY_HOST = 'vms.vice'
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]+)',
|
||||
webpage)
|
||||
|
||||
@staticmethod
def _extract_url(webpage):
    """Return the first embed URL found by ``_extract_urls``, or None."""
    found = ViceIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
|
||||
|
||||
def _real_extract(self, url):
|
||||
locale, video_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
embed_code = self._search_regex(
|
||||
r'embedCode=([^&\'"]+)', webpage,
|
||||
'ooyala embed code', default=None)
|
||||
if embed_code:
|
||||
return self.url_result('ooyala:%s' % embed_code, 'Ooyala')
|
||||
youtube_id = self._search_regex(
|
||||
r'data-youtube-id="([^"]+)"', webpage, 'youtube id', default=None)
|
||||
if youtube_id:
|
||||
return self.url_result(youtube_id, 'Youtube')
|
||||
return self._extract_preplay_video(urlh.geturl(), locale, webpage)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'https://video.vice.com/%s/embed/%s' % (locale, video_id),
|
||||
video_id)
|
||||
|
||||
video = self._parse_json(
|
||||
self._search_regex(
|
||||
r'PREFETCH_DATA\s*=\s*({.+?})\s*;\s*\n', webpage,
|
||||
'app state'), video_id)['video']
|
||||
video_id = video.get('vms_id') or video.get('id') or video_id
|
||||
title = video['title']
|
||||
is_locked = video.get('locked')
|
||||
rating = video.get('rating')
|
||||
thumbnail = video.get('thumbnail_url')
|
||||
duration = int_or_none(video.get('duration'))
|
||||
series = try_get(
|
||||
video, lambda x: x['episode']['season']['show']['title'],
|
||||
compat_str)
|
||||
episode_number = try_get(
|
||||
video, lambda x: x['episode']['episode_number'])
|
||||
season_number = try_get(
|
||||
video, lambda x: x['episode']['season']['season_number'])
|
||||
uploader = None
|
||||
|
||||
query = {}
|
||||
if is_locked:
|
||||
resource = self._get_mvpd_resource(
|
||||
'VICELAND', title, video_id, rating)
|
||||
query['tvetoken'] = self._extract_mvpd_auth(
|
||||
url, video_id, 'VICELAND', resource)
|
||||
|
||||
# signature generation algorithm is reverse engineered from signatureGenerator in
|
||||
# webpack:///../shared/~/vice-player/dist/js/vice-player.js in
|
||||
# https://www.viceland.com/assets/common/js/web.vendor.bundle.js
|
||||
# new JS is located here https://vice-web-statics-cdn.vice.com/vice-player/player-embed.js
|
||||
exp = int(time.time()) + 1440
|
||||
|
||||
query.update({
|
||||
'exp': exp,
|
||||
'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
|
||||
'_ad_blocked': None,
|
||||
'_ad_unit': '',
|
||||
'_debug': '',
|
||||
'platform': 'desktop',
|
||||
'rn': random.randint(10000, 100000),
|
||||
'fbprebidtoken': '',
|
||||
})
|
||||
|
||||
try:
|
||||
host = 'www.viceland' if is_locked else self._PREPLAY_HOST
|
||||
preplay = self._download_json(
|
||||
'https://%s.com/%s/video/preplay/%s' % (host, locale, video_id),
|
||||
video_id, query=query)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401):
|
||||
error = json.loads(e.cause.read().decode())
|
||||
error_message = error.get('error_description') or error['details']
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, error_message), expected=True)
|
||||
raise
|
||||
|
||||
video_data = preplay['video']
|
||||
base = video_data['base']
|
||||
uplynk_preplay_url = preplay['preplayURL']
|
||||
episode = video_data.get('episode', {})
|
||||
channel = video_data.get('channel', {})
|
||||
|
||||
subtitles = {}
|
||||
cc_url = preplay.get('ccURL')
|
||||
if cc_url:
|
||||
subtitles['en'] = [{
|
||||
'url': cc_url,
|
||||
}]
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': uplynk_preplay_url,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': base.get('body') or base.get('display_body'),
|
||||
'thumbnail': thumbnail,
|
||||
'duration': int_or_none(video_data.get('video_duration')) or duration,
|
||||
'timestamp': int_or_none(video_data.get('created_at'), 1000),
|
||||
'age_limit': parse_age_limit(video_data.get('video_rating')),
|
||||
'series': video_data.get('show_title') or series,
|
||||
'episode_number': int_or_none(episode.get('episode_number') or episode_number),
|
||||
'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
|
||||
'season_number': int_or_none(season_number),
|
||||
'season_id': str_or_none(episode.get('season_id')),
|
||||
'uploader': channel.get('base', {}).get('title') or channel.get('name') or uploader,
|
||||
'uploader_id': str_or_none(channel.get('id')),
|
||||
'subtitles': subtitles,
|
||||
'ie_key': 'UplynkPreplay',
|
||||
}
|
||||
|
||||
|
||||
class ViceShowIE(InfoExtractor):
|
||||
@@ -203,14 +246,15 @@ class ViceArticleIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
|
||||
'info_dict': {
|
||||
'id': '58dc0a3dee202d2a0ccfcbd8',
|
||||
'id': '41eae2a47b174a1398357cec55f1f6fc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mormon War on Porn ',
|
||||
'description': 'md5:ad396a2481e7f8afb5ed486878421090',
|
||||
'uploader': 'VICE',
|
||||
'uploader_id': '57a204088cb727dec794c693',
|
||||
'timestamp': 1489160690,
|
||||
'upload_date': '20170310',
|
||||
'description': 'md5:6394a8398506581d0346b9ab89093fef',
|
||||
'uploader': 'vice',
|
||||
'uploader_id': '57a204088cb727dec794c67b',
|
||||
'timestamp': 1491883129,
|
||||
'upload_date': '20170411',
|
||||
'age_limit': 17,
|
||||
},
|
||||
'params': {
|
||||
# AES-encrypted m3u8
|
||||
@@ -219,17 +263,35 @@ class ViceArticleIE(InfoExtractor):
|
||||
'add_ie': ['UplynkPreplay'],
|
||||
}, {
|
||||
'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
|
||||
'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
|
||||
'md5': '7fe8ebc4fa3323efafc127b82bd821d9',
|
||||
'info_dict': {
|
||||
'id': '3jstaBeXgAs',
|
||||
'ext': 'mp4',
|
||||
'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
|
||||
'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
|
||||
'uploader_id': 'MotherboardTV',
|
||||
'uploader': 'Motherboard',
|
||||
'uploader_id': 'MotherboardTV',
|
||||
'upload_date': '20140529',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded',
|
||||
'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
|
||||
'info_dict': {
|
||||
'id': 'e2ed435eb67e43efb66e6ef9a6930a88',
|
||||
'ext': 'mp4',
|
||||
'title': "Making The World's First Male Sex Doll",
|
||||
'description': 'md5:916078ef0e032d76343116208b6cc2c4',
|
||||
'uploader': 'vice',
|
||||
'uploader_id': '57a204088cb727dec794c67b',
|
||||
'timestamp': 1476919911,
|
||||
'upload_date': '20161019',
|
||||
'age_limit': 17,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [ViceIE.ie_key()],
|
||||
}, {
|
||||
'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
|
||||
'only_matching': True,
|
||||
@@ -244,8 +306,8 @@ class ViceArticleIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
prefetch_data = self._parse_json(self._search_regex(
|
||||
r'window\.__PREFETCH_DATA\s*=\s*({.*});',
|
||||
webpage, 'prefetch data'), display_id)
|
||||
r'__APP_STATE\s*=\s*({.+?})(?:\s*\|\|\s*{}\s*)?;\s*\n',
|
||||
webpage, 'app state'), display_id)['pageData']
|
||||
body = prefetch_data['body']
|
||||
|
||||
def _url_res(video_url, ie_key):
|
||||
@@ -256,6 +318,10 @@ class ViceArticleIE(InfoExtractor):
|
||||
'ie_key': ie_key,
|
||||
}
|
||||
|
||||
vice_url = ViceIE._extract_url(webpage)
|
||||
if vice_url:
|
||||
return _url_res(vice_url, ViceIE.ie_key())
|
||||
|
||||
embed_code = self._search_regex(
|
||||
r'embedCode=([^&\'"]+)', body,
|
||||
'ooyala embed code', default=None)
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .vice import ViceBaseIE
|
||||
|
||||
|
||||
class VicelandIE(ViceBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?viceland\.com/(?P<locale>[^/]+)/video/[^/]+/(?P<id>[a-f0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.viceland.com/en_us/video/trapped/588a70d0dba8a16007de7316',
|
||||
'info_dict': {
|
||||
'id': '588a70d0dba8a16007de7316',
|
||||
'ext': 'mp4',
|
||||
'title': 'TRAPPED (Series Trailer)',
|
||||
'description': 'md5:7a8e95c2b6cd86461502a2845e581ccf',
|
||||
'age_limit': 14,
|
||||
'timestamp': 1485474122,
|
||||
'upload_date': '20170126',
|
||||
'uploader_id': '57a204098cb727dec794c6a3',
|
||||
'uploader': 'Viceland',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['UplynkPreplay'],
|
||||
'skip': '404',
|
||||
}
|
||||
_PREPLAY_HOST = 'www.viceland'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
locale = mobj.group('locale')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
return self._extract_preplay_video(url, locale, webpage)
|
||||
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
class VideaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
videa\.hu/
|
||||
videa(?:kid)?\.hu/
|
||||
(?:
|
||||
videok/(?:[^/]+/)*[^?#&]+-|
|
||||
player\?.*?\bv=|
|
||||
@@ -31,7 +31,7 @@ class VideaIE(InfoExtractor):
|
||||
'id': '8YfIAjxwWGwT8HVQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Az őrült kígyász 285 kígyót enged szabadon',
|
||||
'thumbnail': 'http://videa.hu/static/still/1.4.1.1007274.1204470.3',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'duration': 21,
|
||||
},
|
||||
}, {
|
||||
@@ -43,6 +43,15 @@ class VideaIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://videakid.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://videakid.hu/player?v=8YfIAjxwWGwT8HVQ',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -0,0 +1,125 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
unified_strdate,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class VidLiiIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vidlii\.com/(?:watch|embed)\?.*?\bv=(?P<id>[0-9A-Za-z_-]{11})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.vidlii.com/watch?v=tJluaH4BJ3v',
|
||||
'md5': '9bf7d1e005dfa909b6efb0a1ff5175e2',
|
||||
'info_dict': {
|
||||
'id': 'tJluaH4BJ3v',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vidlii is against me',
|
||||
'description': 'md5:fa3f119287a2bfb922623b52b1856145',
|
||||
'thumbnail': 're:https://.*.jpg',
|
||||
'uploader': 'APPle5auc31995',
|
||||
'uploader_url': 'https://www.vidlii.com/user/APPle5auc31995',
|
||||
'upload_date': '20171107',
|
||||
'duration': 212,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'average_rating': float,
|
||||
'categories': ['News & Politics'],
|
||||
'tags': ['Vidlii', 'Jan', 'Videogames'],
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.vidlii.com/embed?v=tJluaH4BJ3v&a=0',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'https://www.vidlii.com/watch?v=%s' % video_id, video_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1', webpage,
|
||||
'video url', group='url')
|
||||
|
||||
title = self._search_regex(
|
||||
(r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage,
|
||||
'title')
|
||||
|
||||
description = self._html_search_meta(
|
||||
('description', 'twitter:description'), webpage,
|
||||
default=None) or strip_or_none(
|
||||
get_element_by_id('des_text', webpage))
|
||||
|
||||
thumbnail = self._html_search_meta(
|
||||
'twitter:image', webpage, default=None)
|
||||
if not thumbnail:
|
||||
thumbnail_path = self._search_regex(
|
||||
r'img\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'thumbnail', fatal=False, group='url')
|
||||
if thumbnail_path:
|
||||
thumbnail = urljoin(url, thumbnail_path)
|
||||
|
||||
uploader = self._search_regex(
|
||||
r'<div[^>]+class=["\']wt_person[^>]+>\s*<a[^>]+\bhref=["\']/user/[^>]+>([^<]+)',
|
||||
webpage, 'uploader', fatal=False)
|
||||
uploader_url = 'https://www.vidlii.com/user/%s' % uploader if uploader else None
|
||||
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'datePublished', webpage, default=None) or self._search_regex(
|
||||
r'<date>([^<]+)', webpage, 'upload date', fatal=False))
|
||||
|
||||
duration = int_or_none(self._html_search_meta(
|
||||
'video:duration', webpage, 'duration',
|
||||
default=None) or self._search_regex(
|
||||
r'duration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
|
||||
|
||||
view_count = int_or_none(self._search_regex(
|
||||
(r'<strong>(\d+)</strong> views',
|
||||
r'Views\s*:\s*<strong>(\d+)</strong>'),
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
(r'<span[^>]+id=["\']cmt_num[^>]+>(\d+)',
|
||||
r'Comments\s*:\s*<strong>(\d+)'),
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
average_rating = float_or_none(self._search_regex(
|
||||
r'rating\s*:\s*([\d.]+)', webpage, 'average rating', fatal=False))
|
||||
|
||||
category = self._html_search_regex(
|
||||
r'<div>Category\s*:\s*</div>\s*<div>\s*<a[^>]+>([^<]+)', webpage,
|
||||
'category', fatal=False)
|
||||
categories = [category] if category else None
|
||||
|
||||
tags = [
|
||||
strip_or_none(tag)
|
||||
for tag in re.findall(
|
||||
r'<a[^>]+\bhref=["\']/results\?.*?q=[^>]*>([^<]+)',
|
||||
webpage) if strip_or_none(tag)
|
||||
] or None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_url': uploader_url,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'average_rating': average_rating,
|
||||
'categories': categories,
|
||||
'tags': tags,
|
||||
}
|
||||
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class VidziIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://vidzi.tv/cghql9yq6emu.html',
|
||||
'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
|
||||
@@ -32,6 +32,9 @@ class VidziIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://vidzi.cc/cghql9yq6emu.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vidzi.si/rph9gztxj1et.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -41,21 +41,30 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
if self._LOGIN_REQUIRED:
|
||||
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
||||
return
|
||||
self.report_login()
|
||||
webpage = self._download_webpage(self._LOGIN_URL, None, False)
|
||||
webpage = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
token, vuid = self._extract_xsrft_and_vuid(webpage)
|
||||
data = urlencode_postdata({
|
||||
data = {
|
||||
'action': 'login',
|
||||
'email': username,
|
||||
'password': password,
|
||||
'service': 'vimeo',
|
||||
'token': token,
|
||||
})
|
||||
login_request = sanitized_Request(self._LOGIN_URL, data)
|
||||
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
login_request.add_header('Referer', self._LOGIN_URL)
|
||||
}
|
||||
self._set_vimeo_cookie('vuid', vuid)
|
||||
self._download_webpage(login_request, None, False, 'Wrong login info')
|
||||
try:
|
||||
self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Logging in',
|
||||
data=urlencode_postdata(data), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'Referer': self._LOGIN_URL,
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 418:
|
||||
raise ExtractorError(
|
||||
'Unable to log in: bad username or password',
|
||||
expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _verify_video_password(self, url, video_id, webpage):
|
||||
password = self._downloader.params.get('videopassword')
|
||||
|
||||
@@ -12,7 +12,7 @@ import time
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
@@ -39,11 +39,11 @@ class VRVBaseIE(InfoExtractor):
|
||||
data = json.dumps(data).encode()
|
||||
headers['Content-Type'] = 'application/json'
|
||||
method = 'POST' if data else 'GET'
|
||||
base_string = '&'.join([method, compat_urlparse.quote(base_url, ''), compat_urlparse.quote(encoded_query, '')])
|
||||
base_string = '&'.join([method, compat_urllib_parse.quote(base_url, ''), compat_urllib_parse.quote(encoded_query, '')])
|
||||
oauth_signature = base64.b64encode(hmac.new(
|
||||
(self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'),
|
||||
base_string.encode(), hashlib.sha1).digest()).decode()
|
||||
encoded_query += '&oauth_signature=' + compat_urlparse.quote(oauth_signature, '')
|
||||
encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '')
|
||||
return self._download_json(
|
||||
'?'.join([base_url, encoded_query]), video_id,
|
||||
note='Downloading %s JSON metadata' % note, headers=headers, data=data)
|
||||
|
||||
@@ -118,6 +118,15 @@ class XFileShareIE(InfoExtractor):
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(
|
||||
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1'
|
||||
% '|'.join(site for site in list(zip(*XFileShareIE._SITES))[0]),
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
@@ -1,19 +1,29 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
NO_DEFAULT,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class XNXXIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:video|www)\.xnxx\.com/video-?(?P<id>[0-9a-z]+)/'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video',
|
||||
'md5': 'ef7ecee5af78f8b03dca2cf31341d3a0',
|
||||
'md5': '7583e96c15c0f21e9da3453d9920fbba',
|
||||
'info_dict': {
|
||||
'id': '55awb78',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Skyrim Test Video',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 469,
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
},
|
||||
}, {
|
||||
@@ -26,23 +36,49 @@ class XNXXIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._search_regex(r'flv_url=(.*?)&',
|
||||
webpage, 'video URL')
|
||||
video_url = compat_urllib_parse_unquote(video_url)
|
||||
def get(meta, default=NO_DEFAULT, fatal=True):
|
||||
return self._search_regex(
|
||||
r'set%s\s*\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % meta,
|
||||
webpage, meta, default=default, fatal=fatal, group='value')
|
||||
|
||||
video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
|
||||
webpage, 'title')
|
||||
title = self._og_search_title(
|
||||
webpage, default=None) or get('VideoTitle')
|
||||
|
||||
video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
formats = []
|
||||
for mobj in re.finditer(
|
||||
r'setVideo(?:Url(?P<id>Low|High)|HLS)\s*\(\s*(?P<q>["\'])(?P<url>(?:https?:)?//.+?)(?P=q)', webpage):
|
||||
format_url = mobj.group('url')
|
||||
if determine_ext(format_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
preference=1, m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
format_id = mobj.group('id')
|
||||
if format_id:
|
||||
format_id = format_id.lower()
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
'quality': -1 if format_id == 'low' else 0,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage, default=None) or get(
|
||||
'ThumbUrl', fatal=False) or get('ThumbUrl169', fatal=False)
|
||||
duration = int_or_none(self._og_search_property('duration', webpage))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'id=["\']nb-views-number[^>]+>([\d,.]+)', webpage, 'view count',
|
||||
default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': video_title,
|
||||
'ext': 'flv',
|
||||
'thumbnail': video_thumbnail,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'age_limit': 18,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -58,7 +58,9 @@ class XVideosIE(InfoExtractor):
|
||||
group='title') or self._og_search_title(webpage)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False)
|
||||
(r'setThumbUrl\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1',
|
||||
r'url_bigthumb=(?P<thumbnail>.+?)&'),
|
||||
webpage, 'thumbnail', fatal=False, group='thumbnail')
|
||||
duration = int_or_none(self._og_search_property(
|
||||
'duration', webpage, default=None)) or parse_duration(
|
||||
self._search_regex(
|
||||
|
||||
@@ -0,0 +1,101 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class YapFilesIE(InfoExtractor):
|
||||
_YAPFILES_URL = r'//(?:(?:www|api)\.)?yapfiles\.ru/get_player/*\?.*?\bv=(?P<id>\w+)'
|
||||
_VALID_URL = r'https?:%s' % _YAPFILES_URL
|
||||
_TESTS = [{
|
||||
# with hd
|
||||
'url': 'http://www.yapfiles.ru/get_player/?v=vMDE1NjcyNDUt0413',
|
||||
'md5': '2db19e2bfa2450568868548a1aa1956c',
|
||||
'info_dict': {
|
||||
'id': 'vMDE1NjcyNDUt0413',
|
||||
'ext': 'mp4',
|
||||
'title': 'Самый худший пароль WIFI',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 72,
|
||||
},
|
||||
}, {
|
||||
# without hd
|
||||
'url': 'https://api.yapfiles.ru/get_player/?uid=video_player_1872528&plroll=1&adv=1&v=vMDE4NzI1Mjgt690b',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
|
||||
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.*?)\1'
|
||||
% YapFilesIE._YAPFILES_URL, webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id, fatal=False)
|
||||
|
||||
player_url = None
|
||||
query = {}
|
||||
if webpage:
|
||||
player_url = self._search_regex(
|
||||
r'player\.init\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'player url', default=None, group='url')
|
||||
|
||||
if not player_url:
|
||||
player_url = 'http://api.yapfiles.ru/load/%s/' % video_id
|
||||
query = {
|
||||
'md5': 'ded5f369be61b8ae5f88e2eeb2f3caff',
|
||||
'type': 'json',
|
||||
'ref': url,
|
||||
}
|
||||
|
||||
player = self._download_json(
|
||||
player_url, video_id, query=query)['player']
|
||||
|
||||
playlist_url = player['playlist']
|
||||
title = player['title']
|
||||
thumbnail = player.get('poster')
|
||||
|
||||
if title == 'Ролик удален' or 'deleted.jpg' in (thumbnail or ''):
|
||||
raise ExtractorError(
|
||||
'Video %s has been removed' % video_id, expected=True)
|
||||
|
||||
playlist = self._download_json(
|
||||
playlist_url, video_id)['player']['main']
|
||||
|
||||
hd_height = int_or_none(player.get('hd'))
|
||||
|
||||
QUALITIES = ('sd', 'hd')
|
||||
quality_key = qualities(QUALITIES)
|
||||
formats = []
|
||||
for format_id in QUALITIES:
|
||||
is_hd = format_id == 'hd'
|
||||
format_url = playlist.get(
|
||||
'file%s' % ('_hd' if is_hd else ''))
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
continue
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
'quality': quality_key(format_id),
|
||||
'height': hd_height if is_hd else None,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': int_or_none(player.get('length')),
|
||||
'formats': formats,
|
||||
}
|
||||
@@ -154,7 +154,7 @@ class YoukuIE(InfoExtractor):
|
||||
# request basic data
|
||||
basic_data_params = {
|
||||
'vid': video_id,
|
||||
'ccode': '0507',
|
||||
'ccode': '0590',
|
||||
'client_ip': '192.168.1.1',
|
||||
'utid': cna,
|
||||
'client_ts': time.time() / 1000,
|
||||
|
||||
@@ -2583,7 +2583,11 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
|
||||
}]
|
||||
|
||||
|
||||
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
|
||||
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
|
||||
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
|
||||
|
||||
|
||||
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube.com searches'
|
||||
# there doesn't appear to be a real limit, for example if you search for
|
||||
# 'python' you get more than 8.000.000 results
|
||||
@@ -2617,8 +2621,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
|
||||
raise ExtractorError(
|
||||
'[youtube] No video results', expected=True)
|
||||
|
||||
new_videos = self._ids_to_results(orderedSet(re.findall(
|
||||
r'href="/watch\?v=(.{11})', html_content)))
|
||||
new_videos = list(self._process_page(html_content))
|
||||
videos += new_videos
|
||||
if not new_videos or len(videos) > limit:
|
||||
break
|
||||
@@ -2641,11 +2644,10 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
|
||||
_EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
|
||||
|
||||
|
||||
class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
|
||||
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube.com search URLs'
|
||||
IE_NAME = 'youtube:search_url'
|
||||
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
|
||||
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
|
||||
'playlist_mincount': 5,
|
||||
|
||||
@@ -534,7 +534,7 @@ def parseOpts(overrideArguments=None):
|
||||
workarounds.add_option(
|
||||
'--prefer-insecure',
|
||||
'--prefer-unsecure', action='store_true', dest='prefer_insecure',
|
||||
help='Use an unencrypted connection to retrieve information whenever possible')
|
||||
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
|
||||
workarounds.add_option(
|
||||
'--user-agent',
|
||||
metavar='UA', dest='user_agent',
|
||||
@@ -676,7 +676,8 @@ def parseOpts(overrideArguments=None):
|
||||
filesystem.add_option(
|
||||
'-a', '--batch-file',
|
||||
dest='batchfile', metavar='FILE',
|
||||
help='File containing URLs to download (\'-\' for stdin)')
|
||||
help="File containing URLs to download ('-' for stdin), one URL per line. "
|
||||
"Lines starting with '#', ';' or ']' are considered as comments and ignored.")
|
||||
filesystem.add_option(
|
||||
'--id', default=False,
|
||||
action='store_true', dest='useid', help='Use only video ID in file name')
|
||||
|
||||
@@ -31,7 +31,8 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
|
||||
temp_filename = prepend_extension(filename, 'temp')
|
||||
|
||||
if not info.get('thumbnails'):
|
||||
raise EmbedThumbnailPPError('Thumbnail was not found. Nothing to do.')
|
||||
self._downloader.to_screen('[embedthumbnail] There aren\'t any thumbnails to embed')
|
||||
return [], info
|
||||
|
||||
thumbnail_filename = info['thumbnails'][-1]['filename']
|
||||
|
||||
|
||||
@@ -28,10 +28,10 @@ def rsa_verify(message, signature, key):
|
||||
return expected == signature
|
||||
|
||||
|
||||
def update_self(to_screen, verbose, opener, prefer_insecure=False):
|
||||
def update_self(to_screen, verbose, opener):
|
||||
"""Update the program file with the latest version from the repository"""
|
||||
|
||||
UPDATE_URL = '//rg3.github.io/youtube-dl/update/'
|
||||
UPDATE_URL = 'https://rg3.github.io/youtube-dl/update/'
|
||||
VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
|
||||
JSON_URL = UPDATE_URL + 'versions.json'
|
||||
UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
|
||||
@@ -40,13 +40,9 @@ def update_self(to_screen, verbose, opener, prefer_insecure=False):
|
||||
to_screen('It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
|
||||
return
|
||||
|
||||
def guess_scheme(url, insecure=False):
|
||||
return 'http%s:%s' % ('' if insecure is True else 's', url)
|
||||
|
||||
# Check if there is a new version
|
||||
try:
|
||||
newversion = opener.open(guess_scheme(
|
||||
VERSION_URL, prefer_insecure)).read().decode('utf-8').strip()
|
||||
newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
|
||||
except Exception:
|
||||
if verbose:
|
||||
to_screen(encode_compat_str(traceback.format_exc()))
|
||||
@@ -58,8 +54,7 @@ def update_self(to_screen, verbose, opener, prefer_insecure=False):
|
||||
|
||||
# Download and check versions info
|
||||
try:
|
||||
versions_info = opener.open(guess_scheme(
|
||||
JSON_URL, prefer_insecure)).read().decode('utf-8')
|
||||
versions_info = opener.open(JSON_URL).read().decode('utf-8')
|
||||
versions_info = json.loads(versions_info)
|
||||
except Exception:
|
||||
if verbose:
|
||||
|
||||
@@ -1211,6 +1211,11 @@ def unified_timestamp(date_str, day_first=True):
|
||||
if m:
|
||||
date_str = date_str[:-len(m.group('tz'))]
|
||||
|
||||
# Python only supports microseconds, so remove nanoseconds
|
||||
m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
|
||||
if m:
|
||||
date_str = m.group(1)
|
||||
|
||||
for expression in date_formats(day_first):
|
||||
try:
|
||||
dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
|
||||
@@ -1689,6 +1694,28 @@ def parse_count(s):
|
||||
return lookup_unit_table(_UNIT_TABLE, s)
|
||||
|
||||
|
||||
def parse_resolution(s):
|
||||
if s is None:
|
||||
return {}
|
||||
|
||||
mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
|
||||
if mobj:
|
||||
return {
|
||||
'width': int(mobj.group('w')),
|
||||
'height': int(mobj.group('h')),
|
||||
}
|
||||
|
||||
mobj = re.search(r'\b(\d+)[pPiI]\b', s)
|
||||
if mobj:
|
||||
return {'height': int(mobj.group(1))}
|
||||
|
||||
mobj = re.search(r'\b([48])[kK]\b', s)
|
||||
if mobj:
|
||||
return {'height': int(mobj.group(1)) * 540}
|
||||
|
||||
return {}
|
||||
|
||||
|
||||
def month_by_name(name, lang='en'):
|
||||
""" Return the number of a month by (locale-independently) English name """
|
||||
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2018.02.22'
|
||||
__version__ = '2018.04.09'
|
||||
|
||||
Reference in New Issue
Block a user