mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-03 19:57:06 -05:00 
			
		
		
		
	[xiami] Improve extraction (Closes #9079)
* Switch to JSON source * Add abstract IE for playlists * Extract more track related metadata
This commit is contained in:
		@@ -942,7 +942,7 @@ from .xhamster import (
 | 
			
		||||
    XHamsterEmbedIE,
 | 
			
		||||
)
 | 
			
		||||
from .xiami import (
 | 
			
		||||
    XiamiIE,
 | 
			
		||||
    XiamiSongIE,
 | 
			
		||||
    XiamiAlbumIE,
 | 
			
		||||
    XiamiArtistIE,
 | 
			
		||||
    XiamiCollectionIE
 | 
			
		||||
 
 | 
			
		||||
@@ -1,50 +1,42 @@
 | 
			
		||||
# -*- coding: utf-8 -*-
 | 
			
		||||
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    xpath_element,
 | 
			
		||||
    xpath_text,
 | 
			
		||||
    xpath_with_ns,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    ExtractorError
 | 
			
		||||
)
 | 
			
		||||
from ..compat import compat_urllib_parse_unquote
 | 
			
		||||
from ..utils import int_or_none
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class XiamiBaseIE(InfoExtractor):
 | 
			
		||||
    _API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id'
 | 
			
		||||
 | 
			
		||||
    _XML_BASE_URL = 'http://www.xiami.com/song/playlist/id'
 | 
			
		||||
    _NS_MAP = {'xm': 'http://xspf.org/ns/0/'}
 | 
			
		||||
    def _extract_track(self, track, track_id=None):
 | 
			
		||||
        title = track['title']
 | 
			
		||||
        track_url = self._decrypt(track['location'])
 | 
			
		||||
 | 
			
		||||
    def _extract_track(self, track):
 | 
			
		||||
        artist = xpath_text(track, xpath_with_ns('xm:artist', self._NS_MAP), default='')
 | 
			
		||||
        artist = artist.split(';')
 | 
			
		||||
        subtitles = {}
 | 
			
		||||
        lyrics_url = track.get('lyric_url') or track.get('lyric')
 | 
			
		||||
        if lyrics_url and lyrics_url.startswith('http'):
 | 
			
		||||
            subtitles['origin'] = [{'url': lyrics_url}]
 | 
			
		||||
 | 
			
		||||
        ret = {
 | 
			
		||||
            'id': xpath_text(track, xpath_with_ns('xm:song_id', self._NS_MAP)),
 | 
			
		||||
            'title': xpath_text(track, xpath_with_ns('xm:title', self._NS_MAP)),
 | 
			
		||||
            'album': xpath_text(track, xpath_with_ns('xm:album_name', self._NS_MAP)),
 | 
			
		||||
            'artist': ';'.join(artist) if artist else None,
 | 
			
		||||
            'creator': artist[0] if artist else None,
 | 
			
		||||
            'url': self._decrypt(xpath_text(track, xpath_with_ns('xm:location', self._NS_MAP))),
 | 
			
		||||
            'thumbnail': xpath_text(track, xpath_with_ns('xm:pic', self._NS_MAP), default=None),
 | 
			
		||||
            'duration': int_or_none(xpath_text(track, xpath_with_ns('xm:length', self._NS_MAP))),
 | 
			
		||||
        return {
 | 
			
		||||
            'id': track.get('song_id') or track_id,
 | 
			
		||||
            'url': track_url,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'thumbnail': track.get('pic') or track.get('album_pic'),
 | 
			
		||||
            'duration': int_or_none(track.get('length')),
 | 
			
		||||
            'creator': track.get('artist', '').split(';')[0],
 | 
			
		||||
            'track': title,
 | 
			
		||||
            'album': track.get('album_name'),
 | 
			
		||||
            'artist': track.get('artist'),
 | 
			
		||||
            'subtitles': subtitles,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        lyrics_url = xpath_text(track, xpath_with_ns('xm:lyric', self._NS_MAP))
 | 
			
		||||
        if lyrics_url and lyrics_url.endswith('.lrc'):
 | 
			
		||||
            ret['description'] = self._download_webpage(lyrics_url, ret['id'])
 | 
			
		||||
        return ret
 | 
			
		||||
 | 
			
		||||
    def _extract_xml(self, _id, typ=''):
 | 
			
		||||
        playlist = self._download_xml('%s/%s%s' % (self._XML_BASE_URL, _id, typ), _id)
 | 
			
		||||
        tracklist = xpath_element(playlist, xpath_with_ns('./xm:trackList', self._NS_MAP))
 | 
			
		||||
 | 
			
		||||
        if not len(tracklist):
 | 
			
		||||
            raise ExtractorError('No track found')
 | 
			
		||||
        return [self._extract_track(track) for track in tracklist]
 | 
			
		||||
    def _extract_tracks(self, item_id, typ=None):
 | 
			
		||||
        playlist = self._download_json(
 | 
			
		||||
            '%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''), item_id)
 | 
			
		||||
        return [
 | 
			
		||||
            self._extract_track(track, item_id)
 | 
			
		||||
            for track in playlist['data']['trackList']]
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def _decrypt(origin):
 | 
			
		||||
@@ -62,75 +54,87 @@ class XiamiBaseIE(InfoExtractor):
 | 
			
		||||
        ans = ''
 | 
			
		||||
        for i in range(0, short_lenth + 1):
 | 
			
		||||
            for j in range(0, n):
 | 
			
		||||
                if len(l[j])>i:
 | 
			
		||||
                if len(l[j]) > i:
 | 
			
		||||
                    ans += l[j][i]
 | 
			
		||||
        return compat_urllib_parse_unquote(ans).replace('^', '0')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class XiamiIE(XiamiBaseIE):
 | 
			
		||||
class XiamiSongIE(XiamiBaseIE):
 | 
			
		||||
    IE_NAME = 'xiami:song'
 | 
			
		||||
    IE_DESC = '虾米音乐'
 | 
			
		||||
    _VALID_URL = r'http://www\.xiami\.com/song/(?P<id>[0-9]+)'
 | 
			
		||||
    _TESTS = [
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.xiami.com/song/1775610518',
 | 
			
		||||
            'md5': '521dd6bea40fd5c9c69f913c232cb57e',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '1775610518',
 | 
			
		||||
                'ext': 'mp3',
 | 
			
		||||
                'title': 'Woman',
 | 
			
		||||
                'creator': 'HONNE',
 | 
			
		||||
                'album': 'Woman',
 | 
			
		||||
                'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
 | 
			
		||||
                'description': 'md5:052ec7de41ca19f67e7fd70a1bfc4e0b',
 | 
			
		||||
            }
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.xiami.com/song/1775256504',
 | 
			
		||||
            'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '1775256504',
 | 
			
		||||
                'ext': 'mp3',
 | 
			
		||||
                'title': '悟空',
 | 
			
		||||
                'creator': '戴荃',
 | 
			
		||||
                'album': '悟空',
 | 
			
		||||
                'description': 'md5:206e67e84f9bed1d473d04196a00b990',
 | 
			
		||||
            }
 | 
			
		||||
        },
 | 
			
		||||
    ]
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P<id>[0-9]+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.xiami.com/song/1775610518',
 | 
			
		||||
        'md5': '521dd6bea40fd5c9c69f913c232cb57e',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '1775610518',
 | 
			
		||||
            'ext': 'mp3',
 | 
			
		||||
            'title': 'Woman',
 | 
			
		||||
            'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
 | 
			
		||||
            'duration': 265,
 | 
			
		||||
            'creator': 'HONNE',
 | 
			
		||||
            'track': 'Woman',
 | 
			
		||||
            'album': 'Woman',
 | 
			
		||||
            'artist': 'HONNE',
 | 
			
		||||
            'subtitles': {
 | 
			
		||||
                'origin': [{
 | 
			
		||||
                    'ext': 'lrc',
 | 
			
		||||
                }],
 | 
			
		||||
            },
 | 
			
		||||
        }
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.xiami.com/song/1775256504',
 | 
			
		||||
        'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '1775256504',
 | 
			
		||||
            'ext': 'mp3',
 | 
			
		||||
            'title': '悟空',
 | 
			
		||||
            'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
 | 
			
		||||
            'duration': 200,
 | 
			
		||||
            'creator': '戴荃',
 | 
			
		||||
            'track': '悟空',
 | 
			
		||||
            'album': '悟空',
 | 
			
		||||
            'artist': '戴荃',
 | 
			
		||||
            'subtitles': {
 | 
			
		||||
                'origin': [{
 | 
			
		||||
                    'ext': 'lrc',
 | 
			
		||||
                }],
 | 
			
		||||
            },
 | 
			
		||||
        }
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        _id = self._match_id(url)
 | 
			
		||||
        return self._extract_xml(_id)[0]
 | 
			
		||||
        return self._extract_tracks(self._match_id(url))[0]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class XiamiAlbumIE(XiamiBaseIE):
 | 
			
		||||
class XiamiPlaylistBaseIE(XiamiBaseIE):
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        item_id = self._match_id(url)
 | 
			
		||||
        return self.playlist_result(self._extract_tracks(item_id, self._TYPE), item_id)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class XiamiAlbumIE(XiamiPlaylistBaseIE):
 | 
			
		||||
    IE_NAME = 'xiami:album'
 | 
			
		||||
    IE_DESC = '虾米音乐 - 专辑'
 | 
			
		||||
    _VALID_URL = r'http://www\.xiami\.com/album/(?P<id>[0-9]+)'
 | 
			
		||||
    _TESTS = [
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.xiami.com/album/2100300444',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '2100300444',
 | 
			
		||||
            },
 | 
			
		||||
            'playlist_count': 10,
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P<id>[0-9]+)'
 | 
			
		||||
    _TYPE = '1'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.xiami.com/album/2100300444',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '2100300444',
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
 | 
			
		||||
            'only_matching': True,
 | 
			
		||||
        }
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        _id = self._match_id(url)
 | 
			
		||||
        return self.playlist_result(self._extract_xml(_id, '/type/1'), _id)
 | 
			
		||||
        'playlist_count': 10,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class XiamiArtistIE(XiamiBaseIE):
 | 
			
		||||
class XiamiArtistIE(XiamiPlaylistBaseIE):
 | 
			
		||||
    IE_NAME = 'xiami:artist'
 | 
			
		||||
    IE_DESC = '虾米音乐 - 歌手'
 | 
			
		||||
    _VALID_URL = r'http://www\.xiami\.com/artist/(?P<id>[0-9]+)'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P<id>[0-9]+)'
 | 
			
		||||
    _TYPE = '2'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
@@ -139,23 +143,16 @@ class XiamiArtistIE(XiamiBaseIE):
 | 
			
		||||
        'playlist_count': 20,
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        _id = self._match_id(url)
 | 
			
		||||
        return self.playlist_result(self._extract_xml(_id, '/type/2'), _id)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class XiamiCollectionIE(XiamiBaseIE):
 | 
			
		||||
class XiamiCollectionIE(XiamiPlaylistBaseIE):
 | 
			
		||||
    IE_NAME = 'xiami:collection'
 | 
			
		||||
    IE_DESC = '虾米音乐 - 精选集'
 | 
			
		||||
    _VALID_URL = r'http://www\.xiami\.com/collect/(?P<id>[0-9]+)'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P<id>[0-9]+)'
 | 
			
		||||
    _TYPE = '3'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '156527391',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_count': 26,
 | 
			
		||||
        'playlist_mincount': 29,
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        _id = self._match_id(url)
 | 
			
		||||
        return self.playlist_result(self._extract_xml(_id, '/type/3'), _id)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user