mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-03 22:57:08 -05:00 
			
		
		
		
	[generic] Detect Bandcamp pages that use custom domains (closes #1662)
Such pages embed the original Bandcamp URL in the 'og:url' meta property.
This commit is contained in:
		@@ -41,7 +41,17 @@ class GenericIE(InfoExtractor):
 | 
				
			|||||||
                u"uploader_id": u"skillsmatter",
 | 
					                u"uploader_id": u"skillsmatter",
 | 
				
			||||||
                u"uploader": u"Skills Matter",
 | 
					                u"uploader": u"Skills Matter",
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        },
 | 
				
			||||||
 | 
					        # bandcamp page with custom domain
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            u'url': u'http://bronyrock.com/track/the-pony-mash',
 | 
				
			||||||
 | 
					            u'file': u'3235767654.mp3',
 | 
				
			||||||
 | 
					            u'info_dict': {
 | 
				
			||||||
 | 
					                u'title': u'The Pony Mash',
 | 
				
			||||||
 | 
					                u'uploader': u'M_Pallante',
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            u'skip': u'There is a limit of 200 free downloads / month for the test song',
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
    ]
 | 
					    ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def report_download_webpage(self, video_id):
 | 
					    def report_download_webpage(self, video_id):
 | 
				
			||||||
@@ -155,6 +165,12 @@ class GenericIE(InfoExtractor):
 | 
				
			|||||||
            surl = unescapeHTML(mobj.group(1))
 | 
					            surl = unescapeHTML(mobj.group(1))
 | 
				
			||||||
            return self.url_result(surl, 'Youtube')
 | 
					            return self.url_result(surl, 'Youtube')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Look for Bandcamp pages with custom domain
 | 
				
			||||||
 | 
					        mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
 | 
				
			||||||
 | 
					        if mobj is not None:
 | 
				
			||||||
 | 
					            burl = unescapeHTML(mobj.group(1))
 | 
				
			||||||
 | 
					            return self.url_result(burl, 'Bandcamp')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Start with something easy: JW Player in SWFObject
 | 
					        # Start with something easy: JW Player in SWFObject
 | 
				
			||||||
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
 | 
					        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
 | 
				
			||||||
        if mobj is None:
 | 
					        if mobj is None:
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user