mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 08:17:08 -05:00 
			
		
		
		
	[youtube] Improve chapters extraction (closes #13247)
This commit is contained in:
		@@ -254,6 +254,13 @@ class TestYoutubeChapters(unittest.TestCase):
 | 
				
			|||||||
                'title': '3 - Из серпов луны...[Iz serpov luny]',
 | 
					                'title': '3 - Из серпов луны...[Iz serpov luny]',
 | 
				
			||||||
            }]
 | 
					            }]
 | 
				
			||||||
        ),
 | 
					        ),
 | 
				
			||||||
 | 
					        (
 | 
				
			||||||
 | 
					            # https://www.youtube.com/watch?v=xZW70zEasOk
 | 
				
			||||||
 | 
					            # time point more than duration
 | 
				
			||||||
 | 
					            '''● LCS Spring finals: Saturday and Sunday from <a href="#" onclick="yt.www.watch.player.seekTo(13*60+30);return false;">13:30</a> outside the venue! <br />● PAX East: Fri, Sat & Sun - more info in tomorrows video on the main channel!''',
 | 
				
			||||||
 | 
					            283,
 | 
				
			||||||
 | 
					            []
 | 
				
			||||||
 | 
					        ),
 | 
				
			||||||
    ]
 | 
					    ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_youtube_chapters(self):
 | 
					    def test_youtube_chapters(self):
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1353,10 +1353,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
				
			|||||||
            start_time = parse_duration(time_point)
 | 
					            start_time = parse_duration(time_point)
 | 
				
			||||||
            if start_time is None:
 | 
					            if start_time is None:
 | 
				
			||||||
                continue
 | 
					                continue
 | 
				
			||||||
 | 
					            if start_time > duration:
 | 
				
			||||||
 | 
					                break
 | 
				
			||||||
            end_time = (duration if next_num == len(chapter_lines)
 | 
					            end_time = (duration if next_num == len(chapter_lines)
 | 
				
			||||||
                        else parse_duration(chapter_lines[next_num][1]))
 | 
					                        else parse_duration(chapter_lines[next_num][1]))
 | 
				
			||||||
            if end_time is None:
 | 
					            if end_time is None:
 | 
				
			||||||
                continue
 | 
					                continue
 | 
				
			||||||
 | 
					            if end_time > duration:
 | 
				
			||||||
 | 
					                end_time = duration
 | 
				
			||||||
 | 
					            if start_time > end_time:
 | 
				
			||||||
 | 
					                break
 | 
				
			||||||
            chapter_title = re.sub(
 | 
					            chapter_title = re.sub(
 | 
				
			||||||
                r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
 | 
					                r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
 | 
				
			||||||
            chapter_title = re.sub(r'\s+', ' ', chapter_title)
 | 
					            chapter_title = re.sub(r'\s+', ' ', chapter_title)
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user