1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-10-04 19:44:33 -04:00

[utils] is_html: Handle double BOM

Closes #2885
This commit is contained in:
pukkandan
2022-05-18 06:42:43 +05:30
parent aedaa455d9
commit 80e8493ee7
2 changed files with 21 additions and 7 deletions

View File

@@ -3290,14 +3290,13 @@ def is_html(first_bytes):
         (b'\xff\xfe', 'utf-16-le'),
         (b'\xfe\xff', 'utf-16-be'),
     ]
-    for bom, enc in BOMS:
-        if first_bytes.startswith(bom):
-            s = first_bytes[len(bom):].decode(enc, 'replace')
-            break
-    else:
-        s = first_bytes.decode('utf-8', 'replace')
-    return re.match(r'^\s*<', s)
+    encoding = 'utf-8'
+    for bom, enc in BOMS:
+        while first_bytes.startswith(bom):
+            encoding, first_bytes = enc, first_bytes[len(bom):]
+    return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
 def determine_protocol(info_dict):