看到有佬友在问 bilibili 字幕的相关话题,用 Python 糊了一个,能跑就行。
这里只是把字幕简单打印出来(示例中只输出前 10 条)。
Python
import time
import requests
import json
import re
# Request headers sent with every HTTP call in this script: a desktop
# browser user-agent plus the SESSDATA login cookie. The cookie value
# below is a placeholder that must be replaced with your own SESSDATA.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0"
    ),
    "cookie": "SESSDATA=填入自己cookie中的SESSDATA",
}
def get_aid_cid(url):
    """Fetch a video page and return its ``(aid, cid)`` pair.

    The page HTML is searched for the ``window.__INITIAL_STATE__=...``
    JSON blob, and both ids are read from that blob's ``videoData`` entry.

    Args:
        url: Address of the video page to download.

    Returns:
        A tuple ``(aid, cid)`` taken from ``videoData``.

    Raises:
        requests.HTTPError: If the page request returns an error status.
        ValueError: If the page does not contain the expected state blob.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()

    # Dots are escaped so they match only the literal property access.
    match = re.search(
        r"window\.__INITIAL_STATE__=(.*?);\(function", response.text
    )
    if match is None:
        raise ValueError(
            f"window.__INITIAL_STATE__ not found in page: {url}"
        )

    # Parse the blob once and read both ids from the same object.
    video_data = json.loads(match.group(1))["videoData"]
    return video_data["aid"], video_data["cid"]
def get_subtitle_url(aid, cid):
    """Return the absolute URL of the first subtitle track of a video.

    Queries ``https://api.bilibili.com/x/player/wbi/v2`` with the given ids
    and reads ``data.subtitle.subtitles[0].subtitle_url`` from the JSON
    reply.

    Args:
        aid: The video's ``aid`` value.
        cid: The video's ``cid`` value.

    Returns:
        The subtitle URL, prefixed with ``https:`` unless it already starts
        with an ``http://`` or ``https://`` scheme.

    Raises:
        requests.HTTPError: If the API request returns an error status.
        IndexError: If the reply lists no subtitle tracks.
    """
    # Let requests build and encode the query string instead of formatting
    # it by hand; the timeout avoids hanging on a stalled connection.
    resp = requests.get(
        "https://api.bilibili.com/x/player/wbi/v2",
        params={"aid": aid, "cid": cid},
        headers=headers,
        timeout=10,
    )
    resp.raise_for_status()

    subtitles = resp.json()["data"]["subtitle"]["subtitles"]
    if not subtitles:
        # NOTE(review): presumably this happens when the video has no
        # subtitles or the SESSDATA cookie is missing/expired - confirm.
        raise IndexError(f"no subtitles listed for aid={aid}, cid={cid}")

    subtitle_url = subtitles[0]["subtitle_url"]
    # Only add the scheme when the URL does not already carry one.
    if subtitle_url.startswith(("http://", "https://")):
        return subtitle_url
    return "https:" + subtitle_url
def hanle_subtitle(subtitle_url, limit=10):
    """Download a subtitle file and print its leading entries.

    The JSON document at ``subtitle_url`` is fetched and each entry of its
    ``body`` list is printed as ``<from> -> <to>`` followed by the entry's
    ``content`` on the next line.

    The misspelled function name is kept so existing callers keep working.

    Args:
        subtitle_url: Absolute URL of the subtitle JSON document.
        limit: Maximum number of entries to print, counted from the start.
            Defaults to 10 (the previous hard-coded value); pass ``None``
            to print every entry. Expected to be non-negative.

    Raises:
        requests.HTTPError: If the subtitle request returns an error status.
    """
    # The timeout avoids hanging on a stalled connection.
    response = requests.get(subtitle_url, headers=headers, timeout=10)
    response.raise_for_status()

    entries = response.json()["body"]
    # Slicing with ``None`` as the stop value yields the whole list.
    for entry in entries[:limit]:
        print(f"{entry['from']} -> {entry['to']}\n{entry['content']}")
if __name__ == "__main__":
    # Example run: resolve the ids of one video page, look up its subtitle
    # file, then print the subtitle entries. A one-second pause separates
    # consecutive requests.
    video_page = "https://www.bilibili.com/video/BV1e1YyeoEt6/"

    ids = get_aid_cid(video_page)
    time.sleep(1)

    track_url = get_subtitle_url(*ids)
    time.sleep(1)

    hanle_subtitle(track_url)
输出: