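# A friend wrote this script to save YouTube video subtitles to Notion; I would like to adapt it to automatically fetch Bilibili subtitles and save them to Notion instead.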
import tkinter as tk
from tkinter import messagebox
from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled
import requests
import json
def get_youtube_subtitles(video_url):
    """Fetch the transcript of a YouTube video and return it as one string.

    The video ID is read from the ``v=`` query parameter of ``video_url``.
    On success the joined transcript text is also written to ``sub.txt`` in
    the current working directory (overwriting any previous file).

    Args:
        video_url: A YouTube watch URL containing a ``v=<id>`` parameter.

    Returns:
        A ``(subtitles, message)`` tuple. ``subtitles`` is the transcript
        text joined with single spaces, or ``None`` on failure; ``message``
        is a human-readable status string suitable for display.
    """
    try:
        # Keep only the ID itself: anything after it ("&t=42s", "&list=...",
        # "#fragment") would otherwise be sent to the API as part of the ID.
        video_id = video_url.split("v=")[1].split("&")[0].split("#")[0]
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        subtitles = " ".join(entry["text"] for entry in transcript_list)
        with open("sub.txt", "w", encoding="utf-8") as file:
            file.write(subtitles)
        return subtitles, "Subtitles retrieved successfully"
    except (IndexError, KeyError):
        # IndexError: no "v=" in the URL; KeyError: entry without "text".
        return None, "Invalid YouTube URL."
    except NoTranscriptFound:
        return None, "No subtitles found for this video."
    except TranscriptsDisabled:
        return None, "Subtitles are disabled for this video."
    except Exception as e:
        # Top-level boundary for the GUI: report instead of crashing.
        return None, f"Error retrieving subtitles: {e}"
def clean_subtitles(srt_captions):
    """Strip SRT bookkeeping lines from caption text and join the rest.

    Lines that consist solely of digits (cue sequence numbers) and lines
    containing the ``-->`` timing arrow are dropped; every remaining line,
    including blank ones, is kept and joined with a single space.

    Args:
        srt_captions: Caption text, possibly in SRT format.

    Returns:
        The remaining lines joined into one space-separated string
        (an empty string for empty input).
    """
    return " ".join(
        line
        for line in srt_captions.splitlines()
        if not line.isdigit() and "-->" not in line
    )
def save_to_notion(page_id, text, notion_token):
    """Append ``text`` to a Notion page as a series of paragraph blocks.

    The text is cut into pieces of at most 2000 characters, each piece
    becoming one paragraph block. Blocks are sent to the "append block
    children" endpoint in batches of at most 100, since Notion rejects
    requests carrying more children than that.

    Args:
        page_id: ID of the Notion page (or block) to append to.
        text: The text to store.
        notion_token: Notion integration token used as the bearer token.

    Returns:
        A ``(response, message)`` tuple: the last ``requests`` response
        received and a human-readable status string. If a batch fails,
        sending stops there; earlier batches remain appended to the page.
    """
    max_chars_per_block = 2000
    max_blocks_per_request = 100

    url = f"https://api.notion.com/v1/blocks/{page_id}/children"
    headers = {
        "Authorization": f"Bearer {notion_token}",
        "Content-Type": "application/json",
        "Notion-Version": "2022-06-28",
    }

    # Split text into chunks of 2000 characters or less, one block each.
    children = [
        {
            "object": "block",
            "type": "paragraph",
            "paragraph": {
                "rich_text": [
                    {
                        "type": "text",
                        "text": {"content": text[start:start + max_chars_per_block]},
                    }
                ]
            },
        }
        for start in range(0, len(text), max_chars_per_block)
    ]

    # Always keep one (possibly empty) batch so that a request is made and a
    # response is available to return even when ``text`` is empty.
    batches = [
        children[start:start + max_blocks_per_request]
        for start in range(0, len(children), max_blocks_per_request)
    ] or [[]]

    response = None
    for batch in batches:
        response = requests.patch(
            url, headers=headers, data=json.dumps({"children": batch})
        )
        if response.status_code != 200:
            return response, f"Failed to save text to Notion: {response.status_code} - {response.text}"
    return response, "Text saved to Notion successfully."
def extract_and_save():
    """Button callback: fetch the video's subtitles and store them in Notion.

    Reads the video URL, Notion page ID and Notion token from the entry
    widgets. If any field is empty an error dialog is shown and nothing
    else happens. Otherwise the subtitles are fetched, cleaned and appended
    to the Notion page, and the resulting status message is shown in
    ``result_label``.
    """
    # Pasted values frequently carry stray spaces or a trailing newline.
    video_url = url_entry.get().strip()
    page_id = page_id_entry.get().strip()
    notion_token = token_entry.get().strip()

    if not video_url or not page_id or not notion_token:
        messagebox.showerror("Error", "Please fill in all fields.")
        return

    srt_captions, message = get_youtube_subtitles(video_url)
    if not srt_captions:
        # Retrieval failed (or yielded no text): show the fetch status.
        result_label.config(text=message)
        return

    cleaned_text = clean_subtitles(srt_captions)
    _response, status_message = save_to_notion(page_id, cleaned_text, notion_token)
    result_label.config(text=status_message)
# Create the main window.
root = tk.Tk()
root.title("YouTube Subtitle Extractor")

# YouTube URL input.
tk.Label(root, text="YouTube URL:").grid(row=0, column=0, padx=10, pady=5, sticky=tk.E)
url_entry = tk.Entry(root, width=50)
url_entry.grid(row=0, column=1, padx=10, pady=5)

# Notion page ID input.
tk.Label(root, text="Notion Page ID:").grid(row=1, column=0, padx=10, pady=5, sticky=tk.E)
page_id_entry = tk.Entry(root, width=50)
page_id_entry.grid(row=1, column=1, padx=10, pady=5)

# Notion integration token input (characters are masked with "*").
tk.Label(root, text="Notion Token:").grid(row=2, column=0, padx=10, pady=5, sticky=tk.E)
token_entry = tk.Entry(root, width=50, show="*")
token_entry.grid(row=2, column=1, padx=10, pady=5)

# Extract-and-save button.
extract_save_button = tk.Button(root, text="Extract & Save", command=extract_and_save)
extract_save_button.grid(row=3, column=0, columnspan=2, pady=10)

# Result label showing the status of the last operation.
result_label = tk.Label(root, text="", wraplength=400)
result_label.grid(row=4, column=0, columnspan=2, pady=5)

# Start the Tk main loop.
root.mainloop()