始皇的新玩具4o+TTS+Whisper糊了个玩具

始皇的玩具始皇的新玩具TTS+Whisper糊了个玩具

代码

import PySimpleGUI as sg
import requests
import json
import tempfile
import os
import sounddevice as sd
import numpy as np
import wave
import pygame
import threading

class AudioRecorder:
    """Capture microphone audio through a ``sounddevice`` input stream.

    Chunks delivered to :meth:`callback` are buffered in ``frames`` while
    ``recording`` is true and joined into a single array by :meth:`stop`.
    """

    def __init__(self, sample_rate=44100, channels=1):
        """Store the capture settings; no stream is opened until ``start``.

        Args:
            sample_rate: Value passed to the stream as ``samplerate``.
            channels: Number of input channels requested from the stream.
        """
        self.sample_rate = sample_rate
        self.channels = channels
        self.recording = False
        self.frames = []
        # Set by start(); None means there is no open stream to shut down.
        self.stream = None

    def callback(self, indata, frames, time_info, status):
        """Buffer one incoming chunk while recording is active.

        Only ``indata`` is used. A copy is stored rather than the array itself
        (presumably because the stream reuses its buffer between calls).
        """
        if self.recording:
            self.frames.append(indata.copy())

    def start(self):
        """Discard previously buffered audio, then open and start a stream."""
        self.recording = True
        self.frames = []
        self.stream = sd.InputStream(callback=self.callback, channels=self.channels, samplerate=self.sample_rate)
        self.stream.start()

    def stop(self):
        """Stop capturing and return everything buffered since ``start``.

        Returns:
            The buffered chunks concatenated along axis 0. When nothing was
            captured (or ``start`` was never called) an empty array of shape
            ``(0, channels)`` is returned instead of raising.
        """
        self.recording = False
        if self.stream is not None:
            self.stream.stop()
            self.stream.close()
            self.stream = None
        if not self.frames:
            # np.concatenate rejects an empty list; float32 is used here on
            # the assumption that the stream runs with sounddevice's default
            # sample dtype.
            return np.zeros((0, self.channels), dtype=np.float32)
        return np.concatenate(self.frames, axis=0)

def text_to_speech(text, voice, auth_token):
    """Synthesize ``text`` with the speech endpoint and play it to completion.

    The MP3 response is written to a temporary file, played through
    ``pygame.mixer.music`` and the call blocks until playback ends. Every
    failure is reported on stdout instead of being raised, so the function
    can be used directly as a thread target.

    Args:
        text: Text to be spoken.
        voice: Voice name sent in the request's ``voice`` field.
        auth_token: Token sent as the ``Bearer`` authorization value.
    """
    url = "https://api.oaifree.com/v1/audio/speech"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {auth_token}"
    }
    data = {
        "model": "tts-1-hd",
        "input": text,
        "voice": voice,
        "response_format": "mp3",
        "speed": 1
    }

    temp_file_path = None
    try:
        response = requests.post(url, headers=headers, data=json.dumps(data))
        response.raise_for_status()

        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
            # Remember the path before writing so a failed write is still
            # cleaned up below.
            temp_file_path = temp_file.name
            temp_file.write(response.content)

        pygame.mixer.init()
        try:
            pygame.mixer.music.load(temp_file_path)
            pygame.mixer.music.play()
            clock = pygame.time.Clock()
            while pygame.mixer.music.get_busy():
                clock.tick(10)
        finally:
            # Shut the mixer down even when load/play fails, so the file is
            # no longer in use when it is deleted.
            pygame.mixer.quit()
    except Exception as e:
        print(f"语音转换错误: {e}")
    finally:
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
            except OSError as e:
                print(f"语音转换错误: {e}")

def speech_to_text(audio_file_path, auth_token):
    """Upload an audio file to the transcription endpoint and return its text.

    Args:
        audio_file_path: Path of the audio file to upload.
        auth_token: Token sent as the ``Bearer`` authorization value.

    Returns:
        The ``"text"`` field of the JSON response, or ``None`` when the
        request fails or the response does not have the expected shape
        (the error is printed to stdout).

    Raises:
        OSError: If ``audio_file_path`` cannot be opened.
    """
    url = "https://api.oaifree.com/v1/audio/transcriptions"
    headers = {
        "Authorization": f"Bearer {auth_token}"
    }

    # The context manager closes the upload handle on every path; callers
    # delete the file right after this call returns.
    with open(audio_file_path, "rb") as audio_file:
        files = {
            "file": audio_file,
            "model": (None, "whisper-1")
        }

        try:
            response = requests.post(url, headers=headers, files=files)
            response.raise_for_status()
            return response.json()["text"]
        except (requests.exceptions.RequestException, KeyError, TypeError, ValueError) as e:
            # KeyError/TypeError/ValueError cover a body that is not JSON or
            # lacks the "text" field.
            print(f"语音转文字错误: {e}")
            return None

def chat_with_ai(messages, auth_token):
    """Send the conversation to the chat-completions endpoint.

    Args:
        messages: Conversation history placed in the request's ``messages``
            field.
        auth_token: Token sent as the ``Bearer`` authorization value.

    Returns:
        The ``content`` of the first choice's message, or ``None`` when the
        request fails or the response lacks that structure (diagnostics are
        printed to stdout).
    """
    endpoint = "https://api.oaifree.com/v1/chat/completions"
    request_headers = {
        "Authorization": f"Bearer {auth_token}",
        "Content-Type": "application/json"
    }
    payload = {
        "messages": messages,
        "stream": False,
        "model": "gpt-4o",
        "temperature": 0.5,
        "max_tokens": 8000,
        "top_p": 1
    }

    try:
        reply = requests.post(endpoint, headers=request_headers, data=json.dumps(payload))
        reply.raise_for_status()
        body = reply.json()

        # Guard 1: the response must carry at least one choice.
        if 'choices' not in body or len(body['choices']) == 0:
            print("未找到有效的响应内容:")
            print(json.dumps(body, indent=2))
            return None

        # Guard 2: the first choice must hold a message with content.
        first_choice = body['choices'][0]
        if 'message' not in first_choice or 'content' not in first_choice['message']:
            print("意外的响应结构:")
            print(json.dumps(body, indent=2))
            return None

        return first_choice['message']['content']
    except requests.exceptions.RequestException as err:
        print(f"AI 对话错误: {err}")
        return None

def _write_wav_tempfile(audio_data, channels, sample_rate):
    """Write ``audio_data`` to a temporary 16-bit WAV file and return its path.

    Assumes ``audio_data`` holds float samples nominally in [-1, 1]; values
    outside that range are clipped before the int16 conversion. The caller
    is responsible for deleting the returned file.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
        temp_file_path = temp_file.name
    # The placeholder handle is closed above so that wave is the only writer.
    try:
        pcm = (np.clip(audio_data, -1.0, 1.0) * 32767).astype(np.int16)
        with wave.open(temp_file_path, 'wb') as wf:
            wf.setnchannels(channels)
            wf.setsampwidth(2)
            wf.setframerate(sample_rate)
            wf.writeframes(pcm.tobytes())
    except Exception:
        os.unlink(temp_file_path)
        raise
    return temp_file_path


def main():
    """Run the voice-assistant window until it is closed.

    Each round: record from the microphone, transcribe the recording, send
    the transcript together with the conversation history to the chat
    endpoint, show the answer and speak it on a background thread.
    """
    recorder = AudioRecorder()
    messages = []  # conversation history sent with every chat request

    voices = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]

    layout = [
        [sg.Text("语音助手", justification='center', font=('Any', 20))],
        [sg.Text("AUTH_TOKEN:"), sg.Input(key="-AUTH-", password_char='*')],
        [sg.Text("状态: 准备就绪", key="-STATUS-", justification='center')],
        [sg.Text("选择语音模型:"), sg.Combo(voices, default_value="echo", key="-VOICE-")],
        [sg.Button("开始录音", key="-RECORD-"), sg.Button("停止录音", key="-STOP-", disabled=True)],
        [sg.Text("您的问题:", justification='left')],
        [sg.Multiline(size=(50, 5), key="-USER-INPUT-", disabled=True)],
        [sg.Text("AI 回答:", justification='left')],
        [sg.Multiline(size=(50, 10), key="-AI-RESPONSE-", disabled=True)],
        [sg.Button("清除对话历史", key="-CLEAR-")]
    ]

    window = sg.Window("语音助手", layout, finalize=True, element_justification='center')

    recording = False
    tts_thread = None  # thread speaking the latest answer, if one is running

    while True:
        event, values = window.read(timeout=100)

        if event == sg.WINDOW_CLOSED:
            break

        if event == "-RECORD-" and not recording:
            recording = True
            window["-STATUS-"].update("正在录音...")
            window["-RECORD-"].update(disabled=True)
            window["-STOP-"].update(disabled=False)
            recorder.start()

        elif event == "-STOP-" and recording:
            recording = False
            window["-STATUS-"].update("正在处理...")
            window["-RECORD-"].update(disabled=True)
            window["-STOP-"].update(disabled=True)
            # The calls below block this thread; repaint first so the new
            # status is actually visible while they run.
            window.refresh()

            audio_data = recorder.stop()
            auth_token = values["-AUTH-"]

            temp_file_path = _write_wav_tempfile(audio_data, recorder.channels, recorder.sample_rate)
            try:
                user_input = speech_to_text(temp_file_path, auth_token)
            finally:
                os.unlink(temp_file_path)

            if user_input:
                window["-USER-INPUT-"].update(user_input)
                messages.append({"role": "user", "content": user_input})
                window["-STATUS-"].update("AI 正在思考...")
                window.refresh()
                ai_response = chat_with_ai(messages, auth_token)
                if ai_response:
                    messages.append({"role": "assistant", "content": ai_response})
                    window["-AI-RESPONSE-"].update(ai_response)
                    window["-STATUS-"].update("AI 正在说话...")
                    selected_voice = values["-VOICE-"]
                    tts_thread = threading.Thread(target=text_to_speech, args=(ai_response, selected_voice, auth_token))
                    tts_thread.start()
                else:
                    window["-STATUS-"].update("AI 回复失败。请重试。")
            else:
                window["-STATUS-"].update("未能理解。请重试。")

            window["-RECORD-"].update(disabled=False)
            window["-STOP-"].update(disabled=True)

        elif event == "-CLEAR-":
            messages.clear()
            window["-USER-INPUT-"].update("")
            window["-AI-RESPONSE-"].update("")
            window["-STATUS-"].update("对话历史已清除")

        # Return to "ready" only once the speaking thread has finished, so
        # the other status messages are not overwritten on every tick.
        if tts_thread is not None and not tts_thread.is_alive():
            tts_thread = None
            if not recording:
                window["-STATUS-"].update("准备就绪")

    if recording:
        # Release the input stream if the window was closed mid-recording.
        try:
            recorder.stop()
        except ValueError:
            # Joining zero captured chunks can fail; the audio is discarded
            # here anyway.
            pass
    window.close()

# Start the GUI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

可以直接使用已打包的exe
[123云盘下载地址](https://www.123pan.com/s/pTVtjv-DyQWd.html)(旧版已失效)

填入 Access Token 就可以愉快地玩耍了
tts

当然,需要plus账户哈

各位佬友好像都觉得这个UI不是很友好(确实不好 :joy:)

加个夜班赶一下交互感

附上下载链接和代码链接
代码包:https://www.123pan.com/s/pTVtjv-ZyQWd.html
成品包:https://www.123pan.com/s/pTVtjv-cyQWd.html

19 个赞

这个UI有些年头了啊

2 个赞

不错,支持了

感谢分享!

哈哈哈,这个py的库,没用前端
随便糊个等其他大佬解决美化问题

我也是蹭的,嘿嘿,可惜了普号不能用

才知道开了,尝尝鲜

PySimpleGUI 佬这个库有点老呀,还要注册才可以用

1 个赞

我也是试用30天,哈哈,这个UI等大佬改 :sweat_smile:

交给gpt给pyqt5写一下就行了

优秀的佬友

哈哈哈,公然挑衅oai :rofl:

感谢

666

改了流式处理文本响应
增加了UI简单优化

使用:
开始对话之后会录音
每一轮说完后,需手动点击“我说完了”,程序会自动发送录音并等待AI回复
对话结束

没有plus :eyes:

:rofl:啊这,看看有无大善人

太强了!

不错

就玩过一次

1 个赞