接力各位大佬的方案,这里实现了流式返回(代码是参考大佬们的)。
我是直接部署在服务器上的,服务器走全局代理,所以就把代码里的代理去掉了。
from fastapi import FastAPI, Request, Response, HTTPException
from starlette.responses import StreamingResponse
import requests
import json
# Application object; the route decorators further down register the handler on it.
app = FastAPI()
# NOTE: the handler below receives the raw Request and reads the JSON body itself
# (no Pydantic model is declared for the request payload).
@app.post("/v1/chat/completions")
@app.options("/v1/chat/completions")  # the same path also answers CORS preflight requests
async def main(request: Request):
    """Relay a chat-completion request to the upstream API and reply as an SSE stream.

    Non-POST requests (the OPTIONS preflight) are answered with an empty 204
    carrying permissive CORS headers. For POST, the JSON body is forwarded to
    the upstream endpoint with ``stream`` forced to ``True``; the upstream JSON
    reply is then re-emitted through ``event_stream`` as ``text/event-stream``.

    Args:
        request: The incoming Starlette/FastAPI request.

    Returns:
        A 204 ``Response`` for preflight, a ``StreamingResponse`` on success,
        a 400 ``Response`` when the body is not a JSON object, or a 502
        ``Response`` when the upstream call fails or returns an unusable reply.
    """
    # Local import so this block does not depend on a new file-level import.
    from starlette.concurrency import run_in_threadpool

    cors_headers = {
        'Access-Control-Allow-Origin': '*',
        "Access-Control-Allow-Headers": '*',
    }
    sse_headers = {
        **cors_headers,
        'Content-Type': 'text/event-stream',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
    }

    if request.method != 'POST':
        return Response(None, status_code=204, headers=sse_headers)

    url = 'https://multillm.ai-pro.org/api/openai-completion'  # target API address
    # The client's own headers are deliberately not forwarded; the upstream
    # only receives this fixed browser-like header set.
    upstream_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
        "Content-Type": "application/json",
    }

    try:
        payload = await request.json()
    except ValueError:
        # Malformed JSON would otherwise surface as an unhandled 500.
        return Response(
            content='Request body must be valid JSON',
            status_code=400,
            headers=cors_headers,
        )
    if not isinstance(payload, dict):
        return Response(
            content='Request body must be a JSON object',
            status_code=400,
            headers=cors_headers,
        )
    # NOTE(review): "stream" is forced on, yet the reply is parsed below as a
    # single JSON document -- presumably the upstream ignores the flag; confirm.
    payload["stream"] = True

    try:
        # requests is synchronous: run it in the thread pool so the event loop
        # keeps serving other clients, and bound the wait with a timeout
        # (10 s to connect, 300 s to read the completion).
        upstream = await run_in_threadpool(
            requests.post,
            url,
            headers=upstream_headers,
            json=payload,
            timeout=(10, 300),
        )
    except requests.RequestException:
        return Response(
            content='Unable to reach the backend API',
            status_code=502,
            headers=cors_headers,
        )

    if upstream.status_code != 200:
        return Response(
            content='Unable to reach the backend API',
            status_code=502,
            headers=cors_headers,
        )

    try:
        response_data = upstream.json()
    except ValueError:
        # A 200 reply with a non-JSON body cannot be converted into a chunk.
        return Response(
            content='Backend API returned an invalid response',
            status_code=502,
            headers=cors_headers,
        )

    return StreamingResponse(event_stream(response_data), headers=sse_headers)
def event_stream(data):
    """Yield a finished chat completion as an OpenAI-style SSE stream.

    The whole message content is sent in a single ``chat.completion.chunk``
    event (it is not split into smaller chunks), followed by the
    ``data: [DONE]`` terminator that OpenAI-style streaming clients look for.

    Args:
        data: Decoded upstream completion. Must contain ``id``, ``created``,
            ``model`` and a non-empty ``choices`` list whose first entry has
            ``message.content`` and ``finish_reason``. ``system_fingerprint``
            is optional and is emitted as ``null`` when absent.

    Yields:
        SSE-framed strings, each of the form ``'data: ...\\n\\n'``.

    Raises:
        KeyError, IndexError: If a required field listed above is missing.
    """
    first_choice = data['choices'][0]
    chunk = json.dumps({
        "id": data['id'],
        "object": 'chat.completion.chunk',
        "created": data['created'],
        "model": data['model'],
        # Not every backend includes this field; do not fail the stream over it.
        "system_fingerprint": data.get('system_fingerprint'),
        "choices": [{
            "index": 0,
            "delta": {"role": 'assistant', "content": first_choice['message']['content']},
            "logprobs": None,
            "finish_reason": first_choice['finish_reason'],
        }],
    })
    yield f'data: {chunk}\n\n'
    # Explicit end-of-stream marker so clients need not rely on the socket closing.
    yield 'data: [DONE]\n\n'
if __name__ == "__main__":
    # Executed directly: serve the app with uvicorn on every interface, port 3699.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 3699}
    uvicorn.run(app, **server_options)
配合 NextChat 使用,速度比 cf 稍微快一点。