from pydantic import BaseModel, Field
from typing import Optional, Callable, Any, Awaitable
import tiktoken
import os
import time
from open_webui.utils.misc import get_last_assistant_message
def num_tokens_from_string(user_message: str, model_name: str) -> int:
    """Return how many tokens tiktoken produces for ``user_message``.

    Args:
        user_message: Text to tokenize. An empty (or ``None``) value counts
            as zero tokens.
        model_name: Model name used to pick the tiktoken encoding.

    Returns:
        The length of the encoded token sequence.
    """
    if not user_message:
        # Nothing to encode, so avoid loading an encoding at all.
        return 0
    try:
        encoding = tiktoken.encoding_for_model(model_name)
    except KeyError:
        # tiktoken raises KeyError for model names it cannot map to an
        # encoding; fall back to a general-purpose encoding so the caller
        # still gets an approximate count instead of a crash.
        encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(user_message))
class Filter:
    """Open WebUI filter that reports response statistics as a status event.

    ``inlet`` records a start timestamp; ``outlet`` measures the elapsed
    time, counts the tokens of the last assistant message, and emits the
    enabled statistics through the event emitter.

    NOTE(review): the start timestamp is stored on the instance, so if one
    instance serves overlapping requests their timings will mix. Confirm how
    the host instantiates filters.
    """

    class Valves(BaseModel):
        """Switches controlling which statistics are shown."""

        # Description text: "Priority of the filter operation".
        priority: int = Field(default=5, description="过滤器操作的优先级")
        # Description text: "Enable to get advanced statistics".
        # In this class the flag toggles the elapsed-time entry.
        elapsed_time: bool = Field(
            default=True,
            description="启用以获取高级统计信息",
        )
        # Description text: "Show total token count (experimental, inaccurate)".
        tokens_no: bool = Field(
            default=True,
            description="显示总Tokens数(实验性,不准确)",
        )
        # Description text: "Show tokens per second (experimental, inaccurate)".
        tokens_per_sec: bool = Field(
            default=True,
            description="显示每秒Tokens数(实验性,不准确)",
        )

    def __init__(self):
        """Create the default valves and clear the start timestamp."""
        self.valves = self.Valves()
        # Set by inlet(); stays None until a request has passed through it.
        self.start_time = None

    def inlet(
        self,
        body: dict,
    ) -> dict:
        """Record when the request started and pass ``body`` through unchanged."""
        self.start_time = time.time()
        return body

    async def outlet(
        self,
        body: dict,
        __event_emitter__: Callable[[Any], Awaitable[None]],
        __model__: Optional[dict] = None,
    ) -> dict:
        """Emit the enabled statistics as a finished status event.

        Args:
            body: Response payload; its ``"messages"`` list is searched for
                the last assistant message.
            __event_emitter__: Awaitable callback that receives the status
                event dictionary.
            __model__: Model metadata; currently unused because the
                tokenizer model is fixed (see below).

        Returns:
            ``body`` unchanged.
        """
        # outlet() may run without a preceding inlet(); in that case there
        # is no timing to report. Clamp at zero in case the wall clock
        # moved backwards between the two calls.
        elapsed_time = None
        if self.start_time is not None:
            elapsed_time = max(time.time() - self.start_time, 0.0)

        # Tokenizing is the only costly step, so skip it when no enabled
        # statistic needs the token count.
        tokens = None
        if self.valves.tokens_no or self.valves.tokens_per_sec:
            response_message = get_last_assistant_message(
                body.get("messages") or []
            )
            # The tokenizer model is deliberately fixed rather than taken
            # from __model__: arbitrary model ids may be unknown to
            # tiktoken. Counts are therefore approximate.
            model = "gpt-4o"
            # Treat a missing assistant message as empty text.
            tokens = num_tokens_from_string(response_message or "", model)

        stats_array = []
        # Tokens per second needs a strictly positive duration; this also
        # avoids dividing by zero.
        if (
            self.valves.tokens_per_sec
            and tokens is not None
            and elapsed_time is not None
            and elapsed_time > 0
        ):
            tokens_per_sec = tokens / elapsed_time
            stats_array.append(f"🚀{tokens_per_sec:.0f}Tokens/S")
        if self.valves.tokens_no and tokens is not None:
            stats_array.append(f"📝{tokens}Tokens")
        if self.valves.elapsed_time and elapsed_time is not None:
            stats_array.append(f"⏱️{elapsed_time:.0f}S")

        stats = " | ".join(stats_array)
        await __event_emitter__(
            {
                "type": "status",
                "data": {
                    "description": stats,
                    "done": True,
                },
            }
        )
        return body
# Note: not much was changed; I just find this version a bit nicer to look at.