WebPilot for oi [OpenWebUI] too, plus an updated Jina version

import requests
import asyncio
import json
from typing import Any, Awaitable, Callable, Optional

EmitterType = Optional[Callable[[dict], Any]]
SendCitationType = Callable[[str, str, str], Awaitable[None]]
SendStatusType = Callable[[str, bool], Awaitable[None]]


def get_send_citation(__event_emitter__: EmitterType) -> SendCitationType:
    async def send_citation(url: str, title: str, content: str):
        if __event_emitter__ is None:
            return
        await __event_emitter__(
            {
                "type": "citation",
                "data": {
                    "document": [content],
                    "metadata": [{"source": url, "html": False}],
                    "source": {"name": title},
                },
            }
        )

    return send_citation


def get_send_status(__event_emitter__: EmitterType) -> SendStatusType:
    async def send_status(status_message: str, done: bool):
        if __event_emitter__ is None:
            return
        await __event_emitter__(
            {
                "type": "status",
                "data": {"description": status_message, "done": done},
            }
        )

    return send_status


class Tools:
    def __init__(self):
        pass

    async def web_scrape(self, url: str, __event_emitter__: EmitterType = None) -> str:
        """
        Scrape and process a web page using gpts.webpilot.ai API

        :param url: The URL of the web page to scrape.
        :param __event_emitter__: Optional event emitter for status updates.
        :return: The scraped and processed content, or an error message.
        """
        api_url = "https://gpts.webpilot.ai/api/read"

        headers = {"Content-Type": "application/json", "WebPilot-Friend-UID": "0"}

        # Request fields expected by the WebPilot read API; the short keys
        # are the API's own parameter names ("l" selects the response language)
        payload = {
            "link": url,
            "ur": "summary of the page",
            "lp": True,
            "rt": False,
            "l": "en",
        }

        send_status = get_send_status(__event_emitter__)
        send_citation = get_send_citation(__event_emitter__)

        try:
            await send_status(f"Reading content from {url}", False)

            # Small delay so the initial status message has a chance to render
            await asyncio.sleep(0.1)

            # Run the blocking requests call in a worker thread so it does not
            # stall the event loop, and cap it with a timeout so a slow page
            # cannot hang the tool indefinitely
            response = await asyncio.to_thread(
                requests.post, api_url, headers=headers, json=payload, timeout=30
            )
            response.raise_for_status()

            # Parse the JSON response
            result = response.json()

            # Remove the 'rules' field from the result
            if "rules" in result:
                del result["rules"]

            # Convert the modified result back to a JSON string
            content = json.dumps(result, ensure_ascii=False)

            # Send citation before marking status as complete
            await send_citation(url, "Web Scrape Result", content)

            # Mark status as complete after sending citation
            await send_status("网页内容已获取", True)

            return content

        except requests.RequestException as e:
            error_message = f"Error scraping web page: {str(e)}"
            await send_status(error_message, True)
            return error_message

It can be invoked without function calling; just paste the code into a Tool in the oi workspace.
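
For a quick smoke test outside Open WebUI, you can drive the tool directly with a stub emitter. The harness below is a hypothetical stand-in for the emitter Open WebUI injects, not part of the tool itself:

import asyncio

async def main():
    async def emitter(event: dict):
        # Print the status/citation events the tool would send to the UI
        print(event["type"], event["data"])

    tools = Tools()
    content = await tools.web_scrape(
        "https://example.com", __event_emitter__=emitter
    )
    print(content[:200])

asyncio.run(main())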


Second version: supports displaying search results with citations, and reading multiple URLs (a usage sketch follows the code).

import asyncio
import json
import aiohttp
from typing import Any, Awaitable, Callable, List, Optional

EmitterType = Optional[Callable[[dict], Any]]
SendCitationType = Callable[[str, str, str], Awaitable[None]]
SendStatusType = Callable[[str, bool, str, List[str], Optional[str]], Awaitable[None]]


def get_send_citation(__event_emitter__: EmitterType) -> SendCitationType:
    async def send_citation(title: str, url: str, content: str):
        if __event_emitter__:
            await __event_emitter__(
                {
                    "type": "citation",
                    "data": {
                        "document": [content],
                        "metadata": [{"name": title, "source": url, "html": False}],
                    },
                }
            )

    return send_citation


def get_send_status(__event_emitter__: EmitterType) -> SendStatusType:
    async def send_status(
        status_message: str,
        done: bool,
        action: str,
        urls: List[str],
        query: Optional[str] = None,
    ):
        if __event_emitter__:
            status_data = {
                "done": done,
                "action": action,
                "description": status_message,
            }
            if query:
                status_data["query"] = query
            if urls:
                status_data["urls"] = urls
            await __event_emitter__({"type": "status", "data": status_data})

    return send_status


class Tools:
    def __init__(self):
        pass

    async def web_scrape(
        self, urls: List[str], __event_emitter__: EmitterType = None
    ) -> str:
        """
        Scrape and process multiple web pages using gpts.webpilot.ai API

        :param urls: List of URLs of the web pages to scrape.
        :param __event_emitter__: Optional event emitter for status updates.
        :return: Combined scraped and processed contents, or error messages.
        """
        api_url = "https://gpts.webpilot.ai/api/read"
        headers = {"Content-Type": "application/json", "WebPilot-Friend-UID": "0"}

        send_status = get_send_status(__event_emitter__)
        send_citation = get_send_citation(__event_emitter__)

        combined_results = []

        await send_status(f"准备读取 {len(urls)} 个网页", False, "web_scrape", urls)

        async def process_url(session: aiohttp.ClientSession, url: str) -> str:
            try:
                # Request fields expected by the WebPilot read API; the short
                # keys are the API's own parameter names
                payload = {
                    "link": url,
                    "ur": "summary of the page",
                    "lp": True,
                    "rt": False,
                    "l": "en",
                }

                async with session.post(
                    api_url, headers=headers, json=payload
                ) as response:
                    response.raise_for_status()
                    result = await response.json()

                # Drop the verbose "rules" field before passing the content on
                result.pop("rules", None)
                content = json.dumps(result, ensure_ascii=False)
                title = result.get("title", f"Result from {url}")

                await send_citation(title, url, content)
                return f"URL: {url}\nTitle: {title}\nContent: {content}\n"

            except aiohttp.ClientError as e:
                error_message = f"Error reading page {url}: {str(e)}"
                await send_status(error_message, False, "web_scrape", urls)
                return f"URL: {url}\nError: {error_message}\n"

        # Share one HTTP session across all URLs and fetch them concurrently
        async with aiohttp.ClientSession() as session:
            tasks = [process_url(session, url) for url in urls]
            results = await asyncio.gather(*tasks)
        combined_results.extend(results)

        await send_status(f"已完成 {len(urls)} 个网页的读取", True, "web_search", urls)

        # Join all per-URL results into a single string
        final_result = "\n".join(combined_results)
        return final_result
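
A usage sketch for the multi-URL version, again with a hypothetical stub emitter; note the richer status payload, which now carries the action name and the URL list:

import asyncio

async def main():
    async def emitter(event: dict):
        if event["type"] == "status":
            print("status:", event["data"]["description"], event["data"].get("urls"))
        elif event["type"] == "citation":
            print("citation:", event["data"]["metadata"][0]["source"])

    output = await Tools().web_scrape(
        ["https://example.com", "https://example.org"],
        __event_emitter__=emitter,
    )
    print(output)

asyncio.run(main())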

Import the latest version directly from the community:
https://openwebui.com/t/2171/webpilot
Jina version:
https://openwebui.com/t/2171/jina
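
The Jina version linked above presumably swaps the WebPilot endpoint for Jina Reader, which returns an LLM-friendly plain-text rendering of a page when you prefix its URL with https://r.jina.ai/. A minimal sketch of that swap (jina_read is an illustrative helper, not the tool's actual code; see the link for the real version):

import aiohttp

async def jina_read(url: str) -> str:
    # Jina Reader: GET https://r.jina.ai/<url> returns the page as
    # readable plain text suitable for an LLM context
    async with aiohttp.ClientSession() as session:
        async with session.get(f"https://r.jina.ai/{url}") as response:
            response.raise_for_status()
            return await response.text()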

So awesome, 三藏!

This plugin can even run separate searches for different questions.

Super slick :smirk:

How did you set this up? I configured it the way you described, but my replies come out like this

Did you turn on the toggle in the bottom-left corner?

Yes, it's on.

Wait until this afternoon; I'll push an updated version with a better experience.

My setup doesn't seem to take effect, which is odd.

Nice, but how is this different from Open WebUI's built-in "# + URL" feature, which vectorizes the page directly?

It's just a plugin: it calls the tool API to read the page content and attaches it to the context so the AI can answer your question.

Updated; give the new version a try.

Not sure where the problem is.

Function calling (fc) requires support from the model itself: glm-4-plus, DeepSeek, and the like all support it, while some models that prioritize speed do not.
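
For context, "fc" here means the model returning a structured tool call instead of plain text. With an OpenAI-compatible API, a model that supports it replies with something like the sketch below (field names follow the OpenAI chat-completions schema; the values are made up):

# Hypothetical assistant message from a function-calling-capable model
assistant_message = {
    "role": "assistant",
    "content": None,
    "tool_calls": [
        {
            "id": "call_0",  # made-up id
            "type": "function",
            "function": {
                "name": "web_scrape",
                "arguments": '{"urls": ["https://example.com"]}',
            },
        }
    ],
}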

oi doesn't need fc, though.

So awesome, 三藏!

Try switching to DeepSeek. The Azure deployment I reverse-proxy through a Cloudflare Worker doesn't support fc, yet the direct connection does?

#https://linux.do/t/topic/232918

If you don't believe it, send the topic above to an LLM along with the corresponding instructions, and you can get the same functionality.

:bili_040: Oh? So that's how it works. My ancient machine has too little RAM, so I've never used oi.

Looks really impressive.