让所有模型支持Copilot代码补全和聊天和内联聊天

这里用的是豆包 Doubao-pro-32k 速度挺快的

内联聊天补全

代码补全

左侧聊天

python 代码

玩的开心
Agently这个库支持一大堆模型自己看文档接入 ollama 本地模型之流也可以接入的
此处无利益相关
阿里通义千问 Qwen - Agently AI应用开发框架

比如使用本地的ollama模型调用qwen2:7b就这样子写 效果和速度也蛮快的

agent_factory = (
    Agently.AgentFactory()
    .set_settings("current_model", "OAIClient")
    .set_settings("model.OAIClient.auth", {"api_key": "abc"})
    .set_settings("model.OAIClient.options", {"model": "qwen2:7b"})
    .set_settings("model.OAIClient.url", "http://127.0.0.1:11434/v1")
)

完整代码

from flask import Flask, request, Response, stream_with_context
import json
import time
import uuid
import Agently

app = Flask(__name__)
# logging.basicConfig(level=logging.DEBUG)

agent_factory = (
    Agently.AgentFactory()
    .set_settings("current_model", "OAIClient")
    .set_settings("model.OAIClient.auth", {"api_key": "sk-UwGrkEwoeCAwf6Bc3d10C21aCaD34434A23d426a9aB11677"})
    .set_settings("model.OAIClient.options", {"model": "Doubao-pro-32k"})
    .set_settings("model.OAIClient.url", "http://192.168.100.1:18300/v1")
)
agent = agent_factory.create_agent()


def complete_code(djson):
    # prompt = djson['prompt']
    # print("prompt",prompt)
    prompt = djson # 效果更好
    suffix = djson['suffix']
    max_tokens = djson['max_tokens']
    temperature = djson['temperature']
    top_p = djson['top_p']
    n = djson['n']
    stop = djson['stop']
    stream = djson['stream']
    model = djson['model']
    result = (
        agent
        .general("""您正在协助完成代码。以下是主要指导原则:
{{input}} 中 # Path: 是文件的路径,请检查后缀名确定编程语言
stop: 这是一个包含停止符的列表,用来告诉模型在生成到这些符号时停止。
suffix: 这是给模型的输入后缀,通常用来提供代码的后续部分,以帮助模型更好地理解上下文。
completion输出内容应像直接写入代码编辑器一样。
completion输出内容可以是代码、注释或字符串。
completion不要提供现有的和重复的代码。
completion始终提供非空的输出。
如果提供的前缀和后缀包含不完整的代码或语句,响应内容应该能直接连接到提供的前缀和后缀。
我可能会告诉你我想在注释中写什么,你需要根据这些指示提供内容。
""")
        .info("suffix", suffix)
        .info("stop", stop)
        .input(prompt)
        .set_settings("model.OAIClient.options.temperature", temperature)
        .set_settings("model.OAIClient.options.top_p", top_p)
        .set_settings("model.OAIClient.options.max_tokens", max_tokens)
        .output({
            "completion": ("str", "Code Completion Results"),
        })
        .start()
    )
    try:
        return [result["completion"]]
    except:
        return [""]


def complete_chat(djson):
    # djson = ujson.loads(prompt)
    # print(djson)
    max_tokens = djson['max_tokens']
    temperature = djson['temperature']
    top_p = djson['top_p']
    n = djson['n']
    stream = djson['stream']
    model = djson['model']
    messages = djson['messages']
    # 提取 messages 最后一句话
    last_message = messages[-1]['content']

    result = (
        agent
        .chat_history(messages)
        .input(last_message)
        .set_settings("model.OAIClient.options.temperature", temperature)
        .set_settings("model.OAIClient.options.top_p", top_p)
        .set_settings("model.OAIClient.options.max_tokens", max_tokens)
        .start()
    )
    # print(result)
    return result


def out_text_completion(chat_response, original_model):
    chat_contents = []
    for i, text in enumerate(chat_response):
        chat_content = {
            "index": i,
            "text": text
        }
        chat_contents.append(chat_content)
    m_uuid = str(uuid.uuid4())
    completion_response = {
        "id": m_uuid,
        "object": "text_completion",
        "created": int(time.time()),
        "model": original_model,
        "choices": chat_contents,
        "usage": {'completion_tokens': 10, 'prompt_tokens': 10, 'total_tokens': 10}
    }
    return completion_response


def out_chat_completion_chunk(chat_response, original_model):
    completion_responses = []
    m_uuid = str(uuid.uuid4())
    for i, text in enumerate(chat_response):
        completion_response = {
            "id": m_uuid,
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "model": original_model,
            "choices": [{
                "index": 0,
                "delta": {
                    "content": text
                },
                "finish_reason": None
            }],
        }
        if i == len(chat_response) - 1:
            completion_response["choices"][0]["finish_reason"] = "stop"
            # 加入 usage
            completion_response["usage"] = {
                "prompt_tokens": 10,
                "completion_tokens": 10,
                "total_tokens": 10
            }
        completion_responses.append(completion_response)

    ctext = ""
    for i, completion_response in enumerate(completion_responses):
        ctext = ctext + "data: " + json.dumps(completion_response) + "\n"
    return ctext


@app.route('/v1/chat/completions', methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH'])
def proxy_chat():
    completion_request = request.json
    print("得到数据", completion_request)
    completion = complete_chat(completion_request)
    print("得到回答", completion)
    completion_response = out_chat_completion_chunk([completion], completion_request.get('model', 'gpt-4o-mini'))

    if not completion_request.get('stream', False):
        return json.dumps(completion_response), 200, {'Content-Type': 'application/json'}

    def generate():
        yield completion_response
        yield "data: [DONE]\n\n"
    return Response(stream_with_context(generate()), content_type='text/event-stream')

@app.route('/', defaults={'path': ''}, methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH'])
@app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH'])
def proxy(path):
    if request.method != 'POST' or 'completions' not in path:
        return "This proxy only supports POST requests to completions endpoints", 400
    completion_request = request.json
    # print("得到数据", json.dumps(completion_request))
    print("得到数据", completion_request)
    completion = complete_code(completion_request)
    print("得到回答", completion)
    completion_response = out_text_completion(completion, completion_request.get('model', 'gpt-4o-mini'))

    if not completion_request.get('stream', True):
        return json.dumps(completion_response), 200, {'Content-Type': 'application/json'}

    def generate():
        yield f"data: {json.dumps(completion_response)}\n\n"
        yield "data: [DONE]\n\n"
    return Response(stream_with_context(generate()), content_type='text/event-stream')


if __name__ == '__main__':
    app.run(debug=False, host='0.0.0.0', port=5001)

override 配置

{
  "bind": "0.0.0.0:8181",
  "proxy_url": "",
  "timeout": 600,
  "codex_api_base": "http://127.0.0.1:5001/v1",
  "codex_api_key": "sk-abc",
  "codex_api_organization": "",
  "codex_api_project": "",
  "codex_max_tokens": 500,
  "code_instruct_model": "deepseek-coder",
  "chat_api_base": "http://127.0.0.1:5001/v1",
  "chat_api_key": "sk-abc",
  "chat_api_organization": "",
  "chat_api_project": "",
  "chat_max_tokens": 4096,
  "chat_model_default": "deepseek-chat",
  "chat_model_map": {
  },
  "chat_locale": "zh_CN",
  "auth_token": ""
}

配置教程1

让Copilot用上DeepSeek总于搞明白了vscode和JetBrainsIDE都能用 - 常规话题 / 人工智能 - LINUX DO

配置教程2 最简单

全新玩具,支持window和macos填key就可以直接运行免docker

172 Likes

感谢大佬无私分享

4 Likes

前排,感谢大佬分享

2 Likes

感谢佬友分享!api_key记得重置一下

3 Likes

学习了感谢大佬分享

1 Like

感谢分享

感谢分享

马克

不错的

感谢分享,谢谢

豆包确实不错

3 Likes

感谢分享!

这是我本地的key,内网没关系,谢谢你的提醒。

1 Like

有人测试哪个模型补全效果最好吗

豆包也可以啊,也准确。主要是免费啊。

太牛了吧!感谢感谢!

1 Like

Agently库的作用是什么?提供一个统一的api调用方式?oneapi将各大模型厂商api接口统一为openai格式,Agently库则是实现对各大厂商api的调用库,这样理解对吗?

这个返回是流式的吗?

没写流式返回的代码,一般补全的代码比较短,直接返回就行了。

可以这样理解,主要作用是代码补全的规范输出

1 Like