Hacked together a Command R+ to OpenAI API bridge on Cloudflare

About the Command R+ model:

Gave it a quick try yesterday; its roleplay feels somewhat better than gpt-3.5 and gemini-pro. (Of course, the GOATs are still gpt-4-1106 and claude-opus.)

Workers code below; paste it straight into a Worker:
You can test it with:

```
GET /?q=hello&key=xxxxx
GET /?q=hello&key=xxxxx&model=command-r-plus
```

Both streaming (SSE) and plain JSON responses are supported.
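
A POST with an OpenAI-format body works the same way. A minimal sketch (the deployment URL below is a placeholder; substitute wherever you host it):

```js
// Hypothetical deployment URL; the Worker ignores the path, so the
// OpenAI-style /v1/chat/completions route works as-is.
const resp = await fetch("https://your-worker.example.workers.dev/v1/chat/completions", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    // The Worker forwards this header to Cohere unchanged.
    "Authorization": "bearer YOUR_COHERE_API_KEY",
  },
  body: JSON.stringify({
    model: "command-r-plus",
    stream: false, // set to true for SSE chunks
    messages: [{ role: "user", content: "hello" }],
  }),
});
console.log((await resp.json()).choices[0].message.content);
```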

The trial key's rate limit kicked in at 1000 API calls / month:

{"message":"You are using a Trial key, which is limited to 1000 API calls / month. You can continue to use the Trial key for free or upgrade to a Production key with higher rate limits at 'https://dashboard.cohere.com/api-keys'. Contact us on 'https://discord.gg/XW44jPfYJu' or email us at [email protected] with any questions"}

Once the trial API key is exhausted, the web chat playground stops working too, because the web UI also sends requests with the default trial api-key; creating a new key doesn't lift the limit either.

At @xiaozhang959's suggestion, fixed the bug where streaming by default caused newapi's channel test to fail.
At @lueluelue's suggestion, added support for the command* models; a net-command* model name turns on the official web search. If no command* model is specified, the default is command-r.
At @zding's suggestion, added support for Pages Functions: name this file _worker.js, pack it into a zip, create a new Pages project, and upload the zip. Per the latest official docs, Pages deployments are subject to request limits too (Pages Functions and Workers Functions share the same free quota of 100,000 requests/day).
_worker.zip (2.1 KB)
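
If you prefer the functions/ directory layout over _worker.js, a catch-all Pages Function can delegate to the same handler. A hypothetical sketch (the file paths here are illustrative):

```js
// functions/[[path]].js: catch-all Pages Function (hypothetical layout)
// that delegates to the Worker object exported from ../src/worker.js.
import worker from "../src/worker.js";

export function onRequest(context) {
  return worker.fetch(context.request, context.env, context);
}
```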
Here is a live API endpoint (accessing it may require a VPN); if you deploy it yourself and bind your own domain, that isn't needed.
https://cohere.pages.dev/
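
Because the Worker speaks the OpenAI wire format, the official OpenAI SDK can point straight at it. An untested sketch (the base URL is the Pages deployment above; the net- prefix enables the web-search connector as described earlier):

```js
import OpenAI from "openai";

// The "API key" here is your Cohere key; the Worker just forwards it upstream.
const client = new OpenAI({
  baseURL: "https://cohere.pages.dev/v1",
  apiKey: "YOUR_COHERE_API_KEY",
});

// net-command-r maps to command-r with Cohere's web-search connector enabled.
const completion = await client.chat.completions.create({
  model: "net-command-r",
  messages: [{ role: "user", content: "hello" }],
});
console.log(completion.choices[0].message.content);
```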

```js
// Cloudflare Worker: bridges OpenAI-style chat requests to Cohere's /v1/chat,
// so OpenAI clients can talk to Command R / Command R+ unchanged.
const worker = {
  async fetch(req, env, ctx) {
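    // Answer CORS preflight requests directly.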
    if (req.method === "OPTIONS") {
      return new Response("", {
        headers: {
          'Access-Control-Allow-Origin': '*',
          "Access-Control-Allow-Headers": '*'
        }, status: 204
      })
    }

    // Accept either an OpenAI-format JSON body or, for quick testing,
    // a GET request with ?q=<prompt>.
    const search = (new URL(req.url)).searchParams
    let body = {}
    try {
      body = await req.json()
    } catch (e) {
      body = { "messages": [{ "role": "user", "content": search.get('q') || "hello" }], "temperature": 0.5, "presence_penalty": 0, "frequency_penalty": 0, "top_p": 1, stream: true }
    }

    // Convert the OpenAI-style message list into Cohere's format: every
    // message except the last becomes chat_history (Cohere uses the roles
    // USER / CHATBOT / SYSTEM), and the final message is sent as `message`.
    const data = { chat_history: [] }
    try {
      for (let i = 0; i < body.messages.length - 1; i++) {
        data.chat_history.push({ "role": body.messages[i].role === "assistant" ? "CHATBOT" : body.messages[i].role.toUpperCase(), "message": body.messages[i].content })
      }
      data.message = body.messages[body.messages.length - 1].content
    } catch (e) {
      return new Response(e.message);
    }
    // Only stream when the client explicitly asked for it.
    data.stream = body.stream === true

    // A net- prefix (e.g. net-command-r) turns on Cohere's web-search connector.
    if ((body.model + "").indexOf("net-") === 0) data.connectors = [{ "id": "web-search" }];
    // Pass all other OpenAI parameters through to Cohere unchanged.
    for (let i in body) {
      if (!/^(model|messages|stream)/i.test(i)) data[i] = body[i];
    }
    // Strip the net- prefix; fall back to ?model= and finally to command-r.
    if (/^(net-)?command/.test(body.model)) data.model = body.model.replace(/^net-/, "");
    if (!data.model) data.model = search.get('model') || "command-r";

    // Forward the converted request to Cohere. The API key is taken from the
    // Authorization header or, failing that, from the ?key= query parameter.
    const resp = await fetch('https://api.cohere.ai/v1/chat', {
      method: "POST",
      body: JSON.stringify(data),
      headers: {
        'content-type': 'application/json',
        "Authorization": req.headers.get('authorization') || "bearer " + search.get('key')
      }
    })
    // Pass upstream errors straight through.
    if (resp.status !== 200) return resp;

    const created = Math.floor(Date.now() / 1000); // OpenAI-style creation timestamp

    // Non-streaming: wrap Cohere's whole reply in a single OpenAI
    // chat.completion object.
    if (!data.stream) {
      let ddd;
      try {
        ddd = await resp.json()
      } catch (e) {
        ddd = { error: e.message }
      }
      return new Response(JSON.stringify({
        "id": "chatcmpl-QXlha2FBbmROaXhpZUFyZUF3ZXNvbWUK",
        "object": "chat.completion",
        "created": created,
        "model": data.model,
        "choices": [{
          "index": 0, "message": {
            "role": "assistant",
            "content": ddd.text || ddd.error
          }, "logprobs": null, "finish_reason": "stop"
        }], "usage": { "prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0 }, "system_fingerprint": null
      }), {
        headers: {
          'Access-Control-Allow-Origin': '*',
          "Access-Control-Allow-Headers": '*',
          'Content-Type': 'application/json; charset=UTF-8'
        }, status: resp.status
      })
    }

    // Streaming: read Cohere's newline-delimited JSON events and re-emit
    // each one through a TransformStream as an OpenAI-style SSE chunk.
    const { readable, writable } = new TransformStream();
    const my_stream_writer = writable.getWriter();

    ; (async () => {
      const reader = resp.body.getReader();
      const decoder = new TextDecoder('utf-8');
      const encoder = new TextEncoder();
      let buffer = "";
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        buffer = lines.pop(); // keep any trailing partial line for the next chunk
        for (const line of lines) {
          if (!line) continue;
          let msg;
          try { msg = JSON.parse(line) } catch (e) { continue }
          if (msg.text) {
            const txt = JSON.stringify({ "id": "chatcmpl-QXlha2FBbmROaXhpZUFyZUF3ZXNvbWUK", "object": "chat.completion.chunk", "created": created, "model": data.model, "choices": [{ "index": 0, "delta": { "role": "assistant", "content": msg.text }, "finish_reason": null }] })
            await my_stream_writer.write(encoder.encode('data: ' + txt + '\n\n'));
          }
          if (msg.is_finished) {
            await my_stream_writer.write(encoder.encode(`data: {"id":"chatcmpl-QXlha2FBbmROaXhpZUFyZUF3ZXNvbWUK","object":"chat.completion.chunk","created":${created},"model":"${data.model}","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}\n\n`));
            await my_stream_writer.close();
            return;
          }
        }
      }
      // Upstream ended without an is_finished event; close the stream anyway.
      await my_stream_writer.close();
    })()

    // Return the readable half to the client right away; chunks are written
    // to it as they arrive from Cohere.
    return new Response(readable, {
      headers: {
        'Access-Control-Allow-Origin': '*',
        "Access-Control-Allow-Headers": '*',
        'Content-Type': 'text/event-stream; charset=UTF-8'
      }, status: resp.status
    })

  },
};

export default worker;

// Pages Functions entry point: delegates to the same fetch handler.
export function onRequest(context) {
  return worker.fetch(context.request, context.env, context);
}
```

| Latest model | Description | Max tokens (context length) | Endpoints |
| --- | --- | --- | --- |
| command-light | A smaller, faster version of command. Almost as capable, but a lot faster. | 4096 | Chat, Summarize |
| command-light-nightly | To reduce the time between major releases, we put out nightly versions of command models. For command-light, that is command-light-nightly. Be advised that command-light-nightly is the latest, most experimental, and (possibly) unstable version of its default counterpart. Nightly releases are updated regularly, without warning, and are not recommended for production use. | 8192 | Chat |
| command | An instruction-following conversational model that performs language tasks with high quality, more reliably and with a longer context than our base generative models. | 4096 | Chat, Summarize |
| command-nightly | To reduce the time between major releases, we put out nightly versions of command models. For command, that is command-nightly. Be advised that command-nightly is the latest, most experimental, and (possibly) unstable version of its default counterpart. Nightly releases are updated regularly, without warning, and are not recommended for production use. | 8192 | Chat |
| command-r | Command R is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents. | 128k | Chat |
| command-r-plus | Command R+ is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. It is best suited for complex RAG workflows and multi-step tool use. | 128k | Chat |

Gonna try this out.


mark


Nice, bookmarking to try.


:+1: :+1: :+1: :+1: :+1: :+1:


:ox::frog:,mark


Thanks for sharing, learned something.


Thanks for sharing.


Nice, nice. This forum is all experts except me.


Waiting on a Docker version; CF Workers has a usage quota.


Thanks for sharing.


If it could be made into a Pages deployment, unlimited quota.


Seems not; Pages is static and can't run code.


Still looking forward to it.


Switching to Docker would mean rewriting the stream part in Node.js,
since the Cloudflare runtime differs from Node.js in a few ways (especially around Buffer).
I wrote this just to get something usable; you might want to look at other implementations :joy:


Just paste the code to cmd-r and ask it to rewrite it as a Node.js implementation :smirk:


That could work :joy:
Though I feel my code's logic is a bit convoluted outside a special runtime like CF Workers.
In Node.js you'd just request with axios, then stream.on('data') and res.write, which is actually simpler than this getReader-then-rebuild-the-Response approach.
So pasting it to cmd-r as-is might even mislead it down a detour; it would still take a while to iterate. :wink:
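
For reference, that Node.js shape might look roughly like this (an untested sketch; the function name and the Express-style res are illustrative, not a drop-in port):

```js
// Rough sketch of the axios + res.write approach described above.
const axios = require("axios");

async function relayCohereStream(cohereBody, apiKey, res) {
  // Ask axios for a raw stream instead of a parsed body.
  const upstream = await axios.post("https://api.cohere.ai/v1/chat", cohereBody, {
    headers: { Authorization: "bearer " + apiKey },
    responseType: "stream",
  });

  res.setHeader("Content-Type", "text/event-stream; charset=UTF-8");

  let buffer = "";
  upstream.data.on("data", (chunk) => {
    buffer += chunk.toString("utf-8");
    const lines = buffer.split("\n");
    buffer = lines.pop(); // keep the trailing partial line for the next chunk
    for (const line of lines) {
      if (!line) continue;
      let msg;
      try { msg = JSON.parse(line); } catch (e) { continue; }
      if (msg.text) {
        // Re-wrap each Cohere event as an OpenAI-style SSE chunk.
        res.write("data: " + JSON.stringify({
          object: "chat.completion.chunk",
          choices: [{ index: 0, delta: { content: msg.text }, finish_reason: null }],
        }) + "\n\n");
      }
      if (msg.is_finished && !res.writableEnded) res.end();
    }
  });
  upstream.data.on("end", () => { if (!res.writableEnded) res.end(); });
}
```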


The great benefactor (Cloudflare) really can do anything.


Right ✓
If only domestic companies had this kind of mindset. Cloud resources in China are basically never free long-term; cloud functions and cloud servers are free-trial-only at best.


Thanks for sharing. The great benefactor scores again!
