Command R+模型介绍:
昨天浅试了一下,感觉 roleplay 比 gpt-3.5 和 gemini-pro要好一些.(当然yyds还是gpt-4-1106 和 claude-opus)。
Workers代码,直接粘贴到workers即可:
可以用
GET /?q=hello&key=xxxxx
GET /?q=hello&key=xxxxx&model=command-r-plus
进行测试。
适配 stream 和 JSON 。
试用 key 出现了 rate limited 提示(限额 1000 API calls / month):
{"message":"You are using a Trial key, which is limited to 1000 API calls / month. You can continue to use the Trial key for free or upgrade to a Production key with higher rate limits at 'https://dashboard.cohere.com/api-keys'. Contact us on 'https://discord.gg/XW44jPfYJu' or email us at [email protected] with any questions"}
试用的API key 用完以后,web端的chat试用也不能用了,因为web端也是用默认的试用api-key 请求的,新建一个key也不能解除限制。
在 @xiaozhang959 建议下,修复了默认stream导致newapi测试错误的bug。
在 @lueluelue 建议下,加上了command* 模型的支持,假如是 net-command* 则开启官方的web搜索。如果没有指定 command* ,默认为command-r。
在 @zding 建议下,增加对pages functions的支持。把这个文件命名为_worker.js 并打包到zip,新建 pages ,上传这个zip即可。根据最新的官方文档,部署在pages亦有请求次数限制(pages functions 和workers functions 一同共用10W请求/天 的免费额度)。
_worker.zip (2.1 KB)
贴上一个API地址(需要科学的上网方式),自己部署的话绑定自己的域名,则不需要。
https://cohere.pages.dev/
export default {
async fetch(req, env, ctx) {
if (req.method === "OPTIONS") {
return new Response("", {
headers: {
'Access-Control-Allow-Origin': '*',
"Access-Control-Allow-Headers": '*'
}, status: 204
})
}
var search = (new URL(req.url)).searchParams
var body = {}
try {
body = await req.json()
} catch (e) {
body = { "messages": [{ "role": "user", "content": search.get('q') || "hello" }], "temperature": 0.5, "presence_penalty": 0, "frequency_penalty": 0, "top_p": 1, stream: true }
}
var data = { chat_history: [] }
try {
for (let i = 0; i < body.messages.length - 1; i++) {
data.chat_history.push({ "role": body.messages[i].role === "assistant" ? "CHATBOT" : body.messages[i].role.toUpperCase(), "message": body.messages[i].content })
}
data.message = body.messages[body.messages.length - 1].content
} catch (e) {
return new Response(e.message);
}
data.stream = body.stream === true
if((body.model+"").indexOf("net-")===0) data.connectors = [{"id": "web-search"}];
for(let i in body){
if(!/^(model|messages|stream)/i.test(i)) data[i] = body[i];
}
if(/^(net-)?command/.test(body.model)) data.model = body.model.replace(/^net-/,"");
if(!data.model) data.model = search.get('model') || "command-r";
//return new Response(JSON.stringify(data));
var resp = await fetch('https://api.cohere.ai/v1/chat', {
method: "POST",
body: JSON.stringify(data),
headers: {
'content-type': 'application/json',
"Authorization": req.headers.get('authorization') || "bearer " + search.get('key')
}
})
if (resp.status !== 200) return resp;
var created = parseInt(Date.now() / 1000);
if (!data.stream) {
try {
var ddd = await resp.json()
} catch (e) {
ddd = { error: e.message }
}
return new Response(JSON.stringify({
"id": "chatcmpl-QXlha2FBbmROaXhpZUFyZUF3ZXNvbWUK",
"object": "chat.completion",
"created": created,
"model": data.model,
"choices": [{
"index": 0, "message": {
"role": "assistant",
"content": ddd.text || ddd.error
}, "logprobs": null, "finish_reason": "stop"
}], "usage": { "prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0 }, "system_fingerprint": null
}), {
headers: {
'Access-Control-Allow-Origin': '*',
"Access-Control-Allow-Headers": '*',
'Content-Type': 'application/json; charset=UTF-8'
}, status: resp.status
})
}
const { readable, writable } = new TransformStream();
const my_stream_writer = writable.getWriter();
/* Uint8Array.prototype.toString = function (c) {
return new TextDecoder(c || "utf-8").decode(this);
};
const originalJoin = Array.prototype.join;
Array.prototype.join = function (separator) {
if (this[0] instanceof Uint8Array) {
var combinedChunks = new Uint8Array(this.reduce((acc, val) => acc + val.length, 0));
let position = 0;
for (let chunk of this) {
combinedChunks.set(chunk, position);
position += chunk.length;
}
return combinedChunks
}
return originalJoin.apply(this, arguments);
};
*/
; (async () => {
var reader = resp.body.getReader();
var totalText = "";
const decoder = new TextDecoder('utf-8', {stream: true});
//var values = []
; (async () => {
var encoder = new TextEncoder();
var isEnd = false;
while (!isEnd) {
await sleep(20);
var msgs = totalText.split('\n');//values.join().toString().split('\n');
var index = 0;
for (; index < msgs.length; index++) {
try {
let msg = JSON.parse(msgs[index])
if (msg.text) {
var txt = JSON.stringify({ "id": "chatcmpl-QXlha2FBbmROaXhpZUFyZUF3ZXNvbWUK", "object": "chat.completion.chunk", "created": created, "model": data.model, "choices": [{ "index": 0, "delta": { "role": "assistant", "content": msg.text }, "finish_reason": null }] })
my_stream_writer.write(encoder.encode('data: ' + txt + '\n\n'));
}
if (msg.is_finished) {
await my_stream_writer.write(encoder.encode(`data: {"id":"chatcmpl-QXlha2FBbmROaXhpZUFyZUF3ZXNvbWUK","object":"chat.completion.chunk","created":${created},"model":"${data.model}","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}\n\n`));
await my_stream_writer.close();
isEnd = true;
}
} catch (e) {
break;
}
}
if (index < msgs.length) {
//values = [encoder.encode(msgs[msgs.length - 1])]
totalText = msgs[msgs.length - 1]
} else {
// values = []
totalText = ""
}
}
})()
while (true) {
const { done, value } = await reader.read();
if (done) break;
totalText += decoder.decode(value, {stream: true})
//values.push(value)
}
})()
return new Response(readable, {
headers: {
'Access-Control-Allow-Origin': '*',
"Access-Control-Allow-Headers": '*',
'Content-Type': 'text/event-stream; charset=UTF-8'
}, status: resp.status
})
},
};
export function onRequest(context) {
return exports.fetch(context.request)
}
/**
 * Resolve after approximately `ms` milliseconds.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise fulfilled once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
LATEST MODEL | DESCRIPTION | MAX TOKENS (CONTEXT LENGTH) | ENDPOINTS |
---|---|---|---|
command-light |
A smaller, faster version of command . Almost as capable, but a lot faster. |
4096 | Chat,Summarize |
command-light-nightly |
To reduce the time between major releases, we put out nightly versions of command models. For command-light , that is command-light-nightly .Be advised that command-light-nightly is the latest, most experimental, and (possibly) unstable version of its default counterpart. Nightly releases are updated regularly, without warning, and are not recommended for production use. |
8192 | Chat |
command |
An instruction-following conversational model that performs language tasks with high quality, more reliably and with a longer context than our base generative models. | 4096 | Chat,Summarize |
command-nightly |
To reduce the time between major releases, we put out nightly versions of command models. For command , that is command-nightly .Be advised that command-nightly is the latest, most experimental, and (possibly) unstable version of its default counterpart. Nightly releases are updated regularly, without warning, and are not recommended for production use. |
8192 | Chat |
command-r |
Command R is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents. | 128k | Chat |
command-r-plus |
Command R+ is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. It is best suited for complex RAG workflows and multi-step tool use. | 128k | Chat |