✨ 把 Gemini 联网搜索「引用来源」也挪到响应 content 里了。。

Qiner · 2025 年2 月 23 日 10:30

(\ _ /)
( ･-･)
/っ与其用个例猜大模型联网搜索牛不牛逼，不如直接看看它们怎么搜的。。

简而言之，流程大都如此

关键词搜索页，点开就是：

然后 Gemini 会在这个基础上进一步打开它看对眼的 URL：

(\ _ /)
( ･-･)
/っ就，Gemini 搜索能力强不强，不看 Gemini，看 Google。。和你的文字被理解成了什么搜索关键字。。

最后是 Vertex AI の Gemini 的 Cloudflare Worker，虽然我觉得应该用不上（改客户端太麻烦了，改 Worker 好了）。

Cloudflare Worker 源码

// Google Cloud project and OAuth client credentials. Replace every
// placeholder below with your own values.
// NOTE(review): these secrets live in source; consider moving them to Worker
// secrets/bindings — confirm how this Worker is deployed.
const PROJECT_ID = '(这里填你自己的)';
const CLIENT_ID = '(这里填你自己的)';
const CLIENT_SECRET = '(这里填你自己的)';
const REFRESH_TOKEN = '(这里填你自己的)';

// ↑ The values above come from the step-by-step guide at https://linux.do/t/topic/118702.

// ↓ Key that clients (e.g. NextChat) must present to call this Worker; pick your own.
const API_KEY = 'sk-8848decameterBOOM';

// OAuth token endpoint that getAccessToken() posts the refresh token to.
const TOKEN_URL = 'https://www.googleapis.com/oauth2/v4/token';

// Module-level cache: the most recent access token and its expiry in Unix seconds.
let accessToken = '';
let tokenExpiry = 0;

/**
 * Returns an OAuth access token, reusing the cached one until it is within
 * 60 seconds of its recorded expiry.
 *
 * On a cache miss the refresh token is posted to TOKEN_URL and the result is
 * stored in the module-level `accessToken` / `tokenExpiry` variables. The
 * cache is only updated after the response has been validated, so a failed
 * refresh never leaves `undefined` / `NaN` behind.
 *
 * @returns {Promise<string>} The access token to send as a Bearer credential.
 * @throws {Error} When the token endpoint answers with a non-2xx status, or
 *   when the payload lacks a usable `access_token` / `expires_in`.
 */
async function getAccessToken() {
  if (Date.now() / 1000 < tokenExpiry - 60) {
    return accessToken;
  }

  const response = await fetch(TOKEN_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      client_id: CLIENT_ID,
      client_secret: CLIENT_SECRET,
      refresh_token: REFRESH_TOKEN,
      grant_type: 'refresh_token',
    }),
  });

  if (!response.ok) {
    // Surface the endpoint's own explanation (e.g. "invalid_grant") to the logs.
    const detail = await response.text().catch(() => '');
    throw new Error(`Access token refresh failed with HTTP ${response.status}: ${detail}`);
  }

  const data = await response.json();
  // Number() keeps the arithmetic numeric even if expires_in arrives as a string.
  const expiresIn = Number(data?.expires_in);
  if (typeof data?.access_token !== 'string' || data.access_token === '' || !Number.isFinite(expiresIn)) {
    throw new Error('Access token refresh response is missing access_token or expires_in');
  }

  accessToken = data.access_token;
  tokenExpiry = Date.now() / 1000 + expiresIn;
  return accessToken;
}

/*
northamerica-northeast1 (蒙特利尔)
southamerica-east1 (圣保罗)
us-central1 (爱荷华)
us-east1 (南卡罗来纳)
us-east4 (北弗吉尼亚)
us-east5 (哥伦布)
us-south1 (达拉斯)
us-west1 (俄勒冈)
us-west4 (拉斯维加斯)
europe-central2 (华沙)
europe-north1 (芬兰)
europe-southwest1 (马德里)
europe-west1 (比利时)
europe-west2 (伦敦)
europe-west3 (法兰克福)
europe-west4 (荷兰)
europe-west6 (苏黎世)
europe-west8 (米兰)
europe-west9 (巴黎)
asia-east1 (台湾)
asia-east2 (香港)
asia-northeast1 (东京)
asia-northeast3 (首尔)
asia-south1 (孟买)
asia-southeast1 (新加坡)
australia-southeast1 (悉尼)
me-central1 (多哈)
me-central2 (达曼)
me-west1 (特拉维夫)
*/

// Regions that non-"exp" models are rotated across.
const AVAILABLE_LOCATIONS = ['us-central1', 'us-south1', 'us-west1', 'us-west4'];

/**
 * Picks the region to send a request to.
 *
 * Models whose name starts with "gemini-" and contains "exp" always go to
 * us-central1. Every other model cycles through AVAILABLE_LOCATIONS, moving
 * to the next entry every 10 seconds of wall-clock time.
 *
 * @param {string} model - Model name, e.g. "gemini-1.5-pro".
 * @returns {string} Region identifier.
 */
function getLocation(model) {
  const isExpModel = /^gemini-.*exp/.test(model);
  if (isExpModel) {
    return 'us-central1';
  }

  const rotationSeconds = 10;
  const nowSeconds = Math.floor(Date.now() / 1000);
  const slot = Math.floor((nowSeconds / rotationSeconds) % AVAILABLE_LOCATIONS.length);
  return AVAILABLE_LOCATIONS[slot];
}

/**
 * Builds the model endpoint URL on the regional aiplatform.googleapis.com host.
 *
 * @param {string} location - Region, used both in the host name and the path.
 * @param {string} model - Model name placed after "publishers/google/models/".
 * @param {boolean} isStream - Selects ":streamGenerateContent" when truthy,
 *   ":generateContent" otherwise.
 * @returns {string} The full endpoint URL (no query string).
 */
function constructApiUrl(location, model, isStream) {
  let action = 'generateContent';
  if (isStream) {
    action = 'streamGenerateContent';
  }

  const host = `${location}-aiplatform.googleapis.com`;
  const resource = `projects/${PROJECT_ID}/locations/${location}/publishers/google/models/${model}`;
  return `https://${host}/v1/${resource}:${action}`;
}

// CORS header values shared by the responses built in this section.
const CORS_ALLOW_HEADERS = 'Content-Type, Authorization, x-api-key, anthropic-version, model';
const CORS_DEFAULT_METHODS = 'POST, GET, OPTIONS';

/** Returns the CORS headers to attach to a response, for the given allowed methods. */
function buildCorsHeaders(methods = CORS_DEFAULT_METHODS) {
  return {
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Methods': methods,
    'Access-Control-Allow-Headers': CORS_ALLOW_HEADERS,
  };
}

/** Builds a CORS-enabled JSON error response of the form {type:"error", error:{type, message}}. */
function buildErrorResponse(status, errorType, message, methods) {
  const payload = { type: 'error', error: { type: errorType, message } };
  return new Response(JSON.stringify(payload), {
    status,
    headers: { 'Content-Type': 'application/json', ...buildCorsHeaders(methods) },
  });
}

/** Returns true when the path's ":action" suffix asks for streaming, or when there is no suffix at all. */
function isStreamingPath(path) {
  const colonIndex = path.lastIndexOf(':');
  if (colonIndex === -1) {
    // No ":action" in the path: default to streamGenerateContent (NextChat).
    return true;
  }
  return path.substring(colonIndex + 1).includes('streamGenerateContent');
}

/** Accepts the client key either as ?key=... or as "Authorization: Bearer ...". */
function isAuthorized(request, url) {
  const apiKey = url.searchParams.get('key');
  if (apiKey && apiKey === API_KEY) {
    return true;
  }
  const authHeader = request.headers.get('Authorization');
  return Boolean(authHeader) && authHeader === `Bearer ${API_KEY}`;
}

/** Returns the first part's text of the last user turn that has parts, or null when there is none. */
function findLastUserText(contents) {
  for (let i = contents.length - 1; i >= 0; i--) {
    const content = contents[i];
    if (content && content.role === 'user' && content.parts && content.parts.length > 0) {
      return content.parts[0].text;
    }
  }
  return null;
}

/**
 * Applies the "?search" prompt conventions to requestBody (mutated in place)
 * and returns the model to use.
 *
 * Conventions read from the last user text:
 *   - "<suffix>￤<prompt>" (separator within the first 51 characters) selects
 *     model "gemini-<suffix>" and is stripped from the prompt;
 *   - a leading "*" suppresses the search tool and is stripped as well.
 * Without a leading "*", a search tool is attached: "google_search" for
 * models containing "exp", "googleSearchRetrieval" otherwise.
 *
 * When the last user turn has no text in its first part (e.g. it starts with
 * an image) the request is left untouched instead of throwing.
 */
function applySearchDirectives(requestBody, model) {
  const contents = Array.isArray(requestBody.contents) ? requestBody.contents : [];
  const lastUserInput = findLastUserText(contents);
  if (typeof lastUserInput !== 'string' || lastUserInput === '') {
    return model;
  }

  // Prompt rewriting only ever touches the final turn, and only if it is a
  // user turn whose first part really carries text.
  const lastContent = contents[contents.length - 1];
  const firstPart = lastContent && lastContent.role === 'user' ? lastContent.parts?.[0] : undefined;
  const editablePart = firstPart && typeof firstPart.text === 'string' ? firstPart : null;

  let selectedModel = model;
  const modelMatch = lastUserInput.match(/^(\*?)([a-z0-9.-]+?)￤/);
  if (modelMatch && lastUserInput.indexOf('￤') <= 50) {
    selectedModel = 'gemini-' + modelMatch[2];
    if (editablePart) {
      const prompt = editablePart.text.substring(modelMatch[0].length).trim();
      // Keep the "*" marker so the tool decision below still sees it.
      editablePart.text = modelMatch[1] === '*' ? '*' + prompt : prompt;
    }
  }

  if (!lastUserInput.startsWith('*')) {
    requestBody.tools = [
      selectedModel.includes('exp') ? { google_search: {} } : { googleSearchRetrieval: {} },
    ];
  } else if (editablePart) {
    // Drop the leading "*" marker from the prompt.
    editablePart.text = editablePart.text.substring(1).trim();
  }

  return selectedModel;
}

/**
 * Rewrites requestBody.safetySettings in place when the client sent any:
 * every threshold becomes "OFF" ("exp" models) or "BLOCK_NONE" (others), and
 * a single HARM_CATEGORY_CIVIC_INTEGRITY entry is appended ("BLOCK_ONLY_HIGH"
 * for "exp" models, "BLOCK_NONE" otherwise).
 */
function applySafetySettings(requestBody, model) {
  if (!Array.isArray(requestBody.safetySettings)) {
    return;
  }
  const isExp = model.includes('exp');
  const threshold = isExp ? 'OFF' : 'BLOCK_NONE';

  // Remove any existing HARM_CATEGORY_CIVIC_INTEGRITY entry before re-adding it.
  const settings = requestBody.safetySettings.filter(
    (setting) => setting.category !== 'HARM_CATEGORY_CIVIC_INTEGRITY'
  );
  for (const setting of settings) {
    setting.threshold = threshold;
  }
  settings.push({
    category: 'HARM_CATEGORY_CIVIC_INTEGRITY',
    threshold: isExp ? 'BLOCK_ONLY_HIGH' : 'BLOCK_NONE',
  });
  requestBody.safetySettings = settings;
}

/**
 * Renders grounding metadata as a Markdown "sources" section, or returns null
 * when there is nothing to list. Search queries come from the chip links in
 * searchEntryPoint.renderedContent; pages come from groundingChunks[].web.
 */
function buildReferencesMarkdown(groundingMetadata) {
  if (!groundingMetadata) {
    return null;
  }

  let markdown = '## 引用来源\n';
  let hasReferences = false;

  const renderedContent = groundingMetadata.searchEntryPoint?.renderedContent;
  if (typeof renderedContent === 'string') {
    const chips = [...renderedContent.matchAll(/<a class="chip" href="([^"]+)">([^<]+)<\/a>/g)];
    if (chips.length > 0) {
      hasReferences = true;
      markdown += '* 执行搜索查询 ✨ *renderedContent*\n';
      for (const [, href, label] of chips) {
        markdown += `   - [${label}](${href})\n`;
      }
    }
  }

  // Only chunks with a `web` entry are listed; skip the section if none have one.
  const webChunks = Array.isArray(groundingMetadata.groundingChunks)
    ? groundingMetadata.groundingChunks.filter((chunk) => chunk && chunk.web)
    : [];
  if (webChunks.length > 0) {
    hasReferences = true;
    markdown += '* 解析网页内容 🌋 *groundingChunks*\n';
    webChunks.forEach((chunk, index) => {
      markdown += `   - [${chunk.web.title}](${chunk.web.uri}) ${getSuperscript(index + 1)}\n`;
    });
  }

  return hasReferences ? markdown : null;
}

/**
 * Handles one line of the upstream SSE body, calling send(text) for output.
 *
 * Non-"data: " lines and unparsable data lines are forwarded unchanged. The
 * chunk whose first candidate has finishReason "STOP" is replaced by a
 * content-only message, followed by a second message carrying the sources
 * section when grounding metadata yields any.
 */
function forwardSseLine(line, send) {
  if (!line.startsWith('data: ')) {
    send(line + '\n');
    return;
  }

  let data;
  try {
    data = JSON.parse(line.slice(6));
  } catch {
    // Not JSON: pass it through rather than aborting the whole stream.
    send(line + '\n');
    return;
  }

  const candidate = data?.candidates?.[0];
  if (!candidate || candidate.finishReason !== 'STOP') {
    send(`data: ${JSON.stringify(data)}\n`);
    return;
  }

  const toMessage = (text) => ({
    candidates: [{
      content: {
        role: 'model',
        parts: [{ text }],
      },
    }],
    modelVersion: data.modelVersion,
    createTime: data.createTime,
    responseId: data.responseId,
  });

  // The final chunk may arrive without content; emit an empty text then.
  const finalText = candidate.content?.parts?.[0]?.text ?? '';
  send(`data: ${JSON.stringify(toMessage(finalText))}\n\n`);

  const references = buildReferencesMarkdown(candidate.groundingMetadata);
  if (references !== null) {
    // The blank line that follows this data line upstream terminates the event.
    send(`data: ${JSON.stringify(toMessage(references))}\n`);
  }
}

/** Wraps an upstream SSE body in a stream that appends cited sources to the final message. */
function rewriteSseStream(upstreamBody) {
  const reader = upstreamBody.getReader();
  const encoder = new TextEncoder();
  const decoder = new TextDecoder();

  return new ReadableStream({
    async start(controller) {
      const send = (text) => controller.enqueue(encoder.encode(text));
      let buffer = '';
      try {
        while (true) {
          const { done, value } = await reader.read();
          if (done) break;

          buffer += decoder.decode(value, { stream: true });
          const lines = buffer.split('\n');
          buffer = lines.pop();
          for (const line of lines) {
            forwardSseLine(line, send);
          }
        }
        // Flush a last line that was not newline-terminated.
        buffer += decoder.decode();
        if (buffer !== '') {
          forwardSseLine(buffer, send);
        }
        controller.close();
      } catch (err) {
        controller.error(err);
      }
    },
    cancel(reason) {
      return reader.cancel(reason);
    },
  });
}

/**
 * Worker request handler: authenticates the client, rewrites the request
 * body, forwards it to the model endpoint and relays the answer with CORS
 * headers.
 *
 * Query parameters read: `key` (client key), `model` (defaults to
 * "gemini-1.5-pro"), `search` (enables the prompt conventions handled by
 * applySearchDirectives) and `alt=sse` (SSE relay with sources appended).
 *
 * @param {Request} request - Incoming request.
 * @returns {Promise<Response>} 403 for a bad key, 400 for a non-JSON body,
 *   otherwise the upstream response (rewritten for successful SSE streams).
 */
async function handleRequest(request) {
  if (request.method === 'OPTIONS') {
    return handleOptions();
  }

  const url = new URL(request.url);
  const isStream = isStreamingPath(url.pathname);

  if (!isAuthorized(request, url)) {
    return buildErrorResponse(
      403,
      'permission_error',
      'Your API key does not have permission to use the specified resource.',
      'POST, GET, OPTIONS, DELETE, HEAD'
    );
  }

  let requestBody;
  try {
    requestBody = await request.json();
  } catch {
    requestBody = null;
  }
  if (requestBody === null || typeof requestBody !== 'object') {
    return buildErrorResponse(400, 'invalid_request_error', 'Request body must be a JSON object.');
  }

  const upstreamToken = await getAccessToken();

  // Model comes from the URL, then possibly from the prompt prefix.
  let model = url.searchParams.get('model') || 'gemini-1.5-pro';
  if (url.searchParams.has('search')) {
    model = applySearchDirectives(requestBody, model);
  }
  applySafetySettings(requestBody, model);

  let apiUrl = constructApiUrl(getLocation(model), model, isStream);
  const wantsSse = url.searchParams.get('alt') === 'sse';
  if (wantsSse) {
    apiUrl += '?alt=sse';
  }

  const modifiedHeaders = new Headers(request.headers);
  modifiedHeaders.set('Authorization', `Bearer ${upstreamToken}`);
  modifiedHeaders.set('Content-Type', 'application/json; charset=utf-8');

  const response = await fetch(new Request(apiUrl, {
    headers: modifiedHeaders,
    method: request.method,
    body: JSON.stringify(requestBody),
    redirect: 'follow',
  }));

  // Only successful streams are rewritten; upstream errors fall through below
  // so the client sees the real status code and error body.
  if (wantsSse && response.ok && response.body) {
    return new Response(rewriteSseStream(response.body), {
      headers: {
        'Content-Type': 'text/event-stream',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        ...buildCorsHeaders(),
      },
    });
  }

  const modifiedResponse = new Response(response.body, {
    status: response.status,
    statusText: response.statusText,
    headers: response.headers,
  });
  for (const [name, value] of Object.entries(buildCorsHeaders())) {
    modifiedResponse.headers.set(name, value);
  }
  return modifiedResponse;
}

/**
 * Converts a number to its Unicode superscript form, digit by digit
 * (e.g. 3 -> "³", 10 -> "¹⁰").
 *
 * Every decimal digit, including 0, is mapped; any other character in the
 * number's string form (such as a minus sign) is kept unchanged.
 *
 * @param {number} num - Value to render; used here for 1-based source counters.
 * @returns {string} The superscript text.
 */
function getSuperscript(num) {
  // Indexed by digit value, so '0' needs its own entry at position 0.
  const superscripts = ['⁰', '¹', '²', '³', '⁴', '⁵', '⁶', '⁷', '⁸', '⁹'];
  return Array.from(String(num), (ch) => superscripts[ch] ?? ch).join('');
}

/**
 * Answers a CORS preflight request.
 *
 * @returns {Response} An empty 204 response carrying the Access-Control-Allow-* headers.
 */
function handleOptions() {
  const corsHeaders = new Headers({
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Methods': 'POST, GET, OPTIONS',
    'Access-Control-Allow-Headers': 'Content-Type, Authorization, x-api-key, anthropic-version, model',
  });

  return new Response(null, { status: 204, headers: corsHeaders });
}

// Worker entry point: every incoming request is delegated to handleRequest.
export default {
  fetch: async (request) => handleRequest(request),
};

完。

baipiaodang · 2025 年2 月 23 日 10:46

黛玉太强了

Nshpiter · 2025 年2 月 23 日 11:04

哇，我来试试

handsome · 2025 年2 月 23 日 11:31

诶哟，不错诶

话题		回复	浏览量
自用的「Gemini 使用 Google 搜索」的 Cloudflare Worker 代码。	开发调优 · Cloudflare , 人工智能 , GCP	15	1789	2024 年11 月 12 日
🌋【超简单】零门槛！NextChat 一键调用 Google 联网搜索！免费！！	开发调优 · AFF , Gemini , 人工智能	80	3551	2025 年2 月 10 日
简易cf workerjs web框架，参考golang gin	开发调优 · 软件开发	1	85	2025 年2 月 2 日
一些关于 ChatBot 联网搜索的使用经验	搞七捻三 · 人工智能	51	1618	2025 年2 月 15 日
【api 测活工具++】纯前端版本+ 模型验证 (v1.3 版本已加入官转验证去新帖）	资源荟萃 · ChatGPT , 人工智能	77	2643	2024 年11 月 12 日

✨ 把 Gemini 联网搜索「引用来源」也挪到响应 content 里了。。

完。

相关话题