背景
在 chat 场景中,通常模型不需要返回结构化的数据。但在 LLM 应用开发里,模型通常被视为提供某种原子能力的 API Service,此时我们希望直接得到一个 JSON,通常的解法有:
1. 直接在 Prompt 里强调输出格式
- 优:简单,对模型 API 没有任何额外要求
- 缺:格式不稳定,特别是对于能力较差的模型
2. 使用 response_format: { "type": "json_object" } + Prompt 说明具体字段
- 优:总是确保返回合法 JSON
- 缺:字段不稳定,特别是对于能力较差的模型
3. 使用 response_format: { "type": "json_schema", "json_schema": { "strict": true, "schema": … } }
- 优:确保返回合法 JSON,且字段稳定
- 缺:仅部分模型支持
SDK
- OpenAI 提供的 SDK 直接支持将 Class 作为 Response Format,但仅支持 Python
- 在 go-openai 中,使用方式过于通用繁琐
- llm-structed 专门针对结构化场景优化,对方案 3 和方案 2 提供原生支持
例子
package main
import (
	"context"
	"fmt"
	"log"

	"github.com/glidea/llm-structed"
)
// Summary is the Go type that main asks the model to fill in for an article.
//
// NOTE(review): the `desc` and `enum` tags are presumably consumed by
// llm-structed when it builds the JSON schema sent to the model — confirm
// against the library documentation.
type Summary struct {
	// Title is serialized under the JSON key "title".
	Title string `json:"title" desc:"The title of the summary"`
	// Content is serialized under the JSON key "content".
	Content string `json:"content" desc:"A concise summary of the article content"`
	// Keywords is serialized under the JSON key "keywords".
	Keywords []string `json:"keywords" desc:"Key topics mentioned in the article"`
	// Score is serialized under the JSON key "score"; the description asks for 1-10.
	Score int `json:"score" desc:"The quality score of the article (1-10)"`
	// Category is serialized under the JSON key "category"; the enum tag lists the allowed values.
	Category string `json:"category" desc:"The category of the article" enum:"Technology,Science,Business,Health,Education,Other"`
}
// main demonstrates the llm-structed client: it requests one structured
// Summary and then calls each single-value helper (String, StringSlice, Int,
// Bool, BoolSlice, Float). Every call's error is checked; on failure the
// program logs the error and exits instead of printing a zero value.
func main() {
	// New client (in the minimal configuration, you only need to set APIKey).
	cli, err := llmstructed.New(llmstructed.Config{
		BaseURL:                   "https://openrouter.ai/api/v1",
		APIKey:                    "sk-...",
		Model:                     "google/gemini-flash-1.5",
		Temperature:               0.3,
		StructuredOutputSupported: true, // Use approach 3.
		Retry:                     1,
		Debug:                     true,
		// See the source code comments of llmstructed.Config for details on these fields.
	})
	if err != nil {
		log.Fatalf("creating client: %v", err)
	}
	ctx := context.Background()

	// Structured output decoded into a Go struct.
	var summary Summary
	if err := cli.Do(ctx, []string{`Please generate a summary of this article: Artificial Intelligence (AI) is transforming the way we live and work. It refers to
computer systems that can perform tasks that normally require human intelligence. These
tasks include visual perception, speech recognition, decision-making, and language
translation. Machine learning, a subset of AI, enables systems to learn and improve
from experience without being explicitly programmed. Deep learning, particularly,
has revolutionized AI by using neural networks to process complex patterns in data.`,
	}, &summary); err != nil {
		log.Fatalf("requesting summary: %v", err)
	}
	fmt.Printf("Go Struct: %v\n\n", summary)

	// Simple methods for single values.
	str, err := cli.String(ctx, []string{"Hello, who are you?"})
	if err != nil {
		log.Fatalf("requesting string: %v", err)
	}
	fmt.Printf("String: %s\n\n", str)

	languages, err := cli.StringSlice(ctx, []string{"List some popular programming languages."})
	if err != nil {
		log.Fatalf("requesting string slice: %v", err)
	}
	fmt.Printf("String Slice: %v\n\n", languages)

	count, err := cli.Int(ctx, []string{`How many words are in this sentence: "Hello world, this is a test."`})
	if err != nil {
		log.Fatalf("requesting int: %v", err)
	}
	fmt.Printf("Integer: %d\n\n", count)

	yes, err := cli.Bool(ctx, []string{"Are you happy?"})
	if err != nil {
		log.Fatalf("requesting bool: %v", err)
	}
	fmt.Printf("Boolean: %v\n\n", yes)

	trues, err := cli.BoolSlice(ctx, []string{"Are these statements true? [\"The sky is blue\", \"Fish can fly\", \"Water is wet\"]"})
	if err != nil {
		log.Fatalf("requesting bool slice: %v", err)
	}
	fmt.Printf("Boolean Slice: %v\n\n", trues)

	pi, err := cli.Float(ctx, []string{"What is the value of pi (to two decimal places)?"})
	if err != nil {
		log.Fatalf("requesting float: %v", err)
	}
	fmt.Printf("Float: %.2f\n\n", pi)
}