o1-preview和claude-3.5-sonnet哪个更好

我试了一些简单的数学题(如高考填空题)等,两个都能做对,并且看起来claude-3.5-sonnet弄出的动静似乎要小一点?

52 Likes

那么贵,应该能好点吧

5 Likes

o1感觉吹的有点过了,像是为了拉投资一样

7 Likes

而且claude-3.5-sonnet配上缓存价格可以很便宜 :eyes:

4 Likes

claude能答对9.11呀~

2 Likes

草台班子这次竟然没有崩,不符合常理 :crazy_face:

1 Like

9.11问题还是答不对

2 Likes

体验次数太少了 难崩 :melting_face:

3 Likes

写 GCP 用的 Cloudflare Worker,用 Gemini 和 4o、Opus 写出来的都不能用,还是掏出吃灰已久的付费中转用 3.5 Sonnet 才解决。Opus 的理解力也远不如 3.5 Sonnet。

1 Like

opus的编码能力确实没有sonnet3.5强

3 Likes

o1好像目前还没有白嫖渠道,没有体验过。

2 Likes

除了数据库更大,综合实力差挺远。Opus 就理解不了字数:让写 3 个字的词汇,3.5 能准确理解,Opus 就一直回复近义的 2 字词汇。

1 Like

我刚刚试了下这个prompt

我正在用C++写一个编译器,前端部分(从解析到生成LLVM IR)已经写好了,现在我正在写一个简单的汇编代码生成程序中处理select指令的部分:

\```cpp
#pragma once
#include <bit>
#include <stdexcept>
#include "naivebackend.h"
#include "tools.h"
namespace NaiveBackend {
// Block label that the phi branch of GenerateASM compares against the incoming
// labels of a phi node. Only declared here; the definition lives elsewhere.
extern std::string cur_block_label_for_phi;

// Appends the assembly text for a single IR action to `code_lines`.
//
// The concrete action kind is discovered by trying std::dynamic_pointer_cast
// against each supported type in turn; the first match decides what is emitted.
//
// Parameters:
//   act                  - the IR action to lower.
//   code_lines           - output; one assembly line is pushed per element.
//   layout               - forwarded to IRVar2RISCVReg / GenerateWriteAccess;
//                          `total_frame_size` is read when lowering a return.
//   low_level_class_info - looked up (by class name) to obtain member offsets
//                          for two-index getelementptr.
//   process_phi          - when false, phi actions emit nothing.
//
// Throws std::runtime_error for an unknown binary/icmp operation, an
// unsupported load/store size, an unsupported getelementptr shape, a phi with
// no entry for the current label, mismatched call argument vectors, or an
// unrecognised action type. `low_level_class_info.at(...)`, `std::stoull` and
// `std::get` may additionally throw their own standard exceptions.
//
// NOTE(review): IRVar2RISCVReg presumably emits code that materialises an IR
// value of the given size in the named register, and GenerateWriteAccess
// presumably emits code that stores the named register into the IR result's
// location; CalcSize presumably returns a size in bytes. None of the three is
// visible here - confirm against their definitions.
inline void GenerateASM(std::shared_ptr<ActionItem> act, std::vector<std::string> &code_lines, FuncLayout &layout,
                        const std::unordered_map<std::string, IRClassInfo> &low_level_class_info,
                        bool process_phi = false) {
  if (auto br_act = std::dynamic_pointer_cast<BRAction>(act)) {
    // Conditional branch: condition goes through t0; non-zero jumps to the
    // true label, otherwise an unconditional jump to the false label follows.
    IRVar2RISCVReg(br_act->cond, 1, "t0", layout, code_lines);
    code_lines.push_back("bnez t0, .entrylabel." + br_act->true_label_full);
    code_lines.push_back("j .entrylabel." + br_act->false_label_full);
  } else if (auto jmp_act = std::dynamic_pointer_cast<UNConditionJMPAction>(act)) {
    // Unconditional branch.
    code_lines.push_back("j .entrylabel." + jmp_act->label_full);
  } else if (auto ret_act = std::dynamic_pointer_cast<RETAction>(act)) {
    // Return: a non-void value is routed through a0 first.
    if (!std::holds_alternative<LLVMVOIDType>(ret_act->type)) {
      size_t sz = CalcSize(ret_act->type);
      IRVar2RISCVReg(ret_act->value, sz, "a0", layout, code_lines);
    }
    // Reload ra and s0 from fixed offsets below s0. ra must be read first,
    // because the second load overwrites s0 itself.
    code_lines.push_back("lw ra, -4(s0)");
    code_lines.push_back("lw s0, -8(s0)");
    // Raise sp by the frame size. Frame sizes of 2048 or more go through t0,
    // presumably because they do not fit addi's immediate field.
    if (layout.total_frame_size < 2048) {
      code_lines.push_back("addi sp, sp, " + std::to_string(layout.total_frame_size));
    } else {
      code_lines.push_back("li t0, " + std::to_string(layout.total_frame_size));
      code_lines.push_back("add sp, sp, t0");
    }
    code_lines.push_back("ret");
  } else if (auto binary_act = std::dynamic_pointer_cast<BinaryOperationAction>(act)) {
    // Binary arithmetic/logic: operands in t0 and t1, result produced in t2
    // and then written back to the IR result.
    size_t sz = CalcSize(binary_act->type);
    IRVar2RISCVReg(binary_act->operand1_full, sz, "t0", layout, code_lines);
    IRVar2RISCVReg(binary_act->operand2_full, sz, "t1", layout, code_lines);
    if (binary_act->op == "add") {
      code_lines.push_back("add t2, t0, t1");
    } else if (binary_act->op == "sub") {
      code_lines.push_back("sub t2, t0, t1");
    } else if (binary_act->op == "mul") {
      code_lines.push_back("mul t2, t0, t1");
    } else if (binary_act->op == "sdiv") {
      code_lines.push_back("div t2, t0, t1");
    } else if (binary_act->op == "srem") {
      code_lines.push_back("rem t2, t0, t1");
    } else if (binary_act->op == "and") {
      code_lines.push_back("and t2, t0, t1");
    } else if (binary_act->op == "or") {
      code_lines.push_back("or t2, t0, t1");
    } else if (binary_act->op == "xor") {
      code_lines.push_back("xor t2, t0, t1");
    } else if (binary_act->op == "shl") {
      code_lines.push_back("sll t2, t0, t1");
    } else if (binary_act->op == "ashr") {
      code_lines.push_back("sra t2, t0, t1");
    } else {
      throw std::runtime_error("Unknown binary operation");
    }
    GenerateWriteAccess(binary_act->result_full, sz, "t2", layout, code_lines);
  } else if (auto alloca_act = std::dynamic_pointer_cast<AllocaAction>(act)) {
    // just do nothing
  } else if (auto load_act = std::dynamic_pointer_cast<LoadAction>(act)) {
    // Load: the pointer (always treated as 4 bytes) goes into t0, then a byte
    // or word is read through it into t1. Only sizes 1 and 4 are supported.
    size_t sz = CalcSize(load_act->ty);
    IRVar2RISCVReg(load_act->ptr_full, 4, "t0", layout, code_lines);
    if (sz == 1) {
      code_lines.push_back("lb t1, 0(t0)");
    } else if (sz == 4) {
      code_lines.push_back("lw t1, 0(t0)");
    } else {
      throw std::runtime_error("Unknown bytes");
    }
    GenerateWriteAccess(load_act->result_full, sz, "t1", layout, code_lines);
  } else if (auto store_act = std::dynamic_pointer_cast<StoreAction>(act)) {
    // Store: pointer in t0, value in t1, then a byte or word store through t0.
    // Only sizes 1 and 4 are supported.
    size_t sz = CalcSize(store_act->ty);
    IRVar2RISCVReg(store_act->ptr_full, 4, "t0", layout, code_lines);
    IRVar2RISCVReg(store_act->value_full, sz, "t1", layout, code_lines);
    if (sz == 1) {
      code_lines.push_back("sb t1, 0(t0)");
    } else if (sz == 4) {
      code_lines.push_back("sw t1, 0(t0)");
    } else {
      throw std::runtime_error("Unknown bytes");
    }
  } else if (auto get_element_act = std::dynamic_pointer_cast<GetElementPtrAction>(act)) {
    if (get_element_act->indices.size() == 1) {
      // array access
      // Address = base + (index << countr_zero(element size)).
      // NOTE(review): the shift equals a multiplication by the element size
      // only when that size is a power of two - confirm CalcSize guarantees it.
      IRVar2RISCVReg(get_element_act->ptr_full, 4, "t0", layout, code_lines);
      IRVar2RISCVReg(get_element_act->indices[0], 4, "t1", layout, code_lines);
      size_t element_sz = CalcSize(get_element_act->ty);
      code_lines.push_back("slli t1, t1, " + std::to_string(std::countr_zero(element_sz)));
      code_lines.push_back("add t2, t0, t1");
      GenerateWriteAccess(get_element_act->result_full, 4, "t2", layout, code_lines);
    } else if (get_element_act->indices.size() == 2) {
      // struct access
      // The first index must be the literal text "0"; the second is parsed as
      // a member index and translated into a byte offset via the class table.
      if (get_element_act->indices[0] != "0") {
        throw std::runtime_error("struct access with non-zero offset is not supported");
      }
      size_t element_idx = std::stoull(get_element_act->indices[1]);
      auto class_ty = std::get<LLVMIRCLASSTYPE>(get_element_act->ty);
      const IRClassInfo &class_info = low_level_class_info.at(class_ty.class_name_full);
      size_t offset = class_info.member_var_pos_after_align[element_idx];
      IRVar2RISCVReg(get_element_act->ptr_full, 4, "t0", layout, code_lines);
      // Offsets of 2048 or more are loaded into t1 first, presumably because
      // they do not fit addi's immediate field.
      if (offset < 2048) {
        code_lines.push_back("addi t2, t0, " + std::to_string(offset));
      } else {
        code_lines.push_back("li t1, " + std::to_string(offset));
        code_lines.push_back("add t2, t0, t1");
      }
      GenerateWriteAccess(get_element_act->result_full, 4, "t2", layout, code_lines);
    } else {
      throw std::runtime_error("Unknown getelementptr indices size");
    }
  } else if (auto icmp_act = std::dynamic_pointer_cast<ICMPAction>(act)) {
    // Integer compare: operands in t0/t1, a 0/1 result in t2, written back
    // with size 1.
    size_t sz = CalcSize(icmp_act->type);
    IRVar2RISCVReg(icmp_act->operand1_full, sz, "t0", layout, code_lines);
    IRVar2RISCVReg(icmp_act->operand2_full, sz, "t1", layout, code_lines);
    if (icmp_act->op == "eq") {
      // Equal iff the xor of the operands is zero.
      code_lines.push_back("xor t2, t0, t1");
      code_lines.push_back("seqz t2, t2");
    } else if (icmp_act->op == "ne") {
      code_lines.push_back("xor t2, t0, t1");
      code_lines.push_back("snez t2, t2");
    } else if (icmp_act->op == "slt") {
      code_lines.push_back("slt t2, t0, t1");
    } else if (icmp_act->op == "sle") {
      // a <= b is computed as !(b < a).
      code_lines.push_back("slt t2, t1, t0");
      code_lines.push_back("xori t2, t2, 1");
    } else if (icmp_act->op == "sgt") {
      // a > b is computed as b < a.
      code_lines.push_back("slt t2, t1, t0");
    } else if (icmp_act->op == "sge") {
      // a >= b is computed as !(a < b).
      code_lines.push_back("slt t2, t0, t1");
      code_lines.push_back("xori t2, t2, 1");
    } else {
      throw std::runtime_error("Unknown icmp operation");
    }
    GenerateWriteAccess(icmp_act->result_full, 1, "t2", layout, code_lines);
  } else if (auto call_act = std::dynamic_pointer_cast<CallItem>(act)) {
    // Function call.
    size_t num_of_args = call_act->args_ty.size();
    if (call_act->args_ty.size() != call_act->args_val_full.size()) {
      throw std::runtime_error("args_ty and args_full_name should have the same size");
    }
    // Reserve 16 bytes and save the current a0 at 0(sp); it is reloaded after
    // the call.
    code_lines.push_back("addi sp, sp, -16");
    code_lines.push_back("sw a0, 0(sp)");
    // The first eight arguments go into a0..a7.
    for (size_t i = 0; i < num_of_args && i < 8; i++) {
      IRVar2RISCVReg(call_act->args_val_full[i], CalcSize(call_act->args_ty[i]), "a" + std::to_string(i), layout,
                     code_lines);
    }
    // Arguments from index 8 onward are stored as 4-byte slots at the new sp.
    // The area is sized from the total argument count, rounded up to 16 bytes.
    // NOTE(review): the condition is `>= 8`, so a call with exactly eight
    // arguments also lowers sp although the store loop below emits nothing.
    if (num_of_args >= 8) {
      size_t ps_delta = (num_of_args * 4 + 15) / 16 * 16;
      if (ps_delta < 2048) {
        code_lines.push_back("addi sp, sp, -" + std::to_string(ps_delta));
      } else {
        code_lines.push_back("li t0, -" + std::to_string(ps_delta));
        code_lines.push_back("add sp, sp, t0");
      }
      for (size_t i = 8; i < num_of_args; i++) {
        IRVar2RISCVReg(call_act->args_val_full[i], CalcSize(call_act->args_ty[i]), "t0", layout, code_lines);
        code_lines.push_back("sw t0, " + std::to_string((i - 8) * 4) + "(sp)");
      }
    }
    code_lines.push_back("call " + call_act->func_name_raw);
    // A non-empty result name means the call's value (in a0) is stored.
    if (call_act->result_full != "") {
      size_t ret_sz = CalcSize(call_act->return_type);
      GenerateWriteAccess(call_act->result_full, ret_sz, "a0", layout, code_lines);
    }
    // NOTE(review): nothing emitted in this branch raises sp by ps_delta after
    // the call, so on the `num_of_args >= 8` path the reload below reads from
    // the outgoing-argument area instead of the slot written by
    // `sw a0, 0(sp)`, and sp stays lowered - confirm whether this is
    // compensated elsewhere.
    code_lines.push_back("lw a0, 0(sp)");
    code_lines.push_back("addi sp, sp, 16");
  } else if (auto phi_act = std::dynamic_pointer_cast<PhiItem>(act)) {
    if (!process_phi) {
      return;  // for efficiency, phi actions are implemented as store action in the previous block
    }
    // Pick the incoming value whose label equals cur_block_label_for_phi and
    // copy it (via t0) into the phi's result; only the first match is used.
    std::string self_label = NaiveBackend::cur_block_label_for_phi;
    for (const auto &[val, label] : phi_act->values) {
      if (label == self_label) {
        size_t sz = CalcSize(phi_act->ty);
        IRVar2RISCVReg(val, sz, "t0", layout, code_lines);
        GenerateWriteAccess(phi_act->result_full, sz, "t0", layout, code_lines);
        return;
      }
    }
    throw std::runtime_error("cannot found label for phi action");
  } else if (auto select_act = std::dynamic_pointer_cast<SelectItem>(act)) {
    // Not implemented yet: a select action currently emits no code.
    // TODO
  } else {
    throw std::runtime_error("Unknown action type");
  }
}
}  // namespace NaiveBackend
\```
你可以从前面几种LLVM IR指令的处理方式中,大致了解相关数据的使用方式,同时,`SelectItem`定义如下:
\```cpp
// IR node for a `select` instruction:
//   <result> = select i1 <cond>, <ty> <true_val>, <ty> <false_val>
//
// All data members are private. IRBuilder and the two NaiveBackend functions
// named below are friends so they can access the fields directly.
class SelectItem : public ActionItem {
  friend class IRBuilder;
  friend void NaiveBackend::ScanForVar(class NaiveBackend::FuncLayout &layout, std::shared_ptr<ActionItem> action,
                                       const std::unordered_map<std::string, IRClassInfo> &low_level_class_info);
  friend void NaiveBackend::GenerateASM(std::shared_ptr<ActionItem> act, std::vector<std::string> &code_lines,
                                        NaiveBackend::FuncLayout &layout,
                                        const std::unordered_map<std::string, IRClassInfo> &low_level_class_info,
                                        bool process_phi);
  std::string result_full;     // printed on the left of `=`
  std::string cond_full;       // the i1 condition operand
  std::string true_val_full;   // first value operand
  std::string false_val_full;  // second value operand
  LLVMType ty;                 // shared type of both value operands

 public:
  SelectItem() = default;

  // Writes this instruction to `os` as one line of text terminated by '\n'.
  // Throws std::runtime_error when `ty` is neither an integer nor a pointer
  // type; output written before the type is reached stays in the stream.
  void RecursivePrint(std::ostream &os) const {
    // Both value operands carry the same type, so its spelling is emitted
    // twice through this helper.
    const auto print_value_type = [this, &os]() {
      if (std::holds_alternative<LLVMIRIntType>(ty)) {
        os << "i" << std::get<LLVMIRIntType>(ty).bits;
      } else if (std::holds_alternative<LLVMIRPTRType>(ty)) {
        os << "ptr";
      } else {
        throw std::runtime_error("something strange happened");
      }
    };

    os << result_full << " = select i1 " << cond_full << ", ";
    print_value_type();
    os << " " << true_val_full << ", ";
    print_value_type();
    // A space separates the type from the operand here as well; the previous
    // code printed them back to back (e.g. "i32%b").
    os << " " << false_val_full << "\n";
  }
};
\```
请给出处理select指令的代码

o1-preview给出的代码是:

} else if (auto select_act = std::dynamic_pointer_cast<SelectItem>(act)) {
    size_t sz = CalcSize(select_act->ty);
    // 将 cond_full 加载到 t0 中
    IRVar2RISCVReg(select_act->cond_full, 1, "t0", layout, code_lines);
    // 将 true_val_full 加载到 t1 中
    IRVar2RISCVReg(select_act->true_val_full, sz, "t1", layout, code_lines);
    // 将 false_val_full 加载到 t2 中
    IRVar2RISCVReg(select_act->false_val_full, sz, "t2", layout, code_lines);
    // t0 = (t0 != 0)
    code_lines.push_back("snez t0, t0");
    // t4 = 0 - t0
    code_lines.push_back("sub t4, x0, t0");
    // t5 = t1 ^ t2
    code_lines.push_back("xor t5, t1, t2");
    // t5 = t4 & t5
    code_lines.push_back("and t5, t4, t5");
    // t3 = t2 ^ t5
    code_lines.push_back("xor t3, t2, t5");
    // 将 t3 写入 result_full
    GenerateWriteAccess(select_act->result_full, sz, "t3", layout, code_lines);
}

claude-3.5-sonnet给出的代码是:

} else if (auto select_act = std::dynamic_pointer_cast<SelectItem>(act)) {
  size_t sz = CalcSize(select_act->ty);

  // Load condition
  IRVar2RISCVReg(select_act->cond_full, 1, "t0", layout, code_lines);

  // Load true value
  IRVar2RISCVReg(select_act->true_val_full, sz, "t1", layout, code_lines);

  // Load false value
  IRVar2RISCVReg(select_act->false_val_full, sz, "t2", layout, code_lines);

  // Perform select operation
  code_lines.push_back("bnez t0, .select_true_" + select_act->result_full);
  code_lines.push_back("mv t3, t2");  // False case
  code_lines.push_back("j .select_end_" + select_act->result_full);
  code_lines.push_back(".select_true_" + select_act->result_full + ":");
  code_lines.push_back("mv t3, t1");  // True case
  code_lines.push_back(".select_end_" + select_act->result_full + ":");

  // Store result
  GenerateWriteAccess(select_act->result_full, sz, "t3", layout, code_lines);

看起来o1-preview给出的好一些

3 Likes

o1在解决一些高难度问题的表现更好

4 Likes

手动要求claude-3.5-sonnet使用位运算后,它给出的代码变成了:

} else if (auto select_act = std::dynamic_pointer_cast<SelectItem>(act)) {
  size_t sz = CalcSize(select_act->ty);

  // Load condition
  IRVar2RISCVReg(select_act->cond_full, 1, "t0", layout, code_lines);

  // Load true value
  IRVar2RISCVReg(select_act->true_val_full, sz, "t1", layout, code_lines);

  // Load false value
  IRVar2RISCVReg(select_act->false_val_full, sz, "t2", layout, code_lines);

  // Extend condition to full register width
  code_lines.push_back("snez t0, t0");  // Set t0 to 1 if not equal to zero, 0 otherwise
  code_lines.push_back("addi t0, t0, -1");  // Now t0 is either 0 (false) or -1 (true)

  // Perform select operation using bitwise operations
  code_lines.push_back("and t3, t1, t0");  // t3 = true_value & condition_mask
  code_lines.push_back("not t4, t0");      // t4 = ~condition_mask
  code_lines.push_back("and t4, t2, t4");  // t4 = false_value & ~condition_mask
  code_lines.push_back("or t3, t3, t4");   // t3 = (true_value & condition_mask) | (false_value & ~condition_mask)

  // Store result
  GenerateWriteAccess(select_act->result_full, sz, "t3", layout, code_lines);

还是多一条指令

2 Likes

侧重点不同。gpt是分析大量文本预测后面的文字,其结果不依赖逻辑推理而是词直接链接的频率,不能解决未知问题;o1是推理模型,可以根据已有的信息进行归纳,推理出答案。对于依赖推理的问题,o1的可靠性相对高于gpt。

2 Likes

3.5便宜呀

4 Likes

确实,sonnet回答快还便宜

claude性价比会好点

用3-5吧,openai越来越让人失望了