log.txt (9.5 KB)
我现在有一个一堆文本文件大概每个文件2G 左右, 我现在需要提取文件中得每行sql 语句,但是他们会换行, 用scanner := bufio.NewScanner(file) buf := make([]byte, maxCapacity) scanner.Buffer(buf, maxCapacity) scanner.Split(scanSQL)
这种方式去获取会导致内存占用过高被系统杀死
有没有大佬 帮助一下 Golang 实现
const maxCapacity = 2048 * 1024 * 1024
func mustSql(filePath string, dbOp *gorm.DB) {
file, err := os.Open(filePath)
if err != nil {
log.Fatal(err)
}
defer file.Close()
tasks := make(chan string, workerCount)
for i := 0; i < workerCount; i++ {
wg.Add(1)
go worker(i, tasks, &wg, dbOp)
}
scanner := bufio.NewScanner(file)
buf := make([]byte, maxCapacity)
scanner.Buffer(buf, maxCapacity)
scanner.Split(scanSQL)
count := 0
r := regexp.MustCompile(`(?s)(insert into sip_info\(.+?\) values\(.+?\))`)
for scanner.Scan() {
line := scanner.Text()
matches := r.FindStringSubmatch(line)
if len(matches) > 1 {
insertSql := matches[1]
tasks <- insertSql
count++
}
}
if err := scanner.Err(); err != nil {
log.Fatal(err)
}
util.LOG.Infof("Total number of SQL statements: %d path:%s", count, filePath)
util.LOG.Info("Concurrent SQL execution completed!")
close(tasks)
wg.Wait()
}
func scanSQL(data []byte, atEOF bool) (advance int, token []byte, err error) {
if atEOF && len(data) == 0 {
return 0, nil, nil
}
if i := strings.Index(string(data), "siprecorder.c"); i >= 0 {
return i + 1, data[0:i], nil
}
if atEOF {
return len(data), data, nil
}
return
}
上面是现有得代码