package main import ( "bytes" "context" _ "embed" "encoding/json" "fmt" "genBrief/db" "genBrief/model" "genBrief/util" "github.com/PuerkitoBio/goquery" openai "github.com/sashabaranov/go-openai" ) //go:embed dom.json var domJson string var domMap map[string]string func init() { domMap = map[string]string{} err := json.Unmarshal([]byte(domJson), &domMap) if err != nil { panic(err) } } func main() { maxId := 4000 for { newMaxId := loopMaxId(maxId) if maxId == newMaxId { return } maxId = newMaxId loopMaxId(maxId) } } func loopMaxId(maxId int) int { var list []model.TNew db.Gorm.Where("status='Pending'").Order("id asc").Limit(100).Find(&list) for _, record := range list { if len(record.Content) >= 300 { record.Brief = genBrief(record.Content) } else { res, err := util.GetHtml(record.Url) if err != nil { fmt.Println("get Html false", err.Error()) record.Brief = "" } else { record.Content = getContent(res, "body") if len(record.Content) >= 300 { record.Brief = genBrief(record.Content) } else { record.Brief = "" fmt.Println("skip", record.ID) } } } // if record.Brief == "" { record.Status = "Deactivated" } else { record.Status = "Activated" } fmt.Println("finish", record.ID, record.Brief, record.Status) if err := db.Gorm.Save(&record).Error; err != nil { fmt.Println("save", err.Error()) } } return maxId } func genBrief(content string) string { client := openai.NewClient("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") resp, err := client.CreateChatCompletion( context.Background(), openai.ChatCompletionRequest{ Model: openai.GPT3Dot5Turbo, Messages: []openai.ChatCompletionMessage{ { Role: openai.ChatMessageRoleUser, Content: content + "\r\n通过以上内容,生成中文概要,文字控制300字以内。", }, }, }, ) if err != nil { fmt.Printf("ChatCompletion error: %v\n", err) return "" } return resp.Choices[0].Message.Content } func getContent(body []byte, dom string) string { doc, _ := goquery.NewDocumentFromReader(bytes.NewReader(body)) doc.Find("script").Remove() doc.Find("noscript").Remove() if dom == "" { dom = "boby" } br := doc.Find(dom).Text() if br == "" { br = doc.Find("body").Text() } return util.TrimHtml(br) }