package main import ( "bytes" "context" _ "embed" "encoding/json" "fmt" "genBrief/db" "genBrief/util" "github.com/PuerkitoBio/goquery" openai "github.com/sashabaranov/go-openai" ) //go:embed dom.json var domJson string var domMap map[string]string func init() { domMap = map[string]string{} err := json.Unmarshal([]byte(domJson), &domMap) if err != nil { panic(err) } db.Init() } func main() { maxId := 4000 for { newMaxId := loopMaxId(maxId) if maxId == newMaxId { return } maxId = newMaxId loopMaxId(maxId) } } func loopMaxId(maxId int) int { list, err := db.New("t_news").Attr("id,content,url"). Where("status", "Brief").WhereF("id > ?", 4000).Order("ORDER BY id asc").Limit(0, 100).GetAll() fmt.Println("loopLen", len(list), err) for _, record := range list { update := map[string]interface{}{} id := record["id"].(int64) content := record["content"].(string) url := record["url"].(string) brief := "" if len(content) >= 1024 { brief = genBrief(content) update["brief"] = brief } else { res, err := util.GetHtml(url) if err != nil { fmt.Println("get Html false", err.Error()) } else { content = getContent(res, "body") update["content"] = content if len(content) >= 1024 { brief = genBrief(content) update["brief"] = brief } } } // if brief == "" { update["status"] = "Delete" } else { update["status"] = "Picture" } fmt.Println("finish", id, brief) if _, err := db.Pool().Update("t_news", update, map[string]interface{}{"id": id}); err != nil { fmt.Println("save", err.Error()) } } return maxId } func genBrief(content string) string { client := openai.NewClient("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") resp, err := client.CreateChatCompletion( context.Background(), openai.ChatCompletionRequest{ Model: openai.GPT3Dot5Turbo, Messages: []openai.ChatCompletionMessage{ { Role: openai.ChatMessageRoleUser, Content: content + "\r\n通过以上内容,生成中文概要,文字控制500字以内。", }, }, }, ) if err != nil { fmt.Printf("ChatCompletion error: %v\n", err) return "" } return resp.Choices[0].Message.Content } func getContent(body []byte, dom string) string { doc, _ := goquery.NewDocumentFromReader(bytes.NewReader(body)) doc.Find("script").Remove() doc.Find("noscript").Remove() if dom == "" { dom = "boby" } br := doc.Find(dom).Text() if br == "" { br = doc.Find("body").Text() } return util.TrimHtml(br) }