package main import ( "bytes" "context" _ "embed" "encoding/json" "fmt" "genBrief/db" "genBrief/util" "strings" "github.com/PuerkitoBio/goquery" openai "github.com/sashabaranov/go-openai" ) //go:embed dom.json var domJson string var domMap map[string]string var maxId int func init() { domMap = map[string]string{} err := json.Unmarshal([]byte(domJson), &domMap) if err != nil { panic(err) } db.Init() db.New("t_config").Attr("value").Where("name", "prev_loop_new_id").GetRow().Scan(&maxId) fmt.Println("startTask at maxId ", maxId) } func main() { for { newMaxId := loopMaxId(maxId) if maxId >= newMaxId { fmt.Println("finishTask at maxId ", maxId) db.Pool().Update("t_config", map[string]interface{}{"value": maxId}, map[string]interface{}{"name": "prev_loop_new_id"}) return } maxId = newMaxId loopMaxId(maxId) } } func loopMaxId(maxId int) int { list, err := db.New("t_news").Attr("id,content,url").WhereF("id > ?", maxId).Order("ORDER BY id asc").Limit(0, 100).GetAll() fmt.Println("loopLen", len(list), err) for _, record := range list { update := map[string]interface{}{} id := record["id"].(int64) idInt := int(id) content := record["content"].(string) url := record["url"].(string) brief := "" if len(content) >= 1024 { brief = genBrief(content) update["brief"] = brief } else { res, err := util.GetHtml(url) if err != nil { fmt.Println("get Html false", err.Error()) } else { content = getContent(res, "body") update["content"] = content if len(content) >= 1024 { brief = genBrief(content) update["brief"] = brief } } } // maxId更新 if maxId < idInt { maxId = idInt } // if brief == "" { update["status"] = "Delete" } else { update["status"] = "Picture" tags := genTag(brief) if tags != "" { alltag := do_insert_tag(id, tags) update["tag"] = alltag update["pics"] = get_pics(alltag) } } fmt.Println("finish", id, brief) if _, err := db.Pool().Update("t_news", update, map[string]interface{}{"id": id}); err != nil { fmt.Println("save", err.Error()) } } return maxId } func genBrief(content string) string { client := openai.NewClient("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") resp, err := client.CreateChatCompletion( context.Background(), openai.ChatCompletionRequest{ Model: openai.GPT3Dot5Turbo, Messages: []openai.ChatCompletionMessage{ { Role: openai.ChatMessageRoleUser, Content: content + "\r\n通过以上内容,生成中文概要,文字控制500字以内。", }, }, }, ) if err != nil { fmt.Printf("ChatCompletion error: %v\n", err) return "" } return resp.Choices[0].Message.Content } func genTag(content string) string { client := openai.NewClient("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") resp, err := client.CreateChatCompletion( context.Background(), openai.ChatCompletionRequest{ Model: openai.GPT3Dot5Turbo, Messages: []openai.ChatCompletionMessage{ { Role: openai.ChatMessageRoleUser, Content: content + "\r\n以上新闻内容属于哪一类新闻 A居民 B商业 C金融 D建筑 E屋内装饰 \r\n可以选一项或者两项", }, }, }, ) if err != nil { fmt.Printf("ChatCompletion error: %v\n", err) return "" } return resp.Choices[0].Message.Content } func get_pics(tags string) string { tag := strings.Split(tags, ",")[0] if tag == "" || tag == "other" { tag = "gpt" } var url string err := db.New("t_news_img").Attr("url").Where("tag", tag).Order(" ORDER BY RAND()").GetRow().Scan(&url) if err != nil { fmt.Println("get_pics err", tag, err.Error()) } return url } func do_insert_tag(id int64, tags string) string { mtag := map[string]string{ "A": "residential", "B": "commercial", "C": "financial", "D": "construction", "E": "indoor", } mtags := []string{} for opt, tag := range mtag { if strings.Contains(tags, opt) { record := map[string]interface{}{ "new_id": id, "tag": tag, } db.Pool().Insert("t_news_tag", record) mtags = append(mtags, tag) } } return strings.Join(mtags, ",") } func getContent(body []byte, dom string) string { doc, _ := goquery.NewDocumentFromReader(bytes.NewReader(body)) doc.Find("script").Remove() doc.Find("noscript").Remove() if dom == "" { dom = "boby" } br := doc.Find(dom).Text() if br == "" { br = doc.Find("body").Text() } return util.TrimHtml(br) }