123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181 |
- package main
import (
	"bytes"
	"context"
	_ "embed"
	"encoding/json"
	"fmt"
	"os"
	"strings"

	"genBrief/db"
	"genBrief/util"

	"github.com/PuerkitoBio/goquery"
	openai "github.com/sashabaranov/go-openai"
)
- //go:embed dom.json
- var domJson string
- var domMap map[string]string
- var maxId int
- func init() {
- domMap = map[string]string{}
- err := json.Unmarshal([]byte(domJson), &domMap)
- if err != nil {
- panic(err)
- }
- db.Init()
- db.New("t_config").Attr("value").Where("name", "prev_loop_new_id").GetRow().Scan(&maxId)
- fmt.Println("startTask at maxId ", maxId)
- }
- func main() {
- for {
- newMaxId := loopMaxId(maxId)
- if maxId >= newMaxId {
- fmt.Println("finishTask at maxId ", maxId)
- db.Pool().Update("t_config", map[string]interface{}{"value": maxId}, map[string]interface{}{"name": "prev_loop_new_id"})
- return
- }
- maxId = newMaxId
- loopMaxId(maxId)
- }
- }
- func loopMaxId(maxId int) int {
- list, err := db.New("t_news").Attr("id,content,url").
- Where("status", "Brief").WhereF("id > ?", 4000).Order("ORDER BY id asc").Limit(0, 100).GetAll()
- fmt.Println("loopLen", len(list), err)
- for _, record := range list {
- update := map[string]interface{}{}
- id := record["id"].(int64)
- content := record["content"].(string)
- url := record["url"].(string)
- brief := ""
- if len(content) >= 1024 {
- brief = genBrief(content)
- update["brief"] = brief
- } else {
- res, err := util.GetHtml(url)
- if err != nil {
- fmt.Println("get Html false", err.Error())
- } else {
- content = getContent(res, "body")
- update["content"] = content
- if len(content) >= 1024 {
- brief = genBrief(content)
- update["brief"] = brief
- }
- }
- }
- //
- if brief == "" {
- update["status"] = "Delete"
- } else {
- update["status"] = "Picture"
- tags := genTag(brief)
- if tags != "" {
- alltag := do_insert_tag(id, tags)
- update["tag"] = alltag
- update["pics"] = get_pics(alltag)
- }
- }
- fmt.Println("finish", id, brief)
- if _, err := db.Pool().Update("t_news", update, map[string]interface{}{"id": id}); err != nil {
- fmt.Println("save", err.Error())
- }
- }
- return maxId
- }
- func genBrief(content string) string {
- client := openai.NewClient("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")
- resp, err := client.CreateChatCompletion(
- context.Background(),
- openai.ChatCompletionRequest{
- Model: openai.GPT3Dot5Turbo,
- Messages: []openai.ChatCompletionMessage{
- {
- Role: openai.ChatMessageRoleUser,
- Content: content + "\r\n通过以上内容,生成中文概要,文字控制500字以内。",
- },
- },
- },
- )
- if err != nil {
- fmt.Printf("ChatCompletion error: %v\n", err)
- return ""
- }
- return resp.Choices[0].Message.Content
- }
- func genTag(content string) string {
- client := openai.NewClient("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")
- resp, err := client.CreateChatCompletion(
- context.Background(),
- openai.ChatCompletionRequest{
- Model: openai.GPT3Dot5Turbo,
- Messages: []openai.ChatCompletionMessage{
- {
- Role: openai.ChatMessageRoleUser,
- Content: content + "\r\n以上新闻内容属于哪一类新闻 A居民 B商业 C金融 D建筑 E屋内装饰 \r\n可以选一项或者两项",
- },
- },
- },
- )
- if err != nil {
- fmt.Printf("ChatCompletion error: %v\n", err)
- return ""
- }
- return resp.Choices[0].Message.Content
- }
- func get_pics(tags string) string {
- tag := strings.Split(tags, ",")[0]
- if tag == "" || tag == "other" {
- tag = "gpt"
- }
- var url string
- err := db.New("t_news_img").Attr("url").Where("tag", tag).Order("RDER BY RAND()").GetRow().Scan(&url)
- if err != nil {
- fmt.Println("get_pics err", tag, err.Error())
- }
- return url
- }
- func do_insert_tag(id int64, tags string) string {
- mtag := map[string]string{
- "A": "residential",
- "B": "commercial",
- "C": "financial",
- "D": "construction",
- "E": "indoor",
- }
- mtags := []string{}
- for opt, tag := range mtag {
- if strings.Contains(tags, opt) {
- record := map[string]interface{}{
- "new_id": id,
- "tag": tag,
- }
- db.Pool().Insert("t_news_tag", record)
- mtags = append(mtags, tag)
- }
- }
- return strings.Join(mtags, ",")
- }
- func getContent(body []byte, dom string) string {
- doc, _ := goquery.NewDocumentFromReader(bytes.NewReader(body))
- doc.Find("script").Remove()
- doc.Find("noscript").Remove()
- if dom == "" {
- dom = "boby"
- }
- br := doc.Find(dom).Text()
- if br == "" {
- br = doc.Find("body").Text()
- }
- return util.TrimHtml(br)
- }
|