main.go 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. package main
  import (
  	"bytes"
  	"context"
  	_ "embed"
  	"encoding/json"
  	"fmt"
  	"os"
  	"strings"

  	"github.com/PuerkitoBio/goquery"
  	openai "github.com/sashabaranov/go-openai"

  	"genBrief/db"
  	"genBrief/util"
  )
  14. //go:embed dom.json
  15. var domJson string
  16. var domMap map[string]string
  17. var maxId int
  18. func init() {
  19. domMap = map[string]string{}
  20. err := json.Unmarshal([]byte(domJson), &domMap)
  21. if err != nil {
  22. panic(err)
  23. }
  24. db.Init()
  25. db.New("t_config").Attr("value").Where("name", "prev_loop_new_id").GetRow().Scan(&maxId)
  26. fmt.Println("startTask at maxId ", maxId)
  27. }
  28. func main() {
  29. for {
  30. newMaxId := loopMaxId(maxId)
  31. if maxId >= newMaxId {
  32. fmt.Println("finishTask at maxId ", maxId)
  33. db.Pool().Update("t_config", map[string]interface{}{"value": maxId}, map[string]interface{}{"name": "prev_loop_new_id"})
  34. return
  35. }
  36. maxId = newMaxId
  37. loopMaxId(maxId)
  38. }
  39. }
  40. func loopMaxId(maxId int) int {
  41. list, err := db.New("t_news").Attr("id,content,url").
  42. Where("status", "Brief").WhereF("id > ?", 4000).Order("ORDER BY id asc").Limit(0, 100).GetAll()
  43. fmt.Println("loopLen", len(list), err)
  44. for _, record := range list {
  45. update := map[string]interface{}{}
  46. id := record["id"].(int64)
  47. content := record["content"].(string)
  48. url := record["url"].(string)
  49. brief := ""
  50. if len(content) >= 1024 {
  51. brief = genBrief(content)
  52. update["brief"] = brief
  53. } else {
  54. res, err := util.GetHtml(url)
  55. if err != nil {
  56. fmt.Println("get Html false", err.Error())
  57. } else {
  58. content = getContent(res, "body")
  59. update["content"] = content
  60. if len(content) >= 1024 {
  61. brief = genBrief(content)
  62. update["brief"] = brief
  63. }
  64. }
  65. }
  66. //
  67. if brief == "" {
  68. update["status"] = "Delete"
  69. } else {
  70. update["status"] = "Picture"
  71. tags := genTag(brief)
  72. if tags != "" {
  73. alltag := do_insert_tag(id, tags)
  74. update["tag"] = alltag
  75. update["pics"] = get_pics(alltag)
  76. }
  77. }
  78. fmt.Println("finish", id, brief)
  79. if _, err := db.Pool().Update("t_news", update, map[string]interface{}{"id": id}); err != nil {
  80. fmt.Println("save", err.Error())
  81. }
  82. }
  83. return maxId
  84. }
  85. func genBrief(content string) string {
  86. client := openai.NewClient("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")
  87. resp, err := client.CreateChatCompletion(
  88. context.Background(),
  89. openai.ChatCompletionRequest{
  90. Model: openai.GPT3Dot5Turbo,
  91. Messages: []openai.ChatCompletionMessage{
  92. {
  93. Role: openai.ChatMessageRoleUser,
  94. Content: content + "\r\n通过以上内容,生成中文概要,文字控制500字以内。",
  95. },
  96. },
  97. },
  98. )
  99. if err != nil {
  100. fmt.Printf("ChatCompletion error: %v\n", err)
  101. return ""
  102. }
  103. return resp.Choices[0].Message.Content
  104. }
  105. func genTag(content string) string {
  106. client := openai.NewClient("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")
  107. resp, err := client.CreateChatCompletion(
  108. context.Background(),
  109. openai.ChatCompletionRequest{
  110. Model: openai.GPT3Dot5Turbo,
  111. Messages: []openai.ChatCompletionMessage{
  112. {
  113. Role: openai.ChatMessageRoleUser,
  114. Content: content + "\r\n以上新闻内容属于哪一类新闻 A居民 B商业 C金融 D建筑 E屋内装饰 \r\n可以选一项或者两项",
  115. },
  116. },
  117. },
  118. )
  119. if err != nil {
  120. fmt.Printf("ChatCompletion error: %v\n", err)
  121. return ""
  122. }
  123. return resp.Choices[0].Message.Content
  124. }
  125. func get_pics(tags string) string {
  126. tag := strings.Split(tags, ",")[0]
  127. if tag == "" || tag == "other" {
  128. tag = "gpt"
  129. }
  130. var url string
  131. err := db.New("t_news_img").Attr("url").Where("tag", tag).Order("RDER BY RAND()").GetRow().Scan(&url)
  132. if err != nil {
  133. fmt.Println("get_pics err", tag, err.Error())
  134. }
  135. return url
  136. }
  137. func do_insert_tag(id int64, tags string) string {
  138. mtag := map[string]string{
  139. "A": "residential",
  140. "B": "commercial",
  141. "C": "financial",
  142. "D": "construction",
  143. "E": "indoor",
  144. }
  145. mtags := []string{}
  146. for opt, tag := range mtag {
  147. if strings.Contains(tags, opt) {
  148. record := map[string]interface{}{
  149. "new_id": id,
  150. "tag": tag,
  151. }
  152. db.Pool().Insert("t_news_tag", record)
  153. mtags = append(mtags, tag)
  154. }
  155. }
  156. return strings.Join(mtags, ",")
  157. }
  158. func getContent(body []byte, dom string) string {
  159. doc, _ := goquery.NewDocumentFromReader(bytes.NewReader(body))
  160. doc.Find("script").Remove()
  161. doc.Find("noscript").Remove()
  162. if dom == "" {
  163. dom = "boby"
  164. }
  165. br := doc.Find(dom).Text()
  166. if br == "" {
  167. br = doc.Find("body").Text()
  168. }
  169. return util.TrimHtml(br)
  170. }