main.go 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. package main
import (
	"bytes"
	"context"
	_ "embed"
	"encoding/json"
	"fmt"
	"os"
	"strings"

	"genBrief/db"
	"genBrief/util"

	"github.com/PuerkitoBio/goquery"
	openai "github.com/sashabaranov/go-openai"
)
  14. //go:embed dom.json
  15. var domJson string
  16. var domMap map[string]string
  17. var maxId int
  18. func init() {
  19. domMap = map[string]string{}
  20. err := json.Unmarshal([]byte(domJson), &domMap)
  21. if err != nil {
  22. panic(err)
  23. }
  24. db.Init()
  25. db.New("t_config").Attr("value").Where("name", "prev_loop_new_id").GetRow().Scan(&maxId)
  26. fmt.Println("startTask at maxId ", maxId)
  27. }
  28. func main() {
  29. for {
  30. newMaxId := loopMaxId(maxId)
  31. if maxId >= newMaxId {
  32. fmt.Println("finishTask at maxId ", maxId)
  33. db.Pool().Update("t_config", map[string]interface{}{"value": maxId}, map[string]interface{}{"name": "prev_loop_new_id"})
  34. return
  35. }
  36. maxId = newMaxId
  37. loopMaxId(maxId)
  38. }
  39. }
  40. func loopMaxId(maxId int) int {
  41. list, err := db.New("t_news").Attr("id,content,url").WhereF("id > ?", maxId).Order("ORDER BY id asc").Limit(0, 100).GetAll()
  42. fmt.Println("loopLen", len(list), err)
  43. for _, record := range list {
  44. update := map[string]interface{}{}
  45. id := record["id"].(int64)
  46. idInt := int(id)
  47. content := record["content"].(string)
  48. url := record["url"].(string)
  49. brief := ""
  50. if len(content) >= 1024 {
  51. brief = genBrief(content)
  52. update["brief"] = brief
  53. } else {
  54. res, err := util.GetHtml(url)
  55. if err != nil {
  56. fmt.Println("get Html false", err.Error())
  57. } else {
  58. content = getContent(res, "body")
  59. update["content"] = content
  60. if len(content) >= 1024 {
  61. brief = genBrief(content)
  62. update["brief"] = brief
  63. }
  64. }
  65. }
  66. // maxId更新
  67. if maxId < idInt {
  68. maxId = idInt
  69. }
  70. //
  71. if brief == "" {
  72. update["status"] = "Delete"
  73. } else {
  74. update["status"] = "Picture"
  75. tags := genTag(brief)
  76. if tags != "" {
  77. alltag := do_insert_tag(id, tags)
  78. update["tag"] = alltag
  79. update["pics"] = get_pics(alltag)
  80. }
  81. }
  82. fmt.Println("finish", id, brief)
  83. if _, err := db.Pool().Update("t_news", update, map[string]interface{}{"id": id}); err != nil {
  84. fmt.Println("save", err.Error())
  85. }
  86. }
  87. return maxId
  88. }
  89. func genBrief(content string) string {
  90. client := openai.NewClient("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")
  91. resp, err := client.CreateChatCompletion(
  92. context.Background(),
  93. openai.ChatCompletionRequest{
  94. Model: openai.GPT3Dot5Turbo,
  95. Messages: []openai.ChatCompletionMessage{
  96. {
  97. Role: openai.ChatMessageRoleUser,
  98. Content: content + "\r\n通过以上内容,生成中文概要,文字控制500字以内。",
  99. },
  100. },
  101. },
  102. )
  103. if err != nil {
  104. fmt.Printf("ChatCompletion error: %v\n", err)
  105. return ""
  106. }
  107. return resp.Choices[0].Message.Content
  108. }
  109. func genTag(content string) string {
  110. client := openai.NewClient("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")
  111. resp, err := client.CreateChatCompletion(
  112. context.Background(),
  113. openai.ChatCompletionRequest{
  114. Model: openai.GPT3Dot5Turbo,
  115. Messages: []openai.ChatCompletionMessage{
  116. {
  117. Role: openai.ChatMessageRoleUser,
  118. Content: content + "\r\n以上新闻内容属于哪一类新闻 A居民 B商业 C金融 D建筑 E屋内装饰 \r\n可以选一项或者两项",
  119. },
  120. },
  121. },
  122. )
  123. if err != nil {
  124. fmt.Printf("ChatCompletion error: %v\n", err)
  125. return ""
  126. }
  127. return resp.Choices[0].Message.Content
  128. }
  129. func get_pics(tags string) string {
  130. tag := strings.Split(tags, ",")[0]
  131. if tag == "" || tag == "other" {
  132. tag = "gpt"
  133. }
  134. var url string
  135. err := db.New("t_news_img").Attr("url").Where("tag", tag).Order("RDER BY RAND()").GetRow().Scan(&url)
  136. if err != nil {
  137. fmt.Println("get_pics err", tag, err.Error())
  138. }
  139. return url
  140. }
  141. func do_insert_tag(id int64, tags string) string {
  142. mtag := map[string]string{
  143. "A": "residential",
  144. "B": "commercial",
  145. "C": "financial",
  146. "D": "construction",
  147. "E": "indoor",
  148. }
  149. mtags := []string{}
  150. for opt, tag := range mtag {
  151. if strings.Contains(tags, opt) {
  152. record := map[string]interface{}{
  153. "new_id": id,
  154. "tag": tag,
  155. }
  156. db.Pool().Insert("t_news_tag", record)
  157. mtags = append(mtags, tag)
  158. }
  159. }
  160. return strings.Join(mtags, ",")
  161. }
  162. func getContent(body []byte, dom string) string {
  163. doc, _ := goquery.NewDocumentFromReader(bytes.NewReader(body))
  164. doc.Find("script").Remove()
  165. doc.Find("noscript").Remove()
  166. if dom == "" {
  167. dom = "boby"
  168. }
  169. br := doc.Find(dom).Text()
  170. if br == "" {
  171. br = doc.Find("body").Text()
  172. }
  173. return util.TrimHtml(br)
  174. }