main.go 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. package main
  import (
  	"bytes"
  	"context"
  	_ "embed"
  	"encoding/json"
  	"fmt"
  	"os"

  	"genBrief/db"
  	"genBrief/util"

  	"github.com/PuerkitoBio/goquery"
  	openai "github.com/sashabaranov/go-openai"
  )
  13. //go:embed dom.json
  14. var domJson string
  15. var domMap map[string]string
  16. func init() {
  17. domMap = map[string]string{}
  18. err := json.Unmarshal([]byte(domJson), &domMap)
  19. if err != nil {
  20. panic(err)
  21. }
  22. db.Init()
  23. }
  24. func main() {
  25. maxId := 4000
  26. for {
  27. newMaxId := loopMaxId(maxId)
  28. if maxId == newMaxId {
  29. return
  30. }
  31. maxId = newMaxId
  32. loopMaxId(maxId)
  33. }
  34. }
  35. func loopMaxId(maxId int) int {
  36. list, err := db.New("t_news").Attr("id,content,url").
  37. Where("status", "Brief").WhereF("id > ?", 4000).Order("ORDER BY id asc").Limit(0, 100).GetAll()
  38. fmt.Println("loopLen", len(list), err)
  39. for _, record := range list {
  40. update := map[string]interface{}{}
  41. id := record["id"].(int64)
  42. content := record["content"].(string)
  43. url := record["url"].(string)
  44. brief := ""
  45. if len(content) >= 1024 {
  46. brief = genBrief(content)
  47. update["brief"] = brief
  48. } else {
  49. res, err := util.GetHtml(url)
  50. if err != nil {
  51. fmt.Println("get Html false", err.Error())
  52. } else {
  53. content = getContent(res, "body")
  54. update["content"] = content
  55. if len(content) >= 1024 {
  56. brief = genBrief(content)
  57. update["brief"] = brief
  58. }
  59. }
  60. }
  61. //
  62. if brief == "" {
  63. update["status"] = "Delete"
  64. } else {
  65. update["status"] = "Picture"
  66. }
  67. fmt.Println("finish", id, brief)
  68. if _, err := db.Pool().Update("t_news", update, map[string]interface{}{"id": id}); err != nil {
  69. fmt.Println("save", err.Error())
  70. }
  71. }
  72. return maxId
  73. }
  74. func genBrief(content string) string {
  75. client := openai.NewClient("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")
  76. resp, err := client.CreateChatCompletion(
  77. context.Background(),
  78. openai.ChatCompletionRequest{
  79. Model: openai.GPT3Dot5Turbo,
  80. Messages: []openai.ChatCompletionMessage{
  81. {
  82. Role: openai.ChatMessageRoleUser,
  83. Content: content + "\r\n通过以上内容,生成中文概要,文字控制500字以内。",
  84. },
  85. },
  86. },
  87. )
  88. if err != nil {
  89. fmt.Printf("ChatCompletion error: %v\n", err)
  90. return ""
  91. }
  92. return resp.Choices[0].Message.Content
  93. }
  94. func getContent(body []byte, dom string) string {
  95. doc, _ := goquery.NewDocumentFromReader(bytes.NewReader(body))
  96. doc.Find("script").Remove()
  97. doc.Find("noscript").Remove()
  98. if dom == "" {
  99. dom = "boby"
  100. }
  101. br := doc.Find(dom).Text()
  102. if br == "" {
  103. br = doc.Find("body").Text()
  104. }
  105. return util.TrimHtml(br)
  106. }