main.go 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. package main
  import (
  	"bytes"
  	"context"
  	_ "embed"
  	"encoding/json"
  	"fmt"
  	"os"

  	"github.com/PuerkitoBio/goquery"
  	openai "github.com/sashabaranov/go-openai"

  	"genBrief/db"
  	"genBrief/model"
  	"genBrief/util"
  )
  14. //go:embed dom.json
  15. var domJson string
  16. var domMap map[string]string
  17. func init() {
  18. domMap = map[string]string{}
  19. err := json.Unmarshal([]byte(domJson), &domMap)
  20. if err != nil {
  21. panic(err)
  22. }
  23. }
  24. func main() {
  25. maxId := 4000
  26. for {
  27. newMaxId := loopMaxId(maxId)
  28. if maxId == newMaxId {
  29. return
  30. }
  31. maxId = newMaxId
  32. loopMaxId(maxId)
  33. }
  34. }
  35. func loopMaxId(maxId int) int {
  36. var list []model.TNew
  37. db.Gorm.Where("status='Pending'").Order("id asc").Limit(100).Find(&list)
  38. for _, record := range list {
  39. if len(record.Content) >= 300 {
  40. record.Brief = genBrief(record.Content)
  41. } else {
  42. res, err := util.GetHtml(record.Url)
  43. if err != nil {
  44. fmt.Println("get Html false", err.Error())
  45. record.Brief = ""
  46. } else {
  47. record.Content = getContent(res, "body")
  48. if len(record.Content) >= 300 {
  49. record.Brief = genBrief(record.Content)
  50. } else {
  51. record.Brief = ""
  52. fmt.Println("skip", record.ID)
  53. }
  54. }
  55. }
  56. //
  57. if record.Brief == "" {
  58. record.Status = "Deactivated"
  59. } else {
  60. record.Status = "Activated"
  61. }
  62. fmt.Println("finish", record.ID, record.Brief, record.Status)
  63. if err := db.Gorm.Save(&record).Error; err != nil {
  64. fmt.Println("save", err.Error())
  65. }
  66. }
  67. return maxId
  68. }
  69. func genBrief(content string) string {
  70. client := openai.NewClient("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")
  71. resp, err := client.CreateChatCompletion(
  72. context.Background(),
  73. openai.ChatCompletionRequest{
  74. Model: openai.GPT3Dot5Turbo,
  75. Messages: []openai.ChatCompletionMessage{
  76. {
  77. Role: openai.ChatMessageRoleUser,
  78. Content: content + "\r\n通过以上内容,生成中文概要,文字控制300字以内。",
  79. },
  80. },
  81. },
  82. )
  83. if err != nil {
  84. fmt.Printf("ChatCompletion error: %v\n", err)
  85. return ""
  86. }
  87. return resp.Choices[0].Message.Content
  88. }
  89. func getContent(body []byte, dom string) string {
  90. doc, _ := goquery.NewDocumentFromReader(bytes.NewReader(body))
  91. doc.Find("script").Remove()
  92. doc.Find("noscript").Remove()
  93. if dom == "" {
  94. dom = "boby"
  95. }
  96. br := doc.Find(dom).Text()
  97. if br == "" {
  98. br = doc.Find("body").Text()
  99. }
  100. return util.TrimHtml(br)
  101. }