parser.go 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. package gofeed
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "net/http"
  8. "strings"
  9. "github.com/mmcdole/gofeed/atom"
  10. "github.com/mmcdole/gofeed/rss"
  11. )
  12. // HTTPError represents an HTTP error returned by a server.
  13. type HTTPError struct {
  14. StatusCode int
  15. Status string
  16. }
  17. func (err HTTPError) Error() string {
  18. return fmt.Sprintf("http error: %s", err.Status)
  19. }
// Parser is a universal feed parser that detects
// a given feed type, parses it, and translates it
// to the universal feed type.
type Parser struct {
	// AtomTranslator converts a parsed Atom feed into the universal Feed.
	// When nil, a DefaultAtomTranslator is installed on first use.
	AtomTranslator Translator
	// RSSTranslator converts a parsed RSS feed into the universal Feed.
	// When nil, a DefaultRSSTranslator is installed on first use.
	RSSTranslator Translator
	// Client is the HTTP client used by ParseURL. When nil, a zero-value
	// http.Client is created and stored on first use.
	Client *http.Client
	// rp and ap are the format-specific RSS and Atom parsers that the
	// detected feed is handed to; NewParser initializes both.
	rp *rss.Parser
	ap *atom.Parser
}
  30. // NewParser creates a universal feed parser.
  31. func NewParser() *Parser {
  32. fp := Parser{
  33. rp: &rss.Parser{},
  34. ap: &atom.Parser{},
  35. }
  36. return &fp
  37. }
  38. // Parse parses a RSS or Atom feed into
  39. // the universal gofeed.Feed. It takes an
  40. // io.Reader which should return the xml content.
  41. func (f *Parser) Parse(feed io.Reader) (*Feed, error) {
  42. // Wrap the feed io.Reader in a io.TeeReader
  43. // so we can capture all the bytes read by the
  44. // DetectFeedType function and construct a new
  45. // reader with those bytes intact for when we
  46. // attempt to parse the feeds.
  47. var buf bytes.Buffer
  48. tee := io.TeeReader(feed, &buf)
  49. feedType := DetectFeedType(tee)
  50. // Glue the read bytes from the detect function
  51. // back into a new reader
  52. r := io.MultiReader(&buf, feed)
  53. switch feedType {
  54. case FeedTypeAtom:
  55. return f.parseAtomFeed(r)
  56. case FeedTypeRSS:
  57. return f.parseRSSFeed(r)
  58. }
  59. return nil, errors.New("Failed to detect feed type")
  60. }
  61. // ParseURL fetches the contents of a given url and
  62. // attempts to parse the response into the universal feed type.
  63. func (f *Parser) ParseURL(feedURL string) (feed *Feed, err error) {
  64. client := f.httpClient()
  65. resp, err := client.Get(feedURL)
  66. if err != nil {
  67. return nil, err
  68. }
  69. if resp != nil {
  70. defer func() {
  71. ce := resp.Body.Close()
  72. if ce != nil {
  73. err = ce
  74. }
  75. }()
  76. }
  77. if resp.StatusCode < 200 || resp.StatusCode >= 300 {
  78. return nil, HTTPError{
  79. StatusCode: resp.StatusCode,
  80. Status: resp.Status,
  81. }
  82. }
  83. return f.Parse(resp.Body)
  84. }
  85. // ParseString parses a feed XML string and into the
  86. // universal feed type.
  87. func (f *Parser) ParseString(feed string) (*Feed, error) {
  88. return f.Parse(strings.NewReader(feed))
  89. }
  90. func (f *Parser) parseAtomFeed(feed io.Reader) (*Feed, error) {
  91. af, err := f.ap.Parse(feed)
  92. if err != nil {
  93. return nil, err
  94. }
  95. return f.atomTrans().Translate(af)
  96. }
  97. func (f *Parser) parseRSSFeed(feed io.Reader) (*Feed, error) {
  98. rf, err := f.rp.Parse(feed)
  99. if err != nil {
  100. return nil, err
  101. }
  102. return f.rssTrans().Translate(rf)
  103. }
  104. func (f *Parser) atomTrans() Translator {
  105. if f.AtomTranslator != nil {
  106. return f.AtomTranslator
  107. }
  108. f.AtomTranslator = &DefaultAtomTranslator{}
  109. return f.AtomTranslator
  110. }
  111. func (f *Parser) rssTrans() Translator {
  112. if f.RSSTranslator != nil {
  113. return f.RSSTranslator
  114. }
  115. f.RSSTranslator = &DefaultRSSTranslator{}
  116. return f.RSSTranslator
  117. }
  118. func (f *Parser) httpClient() *http.Client {
  119. if f.Client != nil {
  120. return f.Client
  121. }
  122. f.Client = &http.Client{}
  123. return f.Client
  124. }