123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342 |
- package xpp
- import (
- "encoding/xml"
- "errors"
- "fmt"
- "io"
- "strings"
- )
type (
	// XMLEventType identifies the kind of event the pull parser is
	// currently positioned on.
	XMLEventType int

	// CharsetReader is the function type accepted by NewXMLPullParser and
	// stored in the CharsetReader field of the underlying xml.Decoder.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
)

// Event kinds reported by XMLPullParser. The values follow declaration
// order, starting at zero with StartDocument.
const (
	// StartDocument is the event of a freshly constructed parser.
	StartDocument XMLEventType = iota
	// EndDocument is reported once the decoder reaches io.EOF.
	EndDocument
	// StartTag corresponds to an xml.StartElement token.
	StartTag
	// EndTag corresponds to an xml.EndElement token.
	EndTag
	// Text corresponds to an xml.CharData token.
	Text
	// Comment corresponds to an xml.Comment token.
	Comment
	// ProcessingInstruction corresponds to an xml.ProcInst token.
	ProcessingInstruction
	// Directive corresponds to an xml.Directive token.
	Directive
	IgnorableWhitespace // TODO: ?
	// TODO: CDSECT ?
)
- type XMLPullParser struct {
- // Document State
- Spaces map[string]string
- SpacesStack []map[string]string
- // Token State
- Depth int
- Event XMLEventType
- Attrs []xml.Attr
- Name string
- Space string
- Text string
- decoder *xml.Decoder
- token interface{}
- }
- func NewXMLPullParser(r io.Reader, strict bool, cr CharsetReader) *XMLPullParser {
- d := xml.NewDecoder(r)
- d.Strict = strict
- d.CharsetReader = cr
- return &XMLPullParser{
- decoder: d,
- Event: StartDocument,
- Depth: 0,
- Spaces: map[string]string{},
- }
- }
- func (p *XMLPullParser) NextTag() (event XMLEventType, err error) {
- t, err := p.Next()
- if err != nil {
- return event, err
- }
- for t == Text && p.IsWhitespace() {
- t, err = p.Next()
- if err != nil {
- return event, err
- }
- }
- if t != StartTag && t != EndTag {
- return event, fmt.Errorf("Expected StartTag or EndTag but got %s at offset: %d", p.EventName(t), p.decoder.InputOffset())
- }
- return t, nil
- }
- func (p *XMLPullParser) Next() (event XMLEventType, err error) {
- for {
- event, err = p.NextToken()
- if err != nil {
- return event, err
- }
- // Return immediately after encountering a StartTag
- // EndTag, Text, EndDocument
- if event == StartTag ||
- event == EndTag ||
- event == EndDocument ||
- event == Text {
- return event, nil
- }
- // Skip Comment/Directive and ProcessingInstruction
- if event == Comment ||
- event == Directive ||
- event == ProcessingInstruction {
- continue
- }
- }
- return event, nil
- }
- func (p *XMLPullParser) NextToken() (event XMLEventType, err error) {
- // Clear any state held for the previous token
- p.resetTokenState()
- token, err := p.decoder.Token()
- if err != nil {
- if err == io.EOF {
- // XML decoder returns the EOF as an error
- // but we want to return it as a valid
- // EndDocument token instead
- p.token = nil
- p.Event = EndDocument
- return p.Event, nil
- }
- return event, err
- }
- p.token = xml.CopyToken(token)
- p.processToken(p.token)
- p.Event = p.EventType(p.token)
- return p.Event, nil
- }
- func (p *XMLPullParser) NextText() (string, error) {
- if p.Event != StartTag {
- return "", errors.New("Parser must be on StartTag to get NextText()")
- }
- t, err := p.Next()
- if err != nil {
- return "", err
- }
- if t != EndTag && t != Text {
- return "", errors.New("Parser must be on EndTag or Text to read text")
- }
- var result string
- for t == Text {
- result = result + p.Text
- t, err = p.Next()
- if err != nil {
- return "", err
- }
- if t != EndTag && t != Text {
- errstr := fmt.Sprintf("Event Text must be immediately followed by EndTag or Text but got %s", p.EventName(t))
- return "", errors.New(errstr)
- }
- }
- return result, nil
- }
- func (p *XMLPullParser) Skip() error {
- for {
- tok, err := p.NextToken()
- if err != nil {
- return err
- }
- if tok == StartTag {
- if err := p.Skip(); err != nil {
- return err
- }
- } else if tok == EndTag {
- return nil
- }
- }
- }
- func (p *XMLPullParser) Attribute(name string) string {
- for _, attr := range p.Attrs {
- if attr.Name.Local == name {
- return attr.Value
- }
- }
- return ""
- }
- func (p *XMLPullParser) Expect(event XMLEventType, name string) (err error) {
- return p.ExpectAll(event, "*", name)
- }
- func (p *XMLPullParser) ExpectAll(event XMLEventType, space string, name string) (err error) {
- if !(p.Event == event && (strings.ToLower(p.Space) == strings.ToLower(space) || space == "*") && (strings.ToLower(p.Name) == strings.ToLower(name) || name == "*")) {
- err = fmt.Errorf("Expected Space:%s Name:%s Event:%s but got Space:%s Name:%s Event:%s at offset: %d", space, name, p.EventName(event), p.Space, p.Name, p.EventName(p.Event), p.decoder.InputOffset())
- }
- return
- }
- func (p *XMLPullParser) DecodeElement(v interface{}) error {
- if p.Event != StartTag {
- return errors.New("DecodeElement can only be called from a StartTag event")
- }
- //tok := &p.token
- startToken := p.token.(xml.StartElement)
- // Consumes all tokens until the matching end token.
- err := p.decoder.DecodeElement(v, &startToken)
- if err != nil {
- return err
- }
- name := p.Name
- // Need to set the "current" token name/event
- // to the previous StartTag event's name
- p.resetTokenState()
- p.Event = EndTag
- p.Depth--
- p.Name = name
- p.token = nil
- return nil
- }
- func (p *XMLPullParser) IsWhitespace() bool {
- return strings.TrimSpace(p.Text) == ""
- }
- func (p *XMLPullParser) EventName(e XMLEventType) (name string) {
- switch e {
- case StartTag:
- name = "StartTag"
- case EndTag:
- name = "EndTag"
- case StartDocument:
- name = "StartDocument"
- case EndDocument:
- name = "EndDocument"
- case ProcessingInstruction:
- name = "ProcessingInstruction"
- case Directive:
- name = "Directive"
- case Comment:
- name = "Comment"
- case Text:
- name = "Text"
- case IgnorableWhitespace:
- name = "IgnorableWhitespace"
- }
- return
- }
- func (p *XMLPullParser) EventType(t xml.Token) (event XMLEventType) {
- switch t.(type) {
- case xml.StartElement:
- event = StartTag
- case xml.EndElement:
- event = EndTag
- case xml.CharData:
- event = Text
- case xml.Comment:
- event = Comment
- case xml.ProcInst:
- event = ProcessingInstruction
- case xml.Directive:
- event = Directive
- }
- return
- }
- func (p *XMLPullParser) processToken(t xml.Token) {
- switch tt := t.(type) {
- case xml.StartElement:
- p.processStartToken(tt)
- case xml.EndElement:
- p.processEndToken(tt)
- case xml.CharData:
- p.processCharDataToken(tt)
- case xml.Comment:
- p.processCommentToken(tt)
- case xml.ProcInst:
- p.processProcInstToken(tt)
- case xml.Directive:
- p.processDirectiveToken(tt)
- }
- }
- func (p *XMLPullParser) processStartToken(t xml.StartElement) {
- p.Depth++
- p.Attrs = t.Attr
- p.Name = t.Name.Local
- p.Space = t.Name.Space
- p.trackNamespaces(t)
- }
- func (p *XMLPullParser) processEndToken(t xml.EndElement) {
- p.Depth--
- p.SpacesStack = p.SpacesStack[:len(p.SpacesStack)-1]
- if len(p.SpacesStack) == 0 {
- p.Spaces = map[string]string{}
- } else {
- p.Spaces = p.SpacesStack[len(p.SpacesStack)-1]
- }
- p.Name = t.Name.Local
- }
- func (p *XMLPullParser) processCharDataToken(t xml.CharData) {
- p.Text = string([]byte(t))
- }
- func (p *XMLPullParser) processCommentToken(t xml.Comment) {
- p.Text = string([]byte(t))
- }
- func (p *XMLPullParser) processProcInstToken(t xml.ProcInst) {
- p.Text = fmt.Sprintf("%s %s", t.Target, string(t.Inst))
- }
- func (p *XMLPullParser) processDirectiveToken(t xml.Directive) {
- p.Text = string([]byte(t))
- }
- func (p *XMLPullParser) resetTokenState() {
- p.Attrs = nil
- p.Name = ""
- p.Space = ""
- p.Text = ""
- }
- func (p *XMLPullParser) trackNamespaces(t xml.StartElement) {
- newSpace := map[string]string{}
- for k, v := range p.Spaces {
- newSpace[k] = v
- }
- for _, attr := range t.Attr {
- if attr.Name.Space == "xmlns" {
- space := strings.TrimSpace(attr.Value)
- spacePrefix := strings.TrimSpace(strings.ToLower(attr.Name.Local))
- newSpace[space] = spacePrefix
- } else if attr.Name.Local == "xmlns" {
- space := strings.TrimSpace(attr.Value)
- newSpace[space] = ""
- }
- }
- p.Spaces = newSpace
- p.SpacesStack = append(p.SpacesStack, newSpace)
- }
|