decode.go 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package cldr
  5. import (
  6. "archive/zip"
  7. "bytes"
  8. "encoding/xml"
  9. "fmt"
  10. "io"
  11. "io/ioutil"
  12. "log"
  13. "os"
  14. "path/filepath"
  15. "regexp"
  16. )
  17. // A Decoder loads an archive of CLDR data.
  18. type Decoder struct {
  19. dirFilter []string
  20. sectionFilter []string
  21. loader Loader
  22. cldr *CLDR
  23. curLocale string
  24. }
  25. // SetSectionFilter takes a list top-level LDML element names to which
  26. // evaluation of LDML should be limited. It automatically calls SetDirFilter.
  27. func (d *Decoder) SetSectionFilter(filter ...string) {
  28. d.sectionFilter = filter
  29. // TODO: automatically set dir filter
  30. }
  31. // SetDirFilter limits the loading of LDML XML files of the specied directories.
  32. // Note that sections may be split across directories differently for different CLDR versions.
  33. // For more robust code, use SetSectionFilter.
  34. func (d *Decoder) SetDirFilter(dir ...string) {
  35. d.dirFilter = dir
  36. }
  37. // A Loader provides access to the files of a CLDR archive.
  38. type Loader interface {
  39. Len() int
  40. Path(i int) string
  41. Reader(i int) (io.ReadCloser, error)
  42. }
  43. var fileRe = regexp.MustCompile(`.*[/\\](.*)[/\\](.*)\.xml`)
  44. // Decode loads and decodes the files represented by l.
  45. func (d *Decoder) Decode(l Loader) (cldr *CLDR, err error) {
  46. d.cldr = makeCLDR()
  47. for i := 0; i < l.Len(); i++ {
  48. fname := l.Path(i)
  49. if m := fileRe.FindStringSubmatch(fname); m != nil {
  50. if len(d.dirFilter) > 0 && !in(d.dirFilter, m[1]) {
  51. continue
  52. }
  53. var r io.Reader
  54. if r, err = l.Reader(i); err == nil {
  55. err = d.decode(m[1], m[2], r)
  56. }
  57. if err != nil {
  58. return nil, err
  59. }
  60. }
  61. }
  62. d.cldr.finalize(d.sectionFilter)
  63. return d.cldr, nil
  64. }
  65. func (d *Decoder) decode(dir, id string, r io.Reader) error {
  66. var v interface{}
  67. var l *LDML
  68. cldr := d.cldr
  69. switch {
  70. case dir == "supplemental":
  71. v = cldr.supp
  72. case dir == "transforms":
  73. return nil
  74. case dir == "bcp47":
  75. v = cldr.bcp47
  76. case dir == "validity":
  77. return nil
  78. default:
  79. ok := false
  80. if v, ok = cldr.locale[id]; !ok {
  81. l = &LDML{}
  82. v, cldr.locale[id] = l, l
  83. }
  84. }
  85. x := xml.NewDecoder(r)
  86. if err := x.Decode(v); err != nil {
  87. log.Printf("%s/%s: %v", dir, id, err)
  88. return err
  89. }
  90. if l != nil {
  91. if l.Identity == nil {
  92. return fmt.Errorf("%s/%s: missing identity element", dir, id)
  93. }
  94. // TODO: verify when CLDR bug http://unicode.org/cldr/trac/ticket/8970
  95. // is resolved.
  96. // path := strings.Split(id, "_")
  97. // if lang := l.Identity.Language.Type; lang != path[0] {
  98. // return fmt.Errorf("%s/%s: language was %s; want %s", dir, id, lang, path[0])
  99. // }
  100. }
  101. return nil
  102. }
  103. type pathLoader []string
  104. func makePathLoader(path string) (pl pathLoader, err error) {
  105. err = filepath.Walk(path, func(path string, _ os.FileInfo, err error) error {
  106. pl = append(pl, path)
  107. return err
  108. })
  109. return pl, err
  110. }
  111. func (pl pathLoader) Len() int {
  112. return len(pl)
  113. }
  114. func (pl pathLoader) Path(i int) string {
  115. return pl[i]
  116. }
  117. func (pl pathLoader) Reader(i int) (io.ReadCloser, error) {
  118. return os.Open(pl[i])
  119. }
  120. // DecodePath loads CLDR data from the given path.
  121. func (d *Decoder) DecodePath(path string) (cldr *CLDR, err error) {
  122. loader, err := makePathLoader(path)
  123. if err != nil {
  124. return nil, err
  125. }
  126. return d.Decode(loader)
  127. }
  128. type zipLoader struct {
  129. r *zip.Reader
  130. }
  131. func (zl zipLoader) Len() int {
  132. return len(zl.r.File)
  133. }
  134. func (zl zipLoader) Path(i int) string {
  135. return zl.r.File[i].Name
  136. }
  137. func (zl zipLoader) Reader(i int) (io.ReadCloser, error) {
  138. return zl.r.File[i].Open()
  139. }
  140. // DecodeZip loads CLDR data from the zip archive for which r is the source.
  141. func (d *Decoder) DecodeZip(r io.Reader) (cldr *CLDR, err error) {
  142. buffer, err := ioutil.ReadAll(r)
  143. if err != nil {
  144. return nil, err
  145. }
  146. archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
  147. if err != nil {
  148. return nil, err
  149. }
  150. return d.Decode(zipLoader{archive})
  151. }