selector_test.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654
  1. package cascadia
  2. import (
  3. "bytes"
  4. "strings"
  5. "testing"
  6. "golang.org/x/net/html"
  7. )
  8. type selectorTest struct {
  9. HTML, selector string
  10. results []string
  11. }
  12. func nodeString(n *html.Node) string {
  13. buf := bytes.NewBufferString("")
  14. html.Render(buf, n)
  15. return buf.String()
  16. }
  17. var selectorTests = []selectorTest{
  18. {
  19. `<body><address>This address...</address></body>`,
  20. "address",
  21. []string{
  22. "<address>This address...</address>",
  23. },
  24. },
  25. {
  26. `<!-- comment --><html><head></head><body>text</body></html>`,
  27. "*",
  28. []string{
  29. "<html><head></head><body>text</body></html>",
  30. "<head></head>",
  31. "<body>text</body>",
  32. },
  33. },
  34. {
  35. `<html><head></head><body></body></html>`,
  36. "*",
  37. []string{
  38. "<html><head></head><body></body></html>",
  39. "<head></head>",
  40. "<body></body>",
  41. },
  42. },
  43. {
  44. `<p id="foo"><p id="bar">`,
  45. "#foo",
  46. []string{
  47. `<p id="foo"></p>`,
  48. },
  49. },
  50. {
  51. `<ul><li id="t1"><p id="t1">`,
  52. "li#t1",
  53. []string{
  54. `<li id="t1"><p id="t1"></p></li>`,
  55. },
  56. },
  57. {
  58. `<ol><li id="t4"><li id="t44">`,
  59. "*#t4",
  60. []string{
  61. `<li id="t4"></li>`,
  62. },
  63. },
  64. {
  65. `<ul><li class="t1"><li class="t2">`,
  66. ".t1",
  67. []string{
  68. `<li class="t1"></li>`,
  69. },
  70. },
  71. {
  72. `<p class="t1 t2">`,
  73. "p.t1",
  74. []string{
  75. `<p class="t1 t2"></p>`,
  76. },
  77. },
  78. {
  79. `<div class="test">`,
  80. "div.teST",
  81. []string{},
  82. },
  83. {
  84. `<p class="t1 t2">`,
  85. ".t1.fail",
  86. []string{},
  87. },
  88. {
  89. `<p class="t1 t2">`,
  90. "p.t1.t2",
  91. []string{
  92. `<p class="t1 t2"></p>`,
  93. },
  94. },
  95. {
  96. `<p><p title="title">`,
  97. "p[title]",
  98. []string{
  99. `<p title="title"></p>`,
  100. },
  101. },
  102. {
  103. `<address><address title="foo"><address title="bar">`,
  104. `address[title="foo"]`,
  105. []string{
  106. `<address title="foo"><address title="bar"></address></address>`,
  107. },
  108. },
  109. {
  110. `<address><address title="foo"><address title="bar">`,
  111. `address[title!="foo"]`,
  112. []string{
  113. `<address><address title="foo"><address title="bar"></address></address></address>`,
  114. `<address title="bar"></address>`,
  115. },
  116. },
  117. {
  118. `<p title="tot foo bar">`,
  119. `[ title ~= foo ]`,
  120. []string{
  121. `<p title="tot foo bar"></p>`,
  122. },
  123. },
  124. {
  125. `<p title="hello world">`,
  126. `[title~="hello world"]`,
  127. []string{},
  128. },
  129. {
  130. `<p lang="en"><p lang="en-gb"><p lang="enough"><p lang="fr-en">`,
  131. `[lang|="en"]`,
  132. []string{
  133. `<p lang="en"></p>`,
  134. `<p lang="en-gb"></p>`,
  135. },
  136. },
  137. {
  138. `<p title="foobar"><p title="barfoo">`,
  139. `[title^="foo"]`,
  140. []string{
  141. `<p title="foobar"></p>`,
  142. },
  143. },
  144. {
  145. `<p title="foobar"><p title="barfoo">`,
  146. `[title$="bar"]`,
  147. []string{
  148. `<p title="foobar"></p>`,
  149. },
  150. },
  151. {
  152. `<p title="foobarufoo">`,
  153. `[title*="bar"]`,
  154. []string{
  155. `<p title="foobarufoo"></p>`,
  156. },
  157. },
  158. {
  159. `<p class=" ">This text should be green.</p><p>This text should be green.</p>`,
  160. `p[class$=" "]`,
  161. []string{},
  162. },
  163. {
  164. `<p class="">This text should be green.</p><p>This text should be green.</p>`,
  165. `p[class$=""]`,
  166. []string{},
  167. },
  168. {
  169. `<p class=" ">This text should be green.</p><p>This text should be green.</p>`,
  170. `p[class^=" "]`,
  171. []string{},
  172. },
  173. {
  174. `<p class="">This text should be green.</p><p>This text should be green.</p>`,
  175. `p[class^=""]`,
  176. []string{},
  177. },
  178. {
  179. `<p class=" ">This text should be green.</p><p>This text should be green.</p>`,
  180. `p[class*=" "]`,
  181. []string{},
  182. },
  183. {
  184. `<p class="">This text should be green.</p><p>This text should be green.</p>`,
  185. `p[class*=""]`,
  186. []string{},
  187. },
  188. {
  189. `<input type="radio" name="Sex" value="F"/>`,
  190. `input[name=Sex][value=F]`,
  191. []string{
  192. `<input type="radio" name="Sex" value="F"/>`,
  193. },
  194. },
  195. {
  196. `<table border="0" cellpadding="0" cellspacing="0" style="table-layout: fixed; width: 100%; border: 0 dashed; border-color: #FFFFFF"><tr style="height:64px">aaa</tr></table>`,
  197. `table[border="0"][cellpadding="0"][cellspacing="0"]`,
  198. []string{
  199. `<table border="0" cellpadding="0" cellspacing="0" style="table-layout: fixed; width: 100%; border: 0 dashed; border-color: #FFFFFF"><tbody><tr style="height:64px"></tr></tbody></table>`,
  200. },
  201. },
  202. {
  203. `<p class="t1 t2">`,
  204. ".t1:not(.t2)",
  205. []string{},
  206. },
  207. {
  208. `<div class="t3">`,
  209. `div:not(.t1)`,
  210. []string{
  211. `<div class="t3"></div>`,
  212. },
  213. },
  214. {
  215. `<div><div class="t2"><div class="t3">`,
  216. `div:not([class="t2"])`,
  217. []string{
  218. `<div><div class="t2"><div class="t3"></div></div></div>`,
  219. `<div class="t3"></div>`,
  220. },
  221. },
  222. {
  223. `<ol><li id=1><li id=2><li id=3></ol>`,
  224. `li:nth-child(odd)`,
  225. []string{
  226. `<li id="1"></li>`,
  227. `<li id="3"></li>`,
  228. },
  229. },
  230. {
  231. `<ol><li id=1><li id=2><li id=3></ol>`,
  232. `li:nth-child(even)`,
  233. []string{
  234. `<li id="2"></li>`,
  235. },
  236. },
  237. {
  238. `<ol><li id=1><li id=2><li id=3></ol>`,
  239. `li:nth-child(-n+2)`,
  240. []string{
  241. `<li id="1"></li>`,
  242. `<li id="2"></li>`,
  243. },
  244. },
  245. {
  246. `<ol><li id=1><li id=2><li id=3></ol>`,
  247. `li:nth-child(3n+1)`,
  248. []string{
  249. `<li id="1"></li>`,
  250. },
  251. },
  252. {
  253. `<ol><li id=1><li id=2><li id=3><li id=4></ol>`,
  254. `li:nth-last-child(odd)`,
  255. []string{
  256. `<li id="2"></li>`,
  257. `<li id="4"></li>`,
  258. },
  259. },
  260. {
  261. `<ol><li id=1><li id=2><li id=3><li id=4></ol>`,
  262. `li:nth-last-child(even)`,
  263. []string{
  264. `<li id="1"></li>`,
  265. `<li id="3"></li>`,
  266. },
  267. },
  268. {
  269. `<ol><li id=1><li id=2><li id=3><li id=4></ol>`,
  270. `li:nth-last-child(-n+2)`,
  271. []string{
  272. `<li id="3"></li>`,
  273. `<li id="4"></li>`,
  274. },
  275. },
  276. {
  277. `<ol><li id=1><li id=2><li id=3><li id=4></ol>`,
  278. `li:nth-last-child(3n+1)`,
  279. []string{
  280. `<li id="1"></li>`,
  281. `<li id="4"></li>`,
  282. },
  283. },
  284. {
  285. `<p>some text <span id="1">and a span</span><span id="2"> and another</span></p>`,
  286. `span:first-child`,
  287. []string{
  288. `<span id="1">and a span</span>`,
  289. },
  290. },
  291. {
  292. `<span>a span</span> and some text`,
  293. `span:last-child`,
  294. []string{
  295. `<span>a span</span>`,
  296. },
  297. },
  298. {
  299. `<address></address><p id=1><p id=2>`,
  300. `p:nth-of-type(2)`,
  301. []string{
  302. `<p id="2"></p>`,
  303. },
  304. },
  305. {
  306. `<address></address><p id=1><p id=2></p><a>`,
  307. `p:nth-last-of-type(2)`,
  308. []string{
  309. `<p id="1"></p>`,
  310. },
  311. },
  312. {
  313. `<address></address><p id=1><p id=2></p><a>`,
  314. `p:last-of-type`,
  315. []string{
  316. `<p id="2"></p>`,
  317. },
  318. },
  319. {
  320. `<address></address><p id=1><p id=2></p><a>`,
  321. `p:first-of-type`,
  322. []string{
  323. `<p id="1"></p>`,
  324. },
  325. },
  326. {
  327. `<div><p id="1"></p><a></a></div><div><p id="2"></p></div>`,
  328. `p:only-child`,
  329. []string{
  330. `<p id="2"></p>`,
  331. },
  332. },
  333. {
  334. `<div><p id="1"></p><a></a></div><div><p id="2"></p><p id="3"></p></div>`,
  335. `p:only-of-type`,
  336. []string{
  337. `<p id="1"></p>`,
  338. },
  339. },
  340. {
  341. `<p id="1"><!-- --><p id="2">Hello<p id="3"><span>`,
  342. `:empty`,
  343. []string{
  344. `<head></head>`,
  345. `<p id="1"><!-- --></p>`,
  346. `<span></span>`,
  347. },
  348. },
  349. {
  350. `<div><p id="1"><table><tr><td><p id="2"></table></div><p id="3">`,
  351. `div p`,
  352. []string{
  353. `<p id="1"><table><tbody><tr><td><p id="2"></p></td></tr></tbody></table></p>`,
  354. `<p id="2"></p>`,
  355. },
  356. },
  357. {
  358. `<div><p id="1"><table><tr><td><p id="2"></table></div><p id="3">`,
  359. `div table p`,
  360. []string{
  361. `<p id="2"></p>`,
  362. },
  363. },
  364. {
  365. `<div><p id="1"><div><p id="2"></div><table><tr><td><p id="3"></table></div>`,
  366. `div > p`,
  367. []string{
  368. `<p id="1"></p>`,
  369. `<p id="2"></p>`,
  370. },
  371. },
  372. {
  373. `<p id="1"><p id="2"></p><address></address><p id="3">`,
  374. `p ~ p`,
  375. []string{
  376. `<p id="2"></p>`,
  377. `<p id="3"></p>`,
  378. },
  379. },
  380. {
  381. `<p id="1"></p>
  382. <!--comment-->
  383. <p id="2"></p><address></address><p id="3">`,
  384. `p + p`,
  385. []string{
  386. `<p id="2"></p>`,
  387. },
  388. },
  389. {
  390. `<ul><li></li><li></li></ul><p>`,
  391. `li, p`,
  392. []string{
  393. "<li></li>",
  394. "<li></li>",
  395. "<p></p>",
  396. },
  397. },
  398. {
  399. `<p id="1"><p id="2"></p><address></address><p id="3">`,
  400. `p +/*This is a comment*/ p`,
  401. []string{
  402. `<p id="2"></p>`,
  403. },
  404. },
  405. {
  406. `<p>Text block that <span>wraps inner text</span> and continues</p>`,
  407. `p:contains("that wraps")`,
  408. []string{
  409. `<p>Text block that <span>wraps inner text</span> and continues</p>`,
  410. },
  411. },
  412. {
  413. `<p>Text block that <span>wraps inner text</span> and continues</p>`,
  414. `p:containsOwn("that wraps")`,
  415. []string{},
  416. },
  417. {
  418. `<p>Text block that <span>wraps inner text</span> and continues</p>`,
  419. `:containsOwn("inner")`,
  420. []string{
  421. `<span>wraps inner text</span>`,
  422. },
  423. },
  424. {
  425. `<p>Text block that <span>wraps inner text</span> and continues</p>`,
  426. `p:containsOwn("block")`,
  427. []string{
  428. `<p>Text block that <span>wraps inner text</span> and continues</p>`,
  429. },
  430. },
  431. {
  432. `<div id="d1"><p id="p1"><span>text content</span></p></div><div id="d2"/>`,
  433. `div:has(#p1)`,
  434. []string{
  435. `<div id="d1"><p id="p1"><span>text content</span></p></div>`,
  436. },
  437. },
  438. {
  439. `<div id="d1"><p id="p1"><span>contents 1</span></p></div>
  440. <div id="d2"><p>contents <em>2</em></p></div>`,
  441. `div:has(:containsOwn("2"))`,
  442. []string{
  443. `<div id="d2"><p>contents <em>2</em></p></div>`,
  444. },
  445. },
  446. {
  447. `<body><div id="d1"><p id="p1"><span>contents 1</span></p></div>
  448. <div id="d2"><p id="p2">contents <em>2</em></p></div></body>`,
  449. `body :has(:containsOwn("2"))`,
  450. []string{
  451. `<div id="d2"><p id="p2">contents <em>2</em></p></div>`,
  452. `<p id="p2">contents <em>2</em></p>`,
  453. },
  454. },
  455. {
  456. `<body><div id="d1"><p id="p1"><span>contents 1</span></p></div>
  457. <div id="d2"><p id="p2">contents <em>2</em></p></div></body>`,
  458. `body :haschild(:containsOwn("2"))`,
  459. []string{
  460. `<p id="p2">contents <em>2</em></p>`,
  461. },
  462. },
  463. {
  464. `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
  465. `p:matches([\d])`,
  466. []string{
  467. `<p id="p1">0123456789</p>`,
  468. `<p id="p3">0123ABCD</p>`,
  469. },
  470. },
  471. {
  472. `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
  473. `p:matches([a-z])`,
  474. []string{
  475. `<p id="p2">abcdef</p>`,
  476. },
  477. },
  478. {
  479. `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
  480. `p:matches([a-zA-Z])`,
  481. []string{
  482. `<p id="p2">abcdef</p>`,
  483. `<p id="p3">0123ABCD</p>`,
  484. },
  485. },
  486. {
  487. `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
  488. `p:matches([^\d])`,
  489. []string{
  490. `<p id="p2">abcdef</p>`,
  491. `<p id="p3">0123ABCD</p>`,
  492. },
  493. },
  494. {
  495. `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
  496. `p:matches(^(0|a))`,
  497. []string{
  498. `<p id="p1">0123456789</p>`,
  499. `<p id="p2">abcdef</p>`,
  500. `<p id="p3">0123ABCD</p>`,
  501. },
  502. },
  503. {
  504. `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
  505. `p:matches(^\d+$)`,
  506. []string{
  507. `<p id="p1">0123456789</p>`,
  508. },
  509. },
  510. {
  511. `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
  512. `p:not(:matches(^\d+$))`,
  513. []string{
  514. `<p id="p2">abcdef</p>`,
  515. `<p id="p3">0123ABCD</p>`,
  516. },
  517. },
  518. {
  519. `<div><p id="p1">01234<em>567</em>89</p><div>`,
  520. `div :matchesOwn(^\d+$)`,
  521. []string{
  522. `<p id="p1">01234<em>567</em>89</p>`,
  523. `<em>567</em>`,
  524. },
  525. },
  526. {
  527. `<ul>
  528. <li><a id="a1" href="http://www.google.com/finance"></a>
  529. <li><a id="a2" href="http://finance.yahoo.com/"></a>
  530. <li><a id="a2" href="http://finance.untrusted.com/"/>
  531. <li><a id="a3" href="https://www.google.com/news"/>
  532. <li><a id="a4" href="http://news.yahoo.com"/>
  533. </ul>`,
  534. `[href#=(fina)]:not([href#=(\/\/[^\/]+untrusted)])`,
  535. []string{
  536. `<a id="a1" href="http://www.google.com/finance"></a>`,
  537. `<a id="a2" href="http://finance.yahoo.com/"></a>`,
  538. },
  539. },
  540. {
  541. `<ul>
  542. <li><a id="a1" href="http://www.google.com/finance"/>
  543. <li><a id="a2" href="http://finance.yahoo.com/"/>
  544. <li><a id="a3" href="https://www.google.com/news"></a>
  545. <li><a id="a4" href="http://news.yahoo.com"/>
  546. </ul>`,
  547. `[href#=(^https:\/\/[^\/]*\/?news)]`,
  548. []string{
  549. `<a id="a3" href="https://www.google.com/news"></a>`,
  550. },
  551. },
  552. {
  553. `<form>
  554. <label>Username <input type="text" name="username" /></label>
  555. <label>Password <input type="password" name="password" /></label>
  556. <label>Country
  557. <select name="country">
  558. <option value="ca">Canada</option>
  559. <option value="us">United States</option>
  560. </select>
  561. </label>
  562. <label>Bio <textarea name="bio"></textarea></label>
  563. <button>Sign up</button>
  564. </form>`,
  565. `:input`,
  566. []string{
  567. `<input type="text" name="username"/>`,
  568. `<input type="password" name="password"/>`,
  569. `<select name="country">
  570. <option value="ca">Canada</option>
  571. <option value="us">United States</option>
  572. </select>`,
  573. `<textarea name="bio"></textarea>`,
  574. `<button>Sign up</button>`,
  575. },
  576. },
  577. {
  578. `<html><head></head><body></body></html>`,
  579. ":root",
  580. []string{
  581. "<html><head></head><body></body></html>",
  582. },
  583. },
  584. {
  585. `<html><head></head><body></body></html>`,
  586. "*:root",
  587. []string{
  588. "<html><head></head><body></body></html>",
  589. },
  590. },
  591. {
  592. `<html><head></head><body></body></html>`,
  593. "*:root:first-child",
  594. []string{},
  595. },
  596. {
  597. `<html><head></head><body></body></html>`,
  598. "*:root:nth-child(1)",
  599. []string{},
  600. },
  601. {
  602. `<html><head></head><body><a href="http://www.foo.com"></a></body></html>`,
  603. "a:not(:root)",
  604. []string{
  605. `<a href="http://www.foo.com"></a>`,
  606. },
  607. },
  608. }
  609. func TestSelectors(t *testing.T) {
  610. for _, test := range selectorTests {
  611. s, err := Compile(test.selector)
  612. if err != nil {
  613. t.Errorf("error compiling %q: %s", test.selector, err)
  614. continue
  615. }
  616. doc, err := html.Parse(strings.NewReader(test.HTML))
  617. if err != nil {
  618. t.Errorf("error parsing %q: %s", test.HTML, err)
  619. continue
  620. }
  621. matches := s.MatchAll(doc)
  622. if len(matches) != len(test.results) {
  623. t.Errorf("selector %s wanted %d elements, got %d instead", test.selector, len(test.results), len(matches))
  624. continue
  625. }
  626. for i, m := range matches {
  627. got := nodeString(m)
  628. if got != test.results[i] {
  629. t.Errorf("selector %s wanted %s, got %s instead", test.selector, test.results[i], got)
  630. }
  631. }
  632. firstMatch := s.MatchFirst(doc)
  633. if len(test.results) == 0 {
  634. if firstMatch != nil {
  635. t.Errorf("MatchFirst: selector %s want nil, got %s", test.selector, nodeString(firstMatch))
  636. }
  637. } else {
  638. got := nodeString(firstMatch)
  639. if got != test.results[0] {
  640. t.Errorf("MatchFirst: selector %s want %s, got %s", test.selector, test.results[0], got)
  641. }
  642. }
  643. }
  644. }