|
| 1 | +import pegs, strutils, ../xml |
| 2 | +from streams import newStringStream |
| 3 | +from strtabs import hasKey |
| 4 | + |
| 5 | + |
const
  # Attribute name: a letter followed by letters, digits, '_' or '-'.
  attribute = r"[a-zA-Z][a-zA-Z0-9_\-]*"
  # One captured ".class" part (the leading dot is part of the capture).
  classes = r"{\.[a-zA-Z0-9_][a-zA-Z0-9_\-]*}"
  # One captured "[name]" / "[name<op>=value]" part, brackets included.
  attributes = r"{\[" & attribute & r"\s*([\*\^\$\~]?\=\s*[\'""]?(\s*\ident\s*)+[\'""]?)?\]}"

let
  # Splits a compound selector token into its captured parts:
  # optional tag, optional "#id", any ".class" parts, any "[...]" parts.
  pselectors = peg(r"\s*{\ident}?({'#'\ident})? (" & classes & ")* " & attributes & "*")
  # Splits one "[...]" part into: whole part, name, operator char, value.
  pattributes = peg(r"{\[{" & attribute & r"}\s*({[\*\^\$\~]?}\=\s*[\'""]?{(\s*\ident\s*)+}[\'""]?)?\]}")
| 12 | + |
type
  Attribute = object
    ## One attribute condition of a selector, e.g. `[name^=value]`.
    name: string     ## attribute name to look up on the node
    operator: char   ## first char of the operator; '\0' when none was given
    value: string    ## value to compare against; empty for a bare `[name]`

  Selector = object
    ## One parsed compound selector plus the combinator that precedes it.
    combinator: char             ## ' ', '>', '+' or '~'
    tag: string                  ## element name; "" or "*" matches any
    id: string                   ## required id; "" means no id condition
    classes: seq[string]         ## class names, without the leading dot
    attributes: seq[Attribute]   ## attribute conditions

  QueryContext = object
    ## Set of nodes a query starts from.
    root: seq[XmlNode]
| 28 | + |
proc newSelector(tag, id = "", classes: seq[string] = @[], attributes: seq[Attribute] = @[]): Selector =
  ## Builds a `Selector` from the given parts. The combinator always starts
  ## out as ' '; callers overwrite it when another combinator applies.
  Selector(
    combinator: ' ',
    tag: tag,
    id: id,
    classes: classes,
    attributes: attributes
  )
| 35 | + |
proc initContext(root: seq[XmlNode]): QueryContext =
  ## Wraps a list of starting nodes in a `QueryContext`.
  QueryContext(root: root)
| 38 | + |
proc initContext(root: XmlNode): QueryContext =
  ## Wraps a single starting node in a `QueryContext`.
  QueryContext(root: @[root])
| 41 | + |
proc newAttribute(n, o, v: string): Attribute =
  ## Builds an attribute condition from name `n`, operator text `o` and
  ## value `v`. Only the first character of `o` is kept; when `o` is empty
  ## the operator stays at its default '\0'.
  result = Attribute(name: n, value: v)
  if o.len > 0:
    result.operator = o[0]
| 48 | + |
proc q*(n: XmlNode): QueryContext =
  ## Creates a query context whose only starting node is `n`.
  result = initContext(@[n])
| 52 | + |
proc q*(n: seq[XmlNode]): QueryContext =
  ## Creates a query context that starts from every node in `n`.
  result = QueryContext(root: n)
| 56 | + |
proc q*(xml: string): QueryContext =
  ## Creates a query context from an XML string: the text is parsed with
  ## `parseXml` and the resulting node becomes the only starting node.
  initContext(@[parseXml(xml)])
| 63 | + |
| 64 | + |
proc match(n: XmlNode, s: Selector): bool =
  ## Returns true when node `n` satisfies every part of selector `s`:
  ## tag name, id, all listed classes and all attribute conditions.
  ##
  ## Each class and each attribute condition must hold on its own; checking
  ## stops at the first one that fails. Supported attribute operators:
  ## none / `=` (presence or equality), `^=` (prefix), `$=` (suffix),
  ## `*=` (substring) and `~=` (whitespace-separated word).

  # An empty tag or "*" accepts any element name.
  if not (s.tag == "" or s.tag == "*" or n.name == s.tag):
    return false

  if s.id != "" and n.attr("id") != s.id:
    return false

  if s.classes.len > 0:
    let classAttr = n.attr("class")
    if classAttr == "":
      return false
    # Split once and reuse the list for every requested class.
    let nodeClasses = classAttr.split()
    for wanted in s.classes:
      if wanted notin nodeClasses:
        return false

  for cond in s.attributes:
    let value = n.attr(cond.name)
    var satisfied = false
    case cond.operator
    of '\0':
      if cond.value.len == 0:
        # [attr]: the attribute only has to carry a non-empty value
        satisfied = value.len > 0
      else:
        # [attr=value]: exact match
        satisfied = cond.value == value
    of '^':
      satisfied = value.startsWith(cond.value)
    of '$':
      satisfied = value.endsWith(cond.value)
    of '*':
      satisfied = value.contains(cond.value)
    of '~':
      # [attr~=value]: value is one of the whitespace-separated words
      satisfied = cond.value in value.split()
    else:
      satisfied = false
    if not satisfied:
      return false

  result = true
| 93 | + |
proc searchSimple(parent: XmlNode, selector: Selector, found: var seq[XmlNode]) =
  ## Appends to `found` every child of `parent` that matches `selector`.
  ## Unless the selector's combinator is '>', the search also continues
  ## into each child, whether or not that child matched.
  let descend = selector.combinator != '>'
  for node in parent.children:
    if match(node, selector):
      found.add(node)
    if descend:
      searchSimple(node, selector, found)
| 100 | + |
proc searchSimple(parents: var seq[XmlNode], selector: Selector) =
  ## Runs the single-selector search under every node in `parents` and
  ## replaces `parents` with the collected matches.
  var collected = newSeq[XmlNode]()
  for node in parents:
    searchSimple(node, selector, collected)
  parents = collected
| 107 | + |
proc searchCombined(parent: XmlNode, selectors: seq[Selector], found: var seq[XmlNode]) =
  ## Matches a chain of sibling selectors against the children of `parent`.
  ##
  ## The chain is processed selector by selector. For every child matched by
  ## a non-final selector, the index just after it is remembered as a start
  ## position for the next selector. Children matched by the final selector
  ## are appended to `found`, skipping nodes that are already in it.

  # Nothing to scan when the node has no child list.
  if parent.children.isNil:
    return

  let lastIndex = selectors.len - 1
  var starts = @[0]

  for i, selector in selectors:
    var nextStarts: seq[int] = @[]

    for start in starts:
      for k in start..parent.children.len-1:
        let child = parent.children[k]

        if match(child, selector):
          if i < lastIndex:
            # the next selector only looks at nodes after this one
            nextStarts.add(k + 1)
          elif child notin found:
            found.add(child)
        # NOTE(review): '+' stops after the first candidate, i.e. only the
        # node at the start position is tested - confirm this nesting
        # against the upstream file.
        if selector.combinator == '+':
          break

    starts = nextStarts
| 135 | + |
proc searchCombined(parents: var seq[XmlNode], selectors: seq[Selector]) =
  ## Runs the sibling-chain search under every node in `parents` and
  ## replaces `parents` with the collected matches.
  var collected = newSeq[XmlNode]()
  for node in parents:
    searchCombined(node, selectors, collected)
  parents = collected
| 142 | + |
proc parseSelector(token: string): Selector =
  ## Parses one whitespace-free selector token such as `div#main.item[href]`
  ## into a `Selector`.
  ##
  ## `"*"` yields the universal selector. Otherwise the token is split by
  ## `pselectors` and each captured part is classified by its first
  ## character: '#' sets the id, '.' adds a class, '[' adds an attribute
  ## condition (when it parses with `pattributes`), anything else is taken
  ## as the tag name. A token that does not match leaves the selector empty.
  result = newSelector()

  if token == "*":
    # Universal selector
    result.tag = "*"
  elif token =~ pselectors:
    for part in matches:
      # Unused capture slots are empty (nil on old compilers); `len` is 0
      # in both cases, so this guard also protects the `[0]` access below.
      if part.len == 0:
        continue

      case part[0]
      of '#':
        result.id = part.substr(1)
      of '.':
        result.classes.add(part.substr(1))
      of '[':
        # Separate buffer so the captures being iterated are never touched.
        # Slots: 1 = name, 2 = operator, 3 = value.
        var fields: array[MaxSubpatterns, string]
        if match(part, pattributes, fields):
          result.attributes.add(newAttribute(fields[1], fields[2], fields[3]))
      else:
        result.tag = part
| 169 | + |
proc select*(q: QueryContext, s: string = ""): seq[XmlNode] =
  ## Returns the nodes selected by CSS selector `s`, starting from the
  ## context's root nodes. An empty selector returns the roots unchanged.
  ##
  ## The selector is split with `split()`; combinators ('>', '+', '~') must
  ## therefore be separate tokens. Each step narrows the current result set.
  ## Raises `ValueError` when a sibling combinator ends the selector.
  result = q.root
  if s.len == 0:
    return

  let tokens = s.split()
  for pos, token in tokens:
    let previous = if pos > 0: tokens[pos-1] else: ""

    # Already consumed by the look-ahead of an earlier step.
    if previous == "+" or previous == "~":
      continue

    # Combinators carry no selector of their own.
    if token == ">" or token == "~" or token == "+":
      continue

    var head = parseSelector(token)
    if previous == ">":
      head.combinator = '>'
    var chain = @[head]

    # Collect every "<sibling combinator> <selector>" pair that follows.
    var ahead = pos + 1
    while ahead < tokens.len:
      let combinator = tokens[ahead]
      if combinator != "+" and combinator != "~":
        break
      if ahead + 1 >= tokens.len:
        raise newException(ValueError, "a selector expected after sibling combinator: " & combinator)

      var sibling = parseSelector(tokens[ahead+1])
      sibling.combinator = combinator[0]
      chain.add(sibling)
      ahead += 2

    if chain.len == 1:
      result.searchSimple(chain[0])
    else:
      result.searchCombined(chain)
| 220 | + |
proc select*(n: XmlNode, s: string = ""): seq[XmlNode] {.inline.} =
  ## Shortcut for `q(n).select(s)`: runs selector `s` with `n` as the only
  ## starting node.
  result = select(q(n), s)
0 commit comments