diff --git a/cssselect/parser.py b/cssselect/parser.py
index a27ece5..f1ccf98 100644
--- a/cssselect/parser.py
+++ b/cssselect/parser.py
@@ -257,6 +257,48 @@ def specificity(self):
         return a1 + a2, b1 + b2, c1 + c2
 
 
+class Relation(object):
+    """
+    Represents selector:has(subselector)
+
+    ``combinator`` is the token of the subselector's leading combinator; a
+    DELIM token whose value is " " stands for "no explicit combinator".
+    """
+
+    def __init__(self, selector, combinator, subselector):
+        self.selector = selector
+        self.combinator = combinator
+        self.subselector = subselector
+
+    def __repr__(self):
+        return "%s[%r:has(%r)]" % (
+            self.__class__.__name__,
+            self.selector,
+            self.subselector,
+        )
+
+    def canonical(self):
+        # The subselector may be a sequence of selectors or a single one.
+        try:
+            subsel = self.subselector[0].canonical()
+        except TypeError:
+            subsel = self.subselector.canonical()
+        if len(subsel) > 1:
+            subsel = subsel.lstrip("*")
+        # Keep an explicit combinator: ":has(> a)" and ":has(a)" differ.
+        if self.combinator.value != " ":
+            subsel = "%s %s" % (self.combinator.value, subsel)
+        return "%s:has(%s)" % (self.selector.canonical(), subsel)
+
+    def specificity(self):
+        a1, b1, c1 = self.selector.specificity()
+        try:
+            a2, b2, c2 = self.subselector[-1].specificity()
+        except TypeError:
+            a2, b2, c2 = self.subselector.specificity()
+        return a1 + a2, b1 + b2, c1 + c2
+
+
 class Matching(object):
     """
     Represents selector:is(selector_list)
@@ -582,6 +624,10 @@ def parse_simple_selector(stream, inside_negation=False):
                 if next != ("DELIM", ")"):
                     raise SelectorSyntaxError("Expected ')', got %s" % (next,))
                 result = Negation(result, argument)
+            elif ident.lower() == "has":
+                combinator, arguments = parse_relative_selector(stream)
+                result = Relation(result, combinator, arguments)
+
             elif ident.lower() in ("matches", "is"):
                 selectors = parse_simple_selector_arguments(stream)
                 result = Matching(result, selectors)
@@ -607,6 +653,38 @@ def parse_arguments(stream):
             raise SelectorSyntaxError("Expected an argument, got %s" % (next,))
 
 
+def parse_relative_selector(stream):
+    """
+    Parse the argument of ``:has()``, consuming the closing ")".
+
+    Returns ``(combinator, selector)``: the leading combinator token (a
+    DELIM " " token when none is written) and the first selector obtained
+    by re-parsing the collected argument text.
+    """
+    stream.skip_whitespace()
+    subselector = ""
+    next = stream.next()
+
+    # "-" is not a combinator: the translator defines no relation handler
+    # for it, so reject it below as a syntax error instead of failing later.
+    if next in [("DELIM", "+"), ("DELIM", ">"), ("DELIM", "~")]:
+        combinator = next
+        stream.skip_whitespace()
+        next = stream.next()
+    else:
+        combinator = Token("DELIM", " ", pos=0)
+
+    while 1:
+        if next.type in ("IDENT", "STRING", "NUMBER") or next in [("DELIM", "."), ("DELIM", "*")]:
+            subselector += next.value
+        elif next == ("DELIM", ")"):
+            result = parse(subselector)
+            return combinator, result[0]
+        else:
+            raise SelectorSyntaxError("Expected an argument, got %s" % (next,))
+        next = stream.next()
+
+
 def parse_simple_selector_arguments(stream):
     arguments = []
     while 1:
diff --git a/cssselect/xpath.py b/cssselect/xpath.py
index f80e629..9bdcb7f 100644
--- a/cssselect/xpath.py
+++ b/cssselect/xpath.py
@@ -14,6 +14,7 @@
 
 import sys
 import re
+import copy
 
 from cssselect.parser import parse, parse_series, SelectorError
 
@@ -75,14 +76,32 @@ def add_star_prefix(self):
         """
         self.path += "*/"
 
-    def join(self, combiner, other):
+    def join(self, combiner, other, closing_combiner=None, has_inner_condition=False):
+        """
+        Append ``combiner`` and ``other`` to this expression, in place.
+
+        ``closing_combiner``, if given, is appended after ``other``'s element.
+        With ``has_inner_condition``, ``other``'s condition is emitted inside
+        it as well instead of becoming the condition of the joined expression.
+        """
         path = _unicode(self) + combiner
         # Any "star prefix" is redundant when joining.
         if other.path != "*/":
             path += other.path
         self.path = path
-        self.element = other.element
-        self.condition = other.condition
+        if not has_inner_condition:
+            self.element = other.element + closing_combiner if closing_combiner else other.element
+            self.condition = other.condition
+        else:
+            # Fold other's condition into the element so that it stays inside
+            # the predicate opened by ``combiner``.
+            self.element = other.element
+            if other.condition:
+                self.element += "[" + other.condition + "]"
+            if closing_combiner:
+                self.element += closing_combiner
+            # The previous condition is already rendered into ``path``.
+            self.condition = ""
         return self
 
 
@@ -275,6 +294,18 @@ def xpath_negation(self, negation):
         else:
             return xpath.add_condition("0")
 
+    def xpath_relation(self, relation):
+        """Translate selector:has(subselector), dispatching on its combinator."""
+        xpath = self.xpath(relation.selector)
+        combinator = relation.combinator
+        subselector = relation.subselector
+        right = self.xpath(subselector.parsed_tree)
+        method = getattr(
+            self,
+            "xpath_relation_%s_combinator" % self.combinator_mapping[combinator.value],
+        )
+        return method(xpath, right)
+
     def xpath_matching(self, matching):
         xpath = self.xpath(matching.selector)
         exprs = [self.xpath(selector) for selector in matching.selector_list]
@@ -376,6 +407,29 @@ def xpath_indirect_adjacent_combinator(self, left, right):
         """right is a sibling after left, immediately or not"""
         return left.join("/following-sibling::", right)
 
+    def xpath_relation_descendant_combinator(self, left, right):
+        """right is a child, grand-child or further descendant of left; select left"""
+        return left.join("[descendant::", right, closing_combiner="]", has_inner_condition=True)
+
+    def xpath_relation_child_combinator(self, left, right):
+        """right is an immediate child of left; select left"""
+        return left.join("[./", right, closing_combiner="]", has_inner_condition=True)
+
+    def xpath_relation_direct_adjacent_combinator(self, left, right):
+        """right is a sibling immediately after left; select left"""
+        # Test the first following sibling against all of right: its name
+        # (unless right matches any element) and its own condition, if any.
+        tests = ["(position() = 1)"]
+        if right.element != "*":
+            tests.insert(0, "(name() = '{}')".format(right.element))
+        if right.condition:
+            tests.append("({})".format(right.condition))
+        return left.add_condition("following-sibling::*[{}]".format(" and ".join(tests)))
+
+    def xpath_relation_indirect_adjacent_combinator(self, left, right):
+        """right is a sibling after left, immediately or not; select left"""
+        return left.join("[following-sibling::", right, closing_combiner="]", has_inner_condition=True)
+
     # Function: dispatch by function/pseudo-class name
 
     def xpath_nth_child_function(self, xpath, function, last=False, add_name_test=True):
diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py
index ba46d8a..6c0f29a 100644
--- a/tests/test_cssselect.py
+++ b/tests/test_cssselect.py
@@ -148,6 +148,9 @@ def parse_many(first, *others):
         assert parse_many("div:not(div.foo)") == [
             "Negation[Element[div]:not(Class[Element[div].foo])]"
         ]
+        assert parse_many("div:has(div.foo)") == [
+            "Relation[Element[div]:has(Selector[Class[Element[div].foo]])]"
+        ]
         assert parse_many("div:is(.foo, #bar)") == [
             "Matching[Element[div]:is(Class[Element[*].foo], Hash[Element[*]#bar])]"
         ]
@@ -279,6 +282,11 @@ def specificity(css):
         assert specificity(":not(:empty)") == (0, 1, 0)
         assert specificity(":not(#foo)") == (1, 0, 0)
 
+        assert specificity(":has(*)") == (0, 0, 0)
+        assert specificity(":has(foo)") == (0, 0, 1)
+        assert specificity(":has(.foo)") == (0, 1, 0)
+        assert specificity(":has(> foo)") == (0, 0, 1)
+
         assert specificity(":is(.foo, #bar)") == (1, 0, 0)
         assert specificity(":is(:hover, :visited)") == (0, 1, 0)
 
@@ -315,6 +323,9 @@ def css2css(css, res=None):
         css2css(":not(*[foo])", ":not([foo])")
         css2css(":not(:empty)")
         css2css(":not(#foo)")
+        css2css(":has(*)")
+        css2css(":has(foo)")
+        css2css(":has(*.foo)", ":has(.foo)")
         css2css(":is(#bar, .foo)")
         css2css(":is(:focused, :visited)")
         css2css("foo:empty")
@@ -379,6 +390,11 @@ def get_error(css):
         )
         assert get_error("> div p") == ("Expected selector, got <DELIM '>' at 0>")
 
+        # Unsupported :has() with several arguments
+        assert get_error(":has(a, b)") == ("Expected an argument, got <DELIM ',' at 6>")
+        assert get_error(":has()") == ("Expected selector, got <EOF at 0>")
+        assert get_error(":has(- f)") == ("Expected an argument, got <DELIM '-' at 5>")
+
     def test_translation(self):
         def xpath(css):
             return _unicode(GenericTranslator().css_to_xpath(css, prefix=""))
@@ -453,6 +469,19 @@ def xpath(css):
         assert xpath("e:EmPTY") == ("e[not(*) and not(string-length())]")
         assert xpath("e:root") == ("e[not(parent::*)]")
         assert xpath("e:hover") == ("e[0]")  # never matches
+        assert (
+            xpath("div:has(bar.foo)") == "div[descendant::bar"
+            "[@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')]]"
+        )
+        assert xpath("e:has(> f)") == "e[./f]"
+        assert xpath("e:has(> f.foo)") == (
+            "e[./f[@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')]]"
+        )
+        assert xpath("e:has(f)") == "e[descendant::f]"
+        assert xpath("e:has(~ f)") == "e[following-sibling::f]"
+        assert (
+            xpath("e:has(+ f)") == "e[following-sibling::*[(name() = 'f') and (position() = 1)]]"
+        )
         assert xpath('e:contains("foo")') == ("e[contains(., 'foo')]")
         assert xpath("e:ConTains(foo)") == ("e[contains(., 'foo')]")
         assert xpath("e.warning") == (
@@ -863,6 +892,8 @@ def pcss(main, *selectors, **kwargs):
             "sixth-li",
             "seventh-li",
         ]
+        assert pcss("link:has(*)") == []
+        assert pcss("ol:has(div)") == ["first-ol"]
         assert pcss(":is(#first-li, #second-li)") == ["first-li", "second-li"]
         assert pcss("a:is(#name-anchor, #tag-anchor)") == ["name-anchor", "tag-anchor"]
         assert pcss(":is(.c)") == ["first-ol", "third-li", "fourth-li"]