apetest.referrer module

Models links between documents.

LinkSet models links such as <a href= HTML tags. Redirect models a redirection from one URL to another. Form models a form that can generate requests for other documents when submitted.

Source code
# SPDX-License-Identifier: BSD-3-Clause

"""Models links between documents.

`LinkSet` models links such as `<a href=` HTML tags.
`Redirect` models a redirection from one URL to another.
`Form` models a form that can generate requests for other documents
when submitted.
"""

from apetest.request import Request

class Referrer:
    """Models an entity that can generate requests.
    Examples of Referrers are hyperlinks, which generate one fixed request, and
    forms, which generate many requests.
    """

    def __init__(self, page_url):
        """Initialize the referrer's `page_url` from the argument and
        sets the `method` to `'get'`.
        """

        self.page_url = page_url
        """The URL of the page linked to, without query."""

        self.method = 'get'
        """The HTTP method to be used for generated requests, in lower case.

        Only `'get'` and `'post'` are currently supported.
        """

    def has_request(self, request):
        """Returns `True` iff this referrer could generate `request`.

        Note that it is possible for this function to return `True`
        even though the request in question is not generated by
        `Referrer.iter_requests`.
        """
        raise NotImplementedError

    def iter_requests(self):
        """Iterates through requests generated by this referrer.

        The requests should be returned in an order such that there is
        a lot of variation between subsequent requests. This improves
        the test coverage when we only have time to try the first N requests.
        The iteration is not expected to return every possible request that
        can be generated by this referrer, just a number of interesting ones.
        Note that free inputs such as text fields have an infinite value set,
        so it would not even be possible to generate everything.
        """
        raise NotImplementedError

class Redirect(Referrer):
    """An HTTP redirect from one URL to another."""

    def __init__(self, target):
        """Initialize a redirection to `target`."""
        Referrer.__init__(self, target.page_url)

        self.target = target
        """The `apetest.request.Request` that a user agent is redirected to."""

    def has_request(self, request):
        return request == self.target

    def iter_requests(self):
        yield self.target

class LinkSet(Referrer):
    """One or more links to different queries of the same page."""

    def __init__(self):
        """Initialize an empty set of links.

        `Referrer.page_url` will be `None` until links are added.
        """
        Referrer.__init__(self, None) # page_url is set later

        self.links = set()
        """The links (`apetest.request.Request` objects) that were added."""

    def add(self, request):
        """Adds a linked `apetest.request.Request` to this set.

        All links in the set must point to the same page:
        the URL without the query must be equal.
        """

        if self.page_url is None:
            self.page_url = request.page_url
        else:
            assert self.page_url == request.page_url
        self.links.add(request)

    def has_request(self, request):
        return request in self.links

    def iter_requests(self):
        return iter(self.links)

class Form(Referrer):
    """An HTML form that can generate a request upon submission."""

    def __init__(self, page_url, method, controls):
        """Initialize form with the given `page_url` (action), `method`
        (`'get'` or `'post'`) and `controls` (input elements).
        """
        assert method in ('get', 'post')
        Referrer.__init__(self, page_url)
        self.method = method
        self.controls = controls
        """Collection of `apetest.control.Control` objects that represent
        user interface input elements that contribute to the submitted
        query.
        """

        # Gather all alternatives in a single data structure.
        # TODO: We could improve the chances of submitting valid free input by
        #       looking at what other pages fill in and the default value.
        #       For example, if we see numbers being filled in, it could be
        #       a numeric field. This feature would require the set of
        #       alternatives to change as we gather more hints about the
        #       control.
        self.all_alternatives = all_alternatives = []
        self.base_query = base_query = []
        combinations = 1
        for control in controls:
            # Filter out duplicates.
            alternatives = set(
                (None, None) if name is None or value is None
                else (name, value)
                for name, value in control.alternatives()
                )
            if len(alternatives) == 1:
                # If there is only once choice, make that choice now.
                name, value = alternatives.pop()
                if name is not None and value is not None:
                    base_query.append((name, value))
            elif alternatives:
                # Store possible choices.
                all_alternatives.append(tuple(alternatives))
                combinations *= len(alternatives)

        self.combinations = combinations

        print('non-alternatives:', base_query)
        print('alternatives:', all_alternatives)
        print('combinations:', combinations)

    def has_request(self, request):
        # Check if page matches.
        if self.page_url != request.page_url:
            return False

        # We can never produce more pairs than we have controls.
        if len(request.query) > len(self.controls):
            return False

        # Create a matrix of bools that stores which control and pair
        # combinations are possible.
        produces_pairs = [
            [control.has_alternative(name, value)
             for name, value in request.query]
            for control in self.controls
            ]
        # Store which controls can be omitted from the submission.
        produces_empty = [
            control.has_alternative(None, None)
            for control in self.controls
            ]

        class MatrixIterator:
            """Base class for ControlIterator and PairIterator.
            """
            # pylint: disable=missing-docstring
            def __init__(self):
                self._index = self._get_length()
                self._changed = False
            def __iter__(self):
                return self
            def _get_length(self):
                raise NotImplementedError
            def _value_at(self, index):
                raise NotImplementedError
            def is_changed(self):
                return self._changed
            def remove_control(self, control_index):
                del produces_pairs[control_index]
                del produces_empty[control_index]
                self._changed = True
            def remove_pair(self, pair_index):
                for produced in produces_pairs:
                    del produced[pair_index]
                self._changed = True
            def __next__(self):
                index = self._index
                if index == 0:
                    raise StopIteration
                index -= 1
                self._index = index
                return self._value_at(index)

        class ControlIterator(MatrixIterator):
            """Iterates through produces_pairs and produces_empty one control
            at a time, offering methods to remove controls and pairs without
            disturbing the iteration in progress.
            """
            def remove_control(self): # pylint: disable=arguments-differ
                MatrixIterator.remove_control(self, self._index)
            def _get_length(self):
                return len(produces_pairs)
            def _value_at(self, index):
                return produces_pairs[index], produces_empty[index]

        class PairIterator(MatrixIterator):
            """Iterates through produces_pairs one pair at a time, offering
            methods to remove controls and pairs without disturbing the
            iteration in progress.
            """
            def remove_pair(self): # pylint: disable=arguments-differ
                MatrixIterator.remove_pair(self, self._index)
            def _get_length(self):
                return len(produces_pairs[0]) if produces_pairs else 0
            def _value_at(self, index):
                return [
                    controlIndex
                    for controlIndex, produces in enumerate(produces_pairs)
                    if produces[index]
                    ]

        # Simplify the matrix as much as possible.
        # We go through a lot of trouble to minimize the size of the matrix,
        # since the generic solver has an exponential run time.
        while True:
            # Check whether each control can produce something that matches the
            # request query.
            control_it = ControlIterator()
            for produced, empty in control_it:
                num_pairs = produced.count(True)
                if num_pairs == 0:
                    # None of the pairs this control can produce are part of
                    # the request...
                    if empty:
                        # ...but it can produce nothing.
                        control_it.remove_control()
                    else:
                        # ...and it must produce something.
                        return False
                elif num_pairs == 1 and not empty:
                    # Exactly one pair that this control can produce is part of
                    # the request and it must produce something.
                    control_it.remove_control()
                    control_it.remove_pair(produced.index(True))

            # We can never produce more pairs than we have controls.
            # Check again now that the list of controls is possibly shorter.
            if produces_pairs and len(produces_pairs[0]) > len(produces_pairs):
                return False

            # Resolve request pairs that can only be produced by a single
            # control.
            pair_it = PairIterator()
            for producers in pair_it:
                num_controls = len(producers)
                if num_controls == 0:
                    # No control can produce this name-value pair.
                    return False
                elif num_controls == 1:
                    # Exactly one control can produce this name-value pair.
                    pair_it.remove_control(producers[0])
                    pair_it.remove_pair()

            if not (control_it.is_changed() or pair_it.is_changed()):
                # No more simplifications possible.
                break

        # For most forms, there will be nothing left after eliminating the
        # simple cases, because the control names do not overlap.
        if not produces_pairs:
            return True

        # Recursively try to find a solution.
        # We don't have to know the solution, only check its existence.
        remaining_pairs = list(range(len(produces_pairs[0])))
        def solve_recursive(control_index):
            if control_index == len(produces_pairs):
                return True
            produced = produces_pairs[control_index]
            # Note that remaining_pairs contains the same elements (not
            # necessarily in the same order) when this function returns
            # False; if it returns True then the contents of remaining_pairs
            # don't matter anymore.
            pair_index = remaining_pairs.pop()
            switch_index = 0
            while True:
                if produced[pair_index]:
                    if solve_recursive(control_index + 1):
                        return True
                if switch_index == len(remaining_pairs):
                    break
                remaining_pairs[switch_index], pair_index = \
                    pair_index, remaining_pairs[switch_index]
                switch_index += 1
            remaining_pairs.append(pair_index)
            return False
        return solve_recursive(0)

    def iter_requests(self):
        progress = 0
        # TODO: Design an algorithm to compute a good increment.
        increment = 83
        while True:
            index = progress
            query = []
            for alternatives in self.all_alternatives:
                index, choice = divmod(index, len(alternatives))
                alternative = alternatives[choice]
                if alternative[0] is not None:
                    query.append(alternative)
            yield Request(
                self.page_url, self.base_query + query, maybe_bad=True
                )
            progress = (progress + increment) % self.combinations
            if progress == 0:
                # Because increment and combinations are relatively prime,
                # reaching 0 means we've visited all indices.
                break}

Classes

class Form (ancestors: Referrer)

An HTML form that can generate a request upon submission.

Source code
class Form(Referrer):
    """An HTML form that can generate a request upon submission."""

    def __init__(self, page_url, method, controls):
        """Initialize form with the given `page_url` (action), `method`
        (`'get'` or `'post'`) and `controls` (input elements).
        """
        assert method in ('get', 'post')
        Referrer.__init__(self, page_url)
        self.method = method
        self.controls = controls
        """Collection of `apetest.control.Control` objects that represent
        user interface input elements that contribute to the submitted
        query.
        """

        # Gather all alternatives in a single data structure.
        # TODO: We could improve the chances of submitting valid free input by
        #       looking at what other pages fill in and the default value.
        #       For example, if we see numbers being filled in, it could be
        #       a numeric field. This feature would require the set of
        #       alternatives to change as we gather more hints about the
        #       control.
        self.all_alternatives = all_alternatives = []
        self.base_query = base_query = []
        combinations = 1
        for control in controls:
            # Filter out duplicates.
            alternatives = set(
                (None, None) if name is None or value is None
                else (name, value)
                for name, value in control.alternatives()
                )
            if len(alternatives) == 1:
                # If there is only once choice, make that choice now.
                name, value = alternatives.pop()
                if name is not None and value is not None:
                    base_query.append((name, value))
            elif alternatives:
                # Store possible choices.
                all_alternatives.append(tuple(alternatives))
                combinations *= len(alternatives)

        self.combinations = combinations

        print('non-alternatives:', base_query)
        print('alternatives:', all_alternatives)
        print('combinations:', combinations)

    def has_request(self, request):
        # Check if page matches.
        if self.page_url != request.page_url:
            return False

        # We can never produce more pairs than we have controls.
        if len(request.query) > len(self.controls):
            return False

        # Create a matrix of bools that stores which control and pair
        # combinations are possible.
        produces_pairs = [
            [control.has_alternative(name, value)
             for name, value in request.query]
            for control in self.controls
            ]
        # Store which controls can be omitted from the submission.
        produces_empty = [
            control.has_alternative(None, None)
            for control in self.controls
            ]

        class MatrixIterator:
            """Base class for ControlIterator and PairIterator.
            """
            # pylint: disable=missing-docstring
            def __init__(self):
                self._index = self._get_length()
                self._changed = False
            def __iter__(self):
                return self
            def _get_length(self):
                raise NotImplementedError
            def _value_at(self, index):
                raise NotImplementedError
            def is_changed(self):
                return self._changed
            def remove_control(self, control_index):
                del produces_pairs[control_index]
                del produces_empty[control_index]
                self._changed = True
            def remove_pair(self, pair_index):
                for produced in produces_pairs:
                    del produced[pair_index]
                self._changed = True
            def __next__(self):
                index = self._index
                if index == 0:
                    raise StopIteration
                index -= 1
                self._index = index
                return self._value_at(index)

        class ControlIterator(MatrixIterator):
            """Iterates through produces_pairs and produces_empty one control
            at a time, offering methods to remove controls and pairs without
            disturbing the iteration in progress.
            """
            def remove_control(self): # pylint: disable=arguments-differ
                MatrixIterator.remove_control(self, self._index)
            def _get_length(self):
                return len(produces_pairs)
            def _value_at(self, index):
                return produces_pairs[index], produces_empty[index]

        class PairIterator(MatrixIterator):
            """Iterates through produces_pairs one pair at a time, offering
            methods to remove controls and pairs without disturbing the
            iteration in progress.
            """
            def remove_pair(self): # pylint: disable=arguments-differ
                MatrixIterator.remove_pair(self, self._index)
            def _get_length(self):
                return len(produces_pairs[0]) if produces_pairs else 0
            def _value_at(self, index):
                return [
                    controlIndex
                    for controlIndex, produces in enumerate(produces_pairs)
                    if produces[index]
                    ]

        # Simplify the matrix as much as possible.
        # We go through a lot of trouble to minimize the size of the matrix,
        # since the generic solver has an exponential run time.
        while True:
            # Check whether each control can produce something that matches the
            # request query.
            control_it = ControlIterator()
            for produced, empty in control_it:
                num_pairs = produced.count(True)
                if num_pairs == 0:
                    # None of the pairs this control can produce are part of
                    # the request...
                    if empty:
                        # ...but it can produce nothing.
                        control_it.remove_control()
                    else:
                        # ...and it must produce something.
                        return False
                elif num_pairs == 1 and not empty:
                    # Exactly one pair that this control can produce is part of
                    # the request and it must produce something.
                    control_it.remove_control()
                    control_it.remove_pair(produced.index(True))

            # We can never produce more pairs than we have controls.
            # Check again now that the list of controls is possibly shorter.
            if produces_pairs and len(produces_pairs[0]) > len(produces_pairs):
                return False

            # Resolve request pairs that can only be produced by a single
            # control.
            pair_it = PairIterator()
            for producers in pair_it:
                num_controls = len(producers)
                if num_controls == 0:
                    # No control can produce this name-value pair.
                    return False
                elif num_controls == 1:
                    # Exactly one control can produce this name-value pair.
                    pair_it.remove_control(producers[0])
                    pair_it.remove_pair()

            if not (control_it.is_changed() or pair_it.is_changed()):
                # No more simplifications possible.
                break

        # For most forms, there will be nothing left after eliminating the
        # simple cases, because the control names do not overlap.
        if not produces_pairs:
            return True

        # Recursively try to find a solution.
        # We don't have to know the solution, only check its existence.
        remaining_pairs = list(range(len(produces_pairs[0])))
        def solve_recursive(control_index):
            if control_index == len(produces_pairs):
                return True
            produced = produces_pairs[control_index]
            # Note that remaining_pairs contains the same elements (not
            # necessarily in the same order) when this function returns
            # False; if it returns True then the contents of remaining_pairs
            # don't matter anymore.
            pair_index = remaining_pairs.pop()
            switch_index = 0
            while True:
                if produced[pair_index]:
                    if solve_recursive(control_index + 1):
                        return True
                if switch_index == len(remaining_pairs):
                    break
                remaining_pairs[switch_index], pair_index = \
                    pair_index, remaining_pairs[switch_index]
                switch_index += 1
            remaining_pairs.append(pair_index)
            return False
        return solve_recursive(0)

    def iter_requests(self):
        progress = 0
        # TODO: Design an algorithm to compute a good increment.
        increment = 83
        while True:
            index = progress
            query = []
            for alternatives in self.all_alternatives:
                index, choice = divmod(index, len(alternatives))
                alternative = alternatives[choice]
                if alternative[0] is not None:
                    query.append(alternative)
            yield Request(
                self.page_url, self.base_query + query, maybe_bad=True
                )
            progress = (progress + increment) % self.combinations
            if progress == 0:
                # Because increment and combinations are relatively prime,
                # reaching 0 means we've visited all indices.
                break}

Instance variables

var controls

Collection of Control objects that represent user interface input elements that contribute to the submitted query.

Methods

def __init__(self, page_url, method, controls)

Initialize form with the given page_url (action), method ('get' or 'post') and controls (input elements).

Source code
def __init__(self, page_url, method, controls):
    """Initialize form with the given `page_url` (action), `method`
    (`'get'` or `'post'`) and `controls` (input elements).
    """
    assert method in ('get', 'post')
    Referrer.__init__(self, page_url)
    self.method = method
    self.controls = controls
    """Collection of `apetest.control.Control` objects that represent
    user interface input elements that contribute to the submitted
    query.
    """

    # Gather all alternatives in a single data structure.
    # TODO: We could improve the chances of submitting valid free input by
    #       looking at what other pages fill in and the default value.
    #       For example, if we see numbers being filled in, it could be
    #       a numeric field. This feature would require the set of
    #       alternatives to change as we gather more hints about the
    #       control.
    self.all_alternatives = all_alternatives = []
    self.base_query = base_query = []
    combinations = 1
    for control in controls:
        # Filter out duplicates.
        alternatives = set(
            (None, None) if name is None or value is None
            else (name, value)
            for name, value in control.alternatives()
            )
        if len(alternatives) == 1:
            # If there is only once choice, make that choice now.
            name, value = alternatives.pop()
            if name is not None and value is not None:
                base_query.append((name, value))
        elif alternatives:
            # Store possible choices.
            all_alternatives.append(tuple(alternatives))
            combinations *= len(alternatives)

    self.combinations = combinations

    print('non-alternatives:', base_query)
    print('alternatives:', all_alternatives)
    print('combinations:', combinations)}

Inherited members

class LinkSet (ancestors: Referrer)

One or more links to different queries of the same page.

Source code
class LinkSet(Referrer):
    """One or more links to different queries of the same page."""

    def __init__(self):
        """Initialize an empty set of links.

        `Referrer.page_url` will be `None` until links are added.
        """
        Referrer.__init__(self, None) # page_url is set later

        self.links = set()
        """The links (`apetest.request.Request` objects) that were added."""

    def add(self, request):
        """Adds a linked `apetest.request.Request` to this set.

        All links in the set must point to the same page:
        the URL without the query must be equal.
        """

        if self.page_url is None:
            self.page_url = request.page_url
        else:
            assert self.page_url == request.page_url
        self.links.add(request)

    def has_request(self, request):
        return request in self.links

    def iter_requests(self):
        return iter(self.links)}

Instance variables

The links (Request objects) that were added.

Methods

def __init__(self)

Initialize an empty set of links.

Referrer.page_url will be None until links are added.

Source code
def __init__(self):
    """Initialize an empty set of links.

    `Referrer.page_url` will be `None` until links are added.
    """
    Referrer.__init__(self, None) # page_url is set later

    self.links = set()
    """The links (`apetest.request.Request` objects) that were added."""}
def add(self, request)

Adds a linked Request to this set.

All links in the set must point to the same page: the URL without the query must be equal.

Source code
def add(self, request):
    """Adds a linked `apetest.request.Request` to this set.

    All links in the set must point to the same page:
    the URL without the query must be equal.
    """

    if self.page_url is None:
        self.page_url = request.page_url
    else:
        assert self.page_url == request.page_url
    self.links.add(request)}

Inherited members

class Redirect (ancestors: Referrer)

An HTTP redirect from one URL to another.

Source code
class Redirect(Referrer):
    """An HTTP redirect from one URL to another."""

    def __init__(self, target):
        """Initialize a redirection to `target`."""
        Referrer.__init__(self, target.page_url)

        self.target = target
        """The `apetest.request.Request` that a user agent is redirected to."""

    def has_request(self, request):
        return request == self.target

    def iter_requests(self):
        yield self.target}

Instance variables

var target

The Request that a user agent is redirected to.

Methods

def __init__(self, target)

Initialize a redirection to target.

Source code
def __init__(self, target):
    """Initialize a redirection to `target`."""
    Referrer.__init__(self, target.page_url)

    self.target = target
    """The `apetest.request.Request` that a user agent is redirected to."""}

Inherited members

class Referrer

Models an entity that can generate requests. Examples of Referrers are hyperlinks, which generate one fixed request, and forms, which generate many requests.

Source code
class Referrer:
    """Models an entity that can generate requests.
    Examples of Referrers are hyperlinks, which generate one fixed request, and
    forms, which generate many requests.
    """

    def __init__(self, page_url):
        """Initialize the referrer's `page_url` from the argument and
        sets the `method` to `'get'`.
        """

        self.page_url = page_url
        """The URL of the page linked to, without query."""

        self.method = 'get'
        """The HTTP method to be used for generated requests, in lower case.

        Only `'get'` and `'post'` are currently supported.
        """

    def has_request(self, request):
        """Returns `True` iff this referrer could generate `request`.

        Note that it is possible for this function to return `True`
        even though the request in question is not generated by
        `Referrer.iter_requests`.
        """
        raise NotImplementedError

    def iter_requests(self):
        """Iterates through requests generated by this referrer.

        The requests should be returned in an order such that there is
        a lot of variation between subsequent requests. This improves
        the test coverage when we only have time to try the first N requests.
        The iteration is not expected to return every possible request that
        can be generated by this referrer, just a number of interesting ones.
        Note that free inputs such as text fields have an infinite value set,
        so it would not even be possible to generate everything.
        """
        raise NotImplementedError}

Subclasses

Instance variables

var method

The HTTP method to be used for generated requests, in lower case.

Only 'get' and 'post' are currently supported.

var page_url

The URL of the page linked to, without query.

Methods

def __init__(self, page_url)

Initialize the referrer's page_url from the argument and sets the method to 'get'.

Source code
def __init__(self, page_url):
    """Initialize the referrer's `page_url` from the argument and
    sets the `method` to `'get'`.
    """

    self.page_url = page_url
    """The URL of the page linked to, without query."""

    self.method = 'get'
    """The HTTP method to be used for generated requests, in lower case.

    Only `'get'` and `'post'` are currently supported.
    """}
def has_request(self, request)

Returns True iff this referrer could generate request.

Note that it is possible for this function to return True even though the request in question is not generated by Referrer.iter_requests().

Source code
def has_request(self, request):
    """Returns `True` iff this referrer could generate `request`.

    Note that it is possible for this function to return `True`
    even though the request in question is not generated by
    `Referrer.iter_requests`.
    """
    raise NotImplementedError}
def iter_requests(self)

Iterates through requests generated by this referrer.

The requests should be returned in an order such that there is a lot of variation between subsequent requests. This improves the test coverage when we only have time to try the first N requests. The iteration is not expected to return every possible request that can be generated by this referrer, just a number of interesting ones. Note that free inputs such as text fields have an infinite value set, so it would not even be possible to generate everything.

Source code
def iter_requests(self):
    """Iterates through requests generated by this referrer.

    The requests should be returned in an order such that there is
    a lot of variation between subsequent requests. This improves
    the test coverage when we only have time to try the first N requests.
    The iteration is not expected to return every possible request that
    can be generated by this referrer, just a number of interesting ones.
    Note that free inputs such as text fields have an infinite value set,
    so it would not even be possible to generate everything.
    """
    raise NotImplementedError}