apetest.referrer
module
Models links between documents.
LinkSet
models links such as <a href=
HTML tags.
Redirect
models a redirection from one URL to another.
Form
models a form that can generate requests for other documents
when submitted.
Source code
# SPDX-License-Identifier: BSD-3-Clause
"""Models links between documents.
`LinkSet` models links such as `<a href=` HTML tags.
`Redirect` models a redirection from one URL to another.
`Form` models a form that can generate requests for other documents
when submitted.
"""
from apetest.request import Request
class Referrer:
"""Models an entity that can generate requests.
Examples of Referrers are hyperlinks, which generate one fixed request, and
forms, which generate many requests.
"""
def __init__(self, page_url):
"""Initialize the referrer's `page_url` from the argument and
sets the `method` to `'get'`.
"""
self.page_url = page_url
"""The URL of the page linked to, without query."""
self.method = 'get'
"""The HTTP method to be used for generated requests, in lower case.
Only `'get'` and `'post'` are currently supported.
"""
def has_request(self, request):
"""Returns `True` iff this referrer could generate `request`.
Note that it is possible for this function to return `True`
even though the request in question is not generated by
`Referrer.iter_requests`.
"""
raise NotImplementedError
def iter_requests(self):
"""Iterates through requests generated by this referrer.
The requests should be returned in an order such that there is
a lot of variation between subsequent requests. This improves
the test coverage when we only have time to try the first N requests.
The iteration is not expected to return every possible request that
can be generated by this referrer, just a number of interesting ones.
Note that free inputs such as text fields have an infinite value set,
so it would not even be possible to generate everything.
"""
raise NotImplementedError
class Redirect(Referrer):
"""An HTTP redirect from one URL to another."""
def __init__(self, target):
"""Initialize a redirection to `target`."""
Referrer.__init__(self, target.page_url)
self.target = target
"""The `apetest.request.Request` that a user agent is redirected to."""
def has_request(self, request):
return request == self.target
def iter_requests(self):
yield self.target
class LinkSet(Referrer):
"""One or more links to different queries of the same page."""
def __init__(self):
"""Initialize an empty set of links.
`Referrer.page_url` will be `None` until links are added.
"""
Referrer.__init__(self, None) # page_url is set later
self.links = set()
"""The links (`apetest.request.Request` objects) that were added."""
def add(self, request):
"""Adds a linked `apetest.request.Request` to this set.
All links in the set must point to the same page:
the URL without the query must be equal.
"""
if self.page_url is None:
self.page_url = request.page_url
else:
assert self.page_url == request.page_url
self.links.add(request)
def has_request(self, request):
return request in self.links
def iter_requests(self):
return iter(self.links)
class Form(Referrer):
"""An HTML form that can generate a request upon submission."""
def __init__(self, page_url, method, controls):
"""Initialize form with the given `page_url` (action), `method`
(`'get'` or `'post'`) and `controls` (input elements).
"""
assert method in ('get', 'post')
Referrer.__init__(self, page_url)
self.method = method
self.controls = controls
"""Collection of `apetest.control.Control` objects that represent
user interface input elements that contribute to the submitted
query.
"""
# Gather all alternatives in a single data structure.
# TODO: We could improve the chances of submitting valid free input by
# looking at what other pages fill in and the default value.
# For example, if we see numbers being filled in, it could be
# a numeric field. This feature would require the set of
# alternatives to change as we gather more hints about the
# control.
self.all_alternatives = all_alternatives = []
self.base_query = base_query = []
combinations = 1
for control in controls:
# Filter out duplicates.
alternatives = set(
(None, None) if name is None or value is None
else (name, value)
for name, value in control.alternatives()
)
if len(alternatives) == 1:
# If there is only once choice, make that choice now.
name, value = alternatives.pop()
if name is not None and value is not None:
base_query.append((name, value))
elif alternatives:
# Store possible choices.
all_alternatives.append(tuple(alternatives))
combinations *= len(alternatives)
self.combinations = combinations
print('non-alternatives:', base_query)
print('alternatives:', all_alternatives)
print('combinations:', combinations)
def has_request(self, request):
# Check if page matches.
if self.page_url != request.page_url:
return False
# We can never produce more pairs than we have controls.
if len(request.query) > len(self.controls):
return False
# Create a matrix of bools that stores which control and pair
# combinations are possible.
produces_pairs = [
[control.has_alternative(name, value)
for name, value in request.query]
for control in self.controls
]
# Store which controls can be omitted from the submission.
produces_empty = [
control.has_alternative(None, None)
for control in self.controls
]
class MatrixIterator:
"""Base class for ControlIterator and PairIterator.
"""
# pylint: disable=missing-docstring
def __init__(self):
self._index = self._get_length()
self._changed = False
def __iter__(self):
return self
def _get_length(self):
raise NotImplementedError
def _value_at(self, index):
raise NotImplementedError
def is_changed(self):
return self._changed
def remove_control(self, control_index):
del produces_pairs[control_index]
del produces_empty[control_index]
self._changed = True
def remove_pair(self, pair_index):
for produced in produces_pairs:
del produced[pair_index]
self._changed = True
def __next__(self):
index = self._index
if index == 0:
raise StopIteration
index -= 1
self._index = index
return self._value_at(index)
class ControlIterator(MatrixIterator):
"""Iterates through produces_pairs and produces_empty one control
at a time, offering methods to remove controls and pairs without
disturbing the iteration in progress.
"""
def remove_control(self): # pylint: disable=arguments-differ
MatrixIterator.remove_control(self, self._index)
def _get_length(self):
return len(produces_pairs)
def _value_at(self, index):
return produces_pairs[index], produces_empty[index]
class PairIterator(MatrixIterator):
"""Iterates through produces_pairs one pair at a time, offering
methods to remove controls and pairs without disturbing the
iteration in progress.
"""
def remove_pair(self): # pylint: disable=arguments-differ
MatrixIterator.remove_pair(self, self._index)
def _get_length(self):
return len(produces_pairs[0]) if produces_pairs else 0
def _value_at(self, index):
return [
controlIndex
for controlIndex, produces in enumerate(produces_pairs)
if produces[index]
]
# Simplify the matrix as much as possible.
# We go through a lot of trouble to minimize the size of the matrix,
# since the generic solver has an exponential run time.
while True:
# Check whether each control can produce something that matches the
# request query.
control_it = ControlIterator()
for produced, empty in control_it:
num_pairs = produced.count(True)
if num_pairs == 0:
# None of the pairs this control can produce are part of
# the request...
if empty:
# ...but it can produce nothing.
control_it.remove_control()
else:
# ...and it must produce something.
return False
elif num_pairs == 1 and not empty:
# Exactly one pair that this control can produce is part of
# the request and it must produce something.
control_it.remove_control()
control_it.remove_pair(produced.index(True))
# We can never produce more pairs than we have controls.
# Check again now that the list of controls is possibly shorter.
if produces_pairs and len(produces_pairs[0]) > len(produces_pairs):
return False
# Resolve request pairs that can only be produced by a single
# control.
pair_it = PairIterator()
for producers in pair_it:
num_controls = len(producers)
if num_controls == 0:
# No control can produce this name-value pair.
return False
elif num_controls == 1:
# Exactly one control can produce this name-value pair.
pair_it.remove_control(producers[0])
pair_it.remove_pair()
if not (control_it.is_changed() or pair_it.is_changed()):
# No more simplifications possible.
break
# For most forms, there will be nothing left after eliminating the
# simple cases, because the control names do not overlap.
if not produces_pairs:
return True
# Recursively try to find a solution.
# We don't have to know the solution, only check its existence.
remaining_pairs = list(range(len(produces_pairs[0])))
def solve_recursive(control_index):
if control_index == len(produces_pairs):
return True
produced = produces_pairs[control_index]
# Note that remaining_pairs contains the same elements (not
# necessarily in the same order) when this function returns
# False; if it returns True then the contents of remaining_pairs
# don't matter anymore.
pair_index = remaining_pairs.pop()
switch_index = 0
while True:
if produced[pair_index]:
if solve_recursive(control_index + 1):
return True
if switch_index == len(remaining_pairs):
break
remaining_pairs[switch_index], pair_index = \
pair_index, remaining_pairs[switch_index]
switch_index += 1
remaining_pairs.append(pair_index)
return False
return solve_recursive(0)
def iter_requests(self):
progress = 0
# TODO: Design an algorithm to compute a good increment.
increment = 83
while True:
index = progress
query = []
for alternatives in self.all_alternatives:
index, choice = divmod(index, len(alternatives))
alternative = alternatives[choice]
if alternative[0] is not None:
query.append(alternative)
yield Request(
self.page_url, self.base_query + query, maybe_bad=True
)
progress = (progress + increment) % self.combinations
if progress == 0:
# Because increment and combinations are relatively prime,
# reaching 0 means we've visited all indices.
break}
Classes
class Form (ancestors: Referrer)
-
An HTML form that can generate a request upon submission.
Source code
class Form(Referrer): """An HTML form that can generate a request upon submission.""" def __init__(self, page_url, method, controls): """Initialize form with the given `page_url` (action), `method` (`'get'` or `'post'`) and `controls` (input elements). """ assert method in ('get', 'post') Referrer.__init__(self, page_url) self.method = method self.controls = controls """Collection of `apetest.control.Control` objects that represent user interface input elements that contribute to the submitted query. """ # Gather all alternatives in a single data structure. # TODO: We could improve the chances of submitting valid free input by # looking at what other pages fill in and the default value. # For example, if we see numbers being filled in, it could be # a numeric field. This feature would require the set of # alternatives to change as we gather more hints about the # control. self.all_alternatives = all_alternatives = [] self.base_query = base_query = [] combinations = 1 for control in controls: # Filter out duplicates. alternatives = set( (None, None) if name is None or value is None else (name, value) for name, value in control.alternatives() ) if len(alternatives) == 1: # If there is only once choice, make that choice now. name, value = alternatives.pop() if name is not None and value is not None: base_query.append((name, value)) elif alternatives: # Store possible choices. all_alternatives.append(tuple(alternatives)) combinations *= len(alternatives) self.combinations = combinations print('non-alternatives:', base_query) print('alternatives:', all_alternatives) print('combinations:', combinations) def has_request(self, request): # Check if page matches. if self.page_url != request.page_url: return False # We can never produce more pairs than we have controls. if len(request.query) > len(self.controls): return False # Create a matrix of bools that stores which control and pair # combinations are possible. produces_pairs = [ [control.has_alternative(name, value) for name, value in request.query] for control in self.controls ] # Store which controls can be omitted from the submission. produces_empty = [ control.has_alternative(None, None) for control in self.controls ] class MatrixIterator: """Base class for ControlIterator and PairIterator. """ # pylint: disable=missing-docstring def __init__(self): self._index = self._get_length() self._changed = False def __iter__(self): return self def _get_length(self): raise NotImplementedError def _value_at(self, index): raise NotImplementedError def is_changed(self): return self._changed def remove_control(self, control_index): del produces_pairs[control_index] del produces_empty[control_index] self._changed = True def remove_pair(self, pair_index): for produced in produces_pairs: del produced[pair_index] self._changed = True def __next__(self): index = self._index if index == 0: raise StopIteration index -= 1 self._index = index return self._value_at(index) class ControlIterator(MatrixIterator): """Iterates through produces_pairs and produces_empty one control at a time, offering methods to remove controls and pairs without disturbing the iteration in progress. """ def remove_control(self): # pylint: disable=arguments-differ MatrixIterator.remove_control(self, self._index) def _get_length(self): return len(produces_pairs) def _value_at(self, index): return produces_pairs[index], produces_empty[index] class PairIterator(MatrixIterator): """Iterates through produces_pairs one pair at a time, offering methods to remove controls and pairs without disturbing the iteration in progress. """ def remove_pair(self): # pylint: disable=arguments-differ MatrixIterator.remove_pair(self, self._index) def _get_length(self): return len(produces_pairs[0]) if produces_pairs else 0 def _value_at(self, index): return [ controlIndex for controlIndex, produces in enumerate(produces_pairs) if produces[index] ] # Simplify the matrix as much as possible. # We go through a lot of trouble to minimize the size of the matrix, # since the generic solver has an exponential run time. while True: # Check whether each control can produce something that matches the # request query. control_it = ControlIterator() for produced, empty in control_it: num_pairs = produced.count(True) if num_pairs == 0: # None of the pairs this control can produce are part of # the request... if empty: # ...but it can produce nothing. control_it.remove_control() else: # ...and it must produce something. return False elif num_pairs == 1 and not empty: # Exactly one pair that this control can produce is part of # the request and it must produce something. control_it.remove_control() control_it.remove_pair(produced.index(True)) # We can never produce more pairs than we have controls. # Check again now that the list of controls is possibly shorter. if produces_pairs and len(produces_pairs[0]) > len(produces_pairs): return False # Resolve request pairs that can only be produced by a single # control. pair_it = PairIterator() for producers in pair_it: num_controls = len(producers) if num_controls == 0: # No control can produce this name-value pair. return False elif num_controls == 1: # Exactly one control can produce this name-value pair. pair_it.remove_control(producers[0]) pair_it.remove_pair() if not (control_it.is_changed() or pair_it.is_changed()): # No more simplifications possible. break # For most forms, there will be nothing left after eliminating the # simple cases, because the control names do not overlap. if not produces_pairs: return True # Recursively try to find a solution. # We don't have to know the solution, only check its existence. remaining_pairs = list(range(len(produces_pairs[0]))) def solve_recursive(control_index): if control_index == len(produces_pairs): return True produced = produces_pairs[control_index] # Note that remaining_pairs contains the same elements (not # necessarily in the same order) when this function returns # False; if it returns True then the contents of remaining_pairs # don't matter anymore. pair_index = remaining_pairs.pop() switch_index = 0 while True: if produced[pair_index]: if solve_recursive(control_index + 1): return True if switch_index == len(remaining_pairs): break remaining_pairs[switch_index], pair_index = \ pair_index, remaining_pairs[switch_index] switch_index += 1 remaining_pairs.append(pair_index) return False return solve_recursive(0) def iter_requests(self): progress = 0 # TODO: Design an algorithm to compute a good increment. increment = 83 while True: index = progress query = [] for alternatives in self.all_alternatives: index, choice = divmod(index, len(alternatives)) alternative = alternatives[choice] if alternative[0] is not None: query.append(alternative) yield Request( self.page_url, self.base_query + query, maybe_bad=True ) progress = (progress + increment) % self.combinations if progress == 0: # Because increment and combinations are relatively prime, # reaching 0 means we've visited all indices. break}
Instance variables
var controls
-
Collection of
Control
objects that represent user interface input elements that contribute to the submitted query.
Methods
def __init__(self, page_url, method, controls)
-
Initialize form with the given
page_url
(action),method
('get'
or'post'
) andcontrols
(input elements).Source code
def __init__(self, page_url, method, controls): """Initialize form with the given `page_url` (action), `method` (`'get'` or `'post'`) and `controls` (input elements). """ assert method in ('get', 'post') Referrer.__init__(self, page_url) self.method = method self.controls = controls """Collection of `apetest.control.Control` objects that represent user interface input elements that contribute to the submitted query. """ # Gather all alternatives in a single data structure. # TODO: We could improve the chances of submitting valid free input by # looking at what other pages fill in and the default value. # For example, if we see numbers being filled in, it could be # a numeric field. This feature would require the set of # alternatives to change as we gather more hints about the # control. self.all_alternatives = all_alternatives = [] self.base_query = base_query = [] combinations = 1 for control in controls: # Filter out duplicates. alternatives = set( (None, None) if name is None or value is None else (name, value) for name, value in control.alternatives() ) if len(alternatives) == 1: # If there is only once choice, make that choice now. name, value = alternatives.pop() if name is not None and value is not None: base_query.append((name, value)) elif alternatives: # Store possible choices. all_alternatives.append(tuple(alternatives)) combinations *= len(alternatives) self.combinations = combinations print('non-alternatives:', base_query) print('alternatives:', all_alternatives) print('combinations:', combinations)}
Inherited members
class LinkSet (ancestors: Referrer)
-
One or more links to different queries of the same page.
Source code
class LinkSet(Referrer): """One or more links to different queries of the same page.""" def __init__(self): """Initialize an empty set of links. `Referrer.page_url` will be `None` until links are added. """ Referrer.__init__(self, None) # page_url is set later self.links = set() """The links (`apetest.request.Request` objects) that were added.""" def add(self, request): """Adds a linked `apetest.request.Request` to this set. All links in the set must point to the same page: the URL without the query must be equal. """ if self.page_url is None: self.page_url = request.page_url else: assert self.page_url == request.page_url self.links.add(request) def has_request(self, request): return request in self.links def iter_requests(self): return iter(self.links)}
Instance variables
var links
-
The links (
Request
objects) that were added.
Methods
def __init__(self)
-
Initialize an empty set of links.
Referrer.page_url
will beNone
until links are added.Source code
def __init__(self): """Initialize an empty set of links. `Referrer.page_url` will be `None` until links are added. """ Referrer.__init__(self, None) # page_url is set later self.links = set() """The links (`apetest.request.Request` objects) that were added."""}
def add(self, request)
-
Adds a linked
Request
to this set.All links in the set must point to the same page: the URL without the query must be equal.
Source code
def add(self, request): """Adds a linked `apetest.request.Request` to this set. All links in the set must point to the same page: the URL without the query must be equal. """ if self.page_url is None: self.page_url = request.page_url else: assert self.page_url == request.page_url self.links.add(request)}
Inherited members
class Redirect (ancestors: Referrer)
-
An HTTP redirect from one URL to another.
Source code
class Redirect(Referrer): """An HTTP redirect from one URL to another.""" def __init__(self, target): """Initialize a redirection to `target`.""" Referrer.__init__(self, target.page_url) self.target = target """The `apetest.request.Request` that a user agent is redirected to.""" def has_request(self, request): return request == self.target def iter_requests(self): yield self.target}
Instance variables
var target
-
The
Request
that a user agent is redirected to.
Methods
def __init__(self, target)
-
Initialize a redirection to
target
.Source code
def __init__(self, target): """Initialize a redirection to `target`.""" Referrer.__init__(self, target.page_url) self.target = target """The `apetest.request.Request` that a user agent is redirected to."""}
Inherited members
class Referrer
-
Models an entity that can generate requests. Examples of Referrers are hyperlinks, which generate one fixed request, and forms, which generate many requests.
Source code
class Referrer: """Models an entity that can generate requests. Examples of Referrers are hyperlinks, which generate one fixed request, and forms, which generate many requests. """ def __init__(self, page_url): """Initialize the referrer's `page_url` from the argument and sets the `method` to `'get'`. """ self.page_url = page_url """The URL of the page linked to, without query.""" self.method = 'get' """The HTTP method to be used for generated requests, in lower case. Only `'get'` and `'post'` are currently supported. """ def has_request(self, request): """Returns `True` iff this referrer could generate `request`. Note that it is possible for this function to return `True` even though the request in question is not generated by `Referrer.iter_requests`. """ raise NotImplementedError def iter_requests(self): """Iterates through requests generated by this referrer. The requests should be returned in an order such that there is a lot of variation between subsequent requests. This improves the test coverage when we only have time to try the first N requests. The iteration is not expected to return every possible request that can be generated by this referrer, just a number of interesting ones. Note that free inputs such as text fields have an infinite value set, so it would not even be possible to generate everything. """ raise NotImplementedError}
Subclasses
Instance variables
var method
-
The HTTP method to be used for generated requests, in lower case.
Only
'get'
and'post'
are currently supported. var page_url
-
The URL of the page linked to, without query.
Methods
def __init__(self, page_url)
-
Initialize the referrer's
page_url
from the argument and sets themethod
to'get'
.Source code
def __init__(self, page_url): """Initialize the referrer's `page_url` from the argument and sets the `method` to `'get'`. """ self.page_url = page_url """The URL of the page linked to, without query.""" self.method = 'get' """The HTTP method to be used for generated requests, in lower case. Only `'get'` and `'post'` are currently supported. """}
def has_request(self, request)
-
Returns
True
iff this referrer could generaterequest
.Note that it is possible for this function to return
True
even though the request in question is not generated byReferrer.iter_requests()
.Source code
def has_request(self, request): """Returns `True` iff this referrer could generate `request`. Note that it is possible for this function to return `True` even though the request in question is not generated by `Referrer.iter_requests`. """ raise NotImplementedError}
def iter_requests(self)
-
Iterates through requests generated by this referrer.
The requests should be returned in an order such that there is a lot of variation between subsequent requests. This improves the test coverage when we only have time to try the first N requests. The iteration is not expected to return every possible request that can be generated by this referrer, just a number of interesting ones. Note that free inputs such as text fields have an infinite value set, so it would not even be possible to generate everything.
Source code
def iter_requests(self): """Iterates through requests generated by this referrer. The requests should be returned in an order such that there is a lot of variation between subsequent requests. This improves the test coverage when we only have time to try the first N requests. The iteration is not expected to return every possible request that can be generated by this referrer, just a number of interesting ones. Note that free inputs such as text fields have an infinite value set, so it would not even be possible to generate everything. """ raise NotImplementedError}