Skip to content

Compile html5lib with Cython #524

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 22 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
c52e731
Get rid of getPhases
gsnedders Jun 23, 2020
8cff6aa
fixup! Get rid of getPhases
gsnedders Jun 23, 2020
6eb4d2d
Move tests
gsnedders Oct 18, 2020
d2474af
Make InputStream.readChunk default an int
gsnedders Jan 4, 2021
0904df3
Remove last trace of Tokenizer.lastFourChars
gsnedders Jan 4, 2021
8ebff2e
Move Tokenizer.state to Tokenizer._state
gsnedders Jan 4, 2021
4a8e28a
Instead of comparing with a set of ints, use maths
gsnedders Jan 4, 2021
2ae13cc
Remove unused Tokenizer.escape/escapeFlag
gsnedders Jan 4, 2021
c22d069
Avoid needless setter write, mutate value directly
gsnedders Jan 5, 2021
81b3aaf
Reduce list/tuple access
gsnedders Jan 5, 2021
47df02b
Move lowercasing to _ascii module
gsnedders Jan 5, 2021
7d7a079
Always initialize Parser.tokenizer
gsnedders Jan 5, 2021
1acb5dd
Remove long unused Parser.lastPhase/Parser.beforeRCDataPhase
gsnedders Jan 5, 2021
b6a6484
Speed-up Parser.mainLoop a bit
gsnedders Jan 5, 2021
4822712
Get rid of more frozenset calls around constants
gsnedders Jan 5, 2021
f06451e
Add assert for leavingThisState
gsnedders Oct 30, 2020
9e9ff5f
Avoid recursion in etree.testSerializer
gsnedders Oct 27, 2020
2036738
Get rid of remaining non-decorator property()
gsnedders Oct 27, 2020
2c8e0ec
Call super().f() rather than Base.f(self)
gsnedders Jan 5, 2021
84cbc20
Move _getEtreeTag out of the class
gsnedders Oct 29, 2020
8b89668
Change attributes to be created as dicts from day one
gsnedders Oct 29, 2020
e65c433
Start of Cythonizing the tokenizer
gsnedders Jan 4, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
Get rid of remaining non-decorator property()
  • Loading branch information
gsnedders committed Jan 5, 2021
commit 20367382da0082d7b561aab4693bfc334be327bc
19 changes: 9 additions & 10 deletions html5lib/_inputstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,26 +611,25 @@ def previous(self):
self._position = p = p - 1
return self[p:p + 1]

def setPosition(self, position):
if self._position >= len(self):
raise StopIteration
self._position = position

def getPosition(self):
@property
def position(self):
if self._position >= len(self):
raise StopIteration
if self._position >= 0:
return self._position
else:
return None

position = property(getPosition, setPosition)
@position.setter
def position(self, position):
if self._position >= len(self):
raise StopIteration
self._position = position

def getCurrentByte(self):
@property
def currentByte(self):
return self[self.position:self.position + 1]

currentByte = property(getCurrentByte)

def skip(self, chars=spaceCharactersBytes):
"""Skip past a list of characters"""
p = self.position # use property for the error-checking
Expand Down
8 changes: 4 additions & 4 deletions html5lib/treebuilders/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,10 +306,12 @@ def createElement(self, token):
element.attributes = token["data"]
return element

def _getInsertFromTable(self):
@property
def insertFromTable(self):
return self._insertFromTable

def _setInsertFromTable(self, value):
@insertFromTable.setter
def insertFromTable(self, value):
"""Switch the function used to insert an element from the
normal one to the misnested table one and back again"""
self._insertFromTable = value
Expand All @@ -318,8 +320,6 @@ def _setInsertFromTable(self, value):
else:
self.insertElement = self.insertElementNormal

insertFromTable = property(_getInsertFromTable, _setInsertFromTable)

def insertElementNormal(self, token):
name = token["name"]
assert isinstance(name, text_type), "Element %s not unicode" % name
Expand Down
17 changes: 9 additions & 8 deletions html5lib/treebuilders/dom.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,9 @@ def __init__(self, element):
base.Node.__init__(self, element.nodeName)
self.element = element

namespace = property(lambda self: hasattr(self.element, "namespaceURI") and
self.element.namespaceURI or None)
@property
def namespace(self):
return getattr(self.element, "namespaceURI", None)

def appendChild(self, node):
node.parent = self
Expand Down Expand Up @@ -88,10 +89,12 @@ def reparentChildren(self, newParent):
newParent.element.appendChild(child)
self.childNodes = []

def getAttributes(self):
@property
def attributes(self):
return AttrList(self.element)

def setAttributes(self, attributes):
@attributes.setter
def attributes(self, attributes):
if attributes:
for name, value in list(attributes.items()):
if isinstance(name, tuple):
Expand All @@ -104,22 +107,20 @@ def setAttributes(self, attributes):
else:
self.element.setAttribute(
name, value)
attributes = property(getAttributes, setAttributes)

def cloneNode(self):
return NodeBuilder(self.element.cloneNode(False))

def hasContent(self):
return self.element.hasChildNodes()

def getNameTuple(self):
@property
def nameTuple(self):
if self.namespace is None:
return namespaces["html"], self.name
else:
return self.namespace, self.name

nameTuple = property(getNameTuple)

class TreeBuilder(base.TreeBuilder): # pylint:disable=unused-variable
def documentClass(self):
self.dom = Dom.getDOMImplementation().createDocument(None, None, None)
Expand Down
62 changes: 31 additions & 31 deletions html5lib/treebuilders/etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,28 +41,30 @@ def _getETreeTag(self, name, namespace):
etree_tag = "{%s}%s" % (namespace, name)
return etree_tag

def _setName(self, name):
self._name = name
self._element.tag = self._getETreeTag(self._name, self._namespace)

def _getName(self):
@property
def name(self):
return self._name

name = property(_getName, _setName)

def _setNamespace(self, namespace):
self._namespace = namespace
@name.setter
def name(self, name):
self._name = name
self._element.tag = self._getETreeTag(self._name, self._namespace)

def _getNamespace(self):
@property
def namespace(self):
return self._namespace

namespace = property(_getNamespace, _setNamespace)
@namespace.setter
def namespace(self, namespace):
self._namespace = namespace
self._element.tag = self._getETreeTag(self._name, self._namespace)

def _getAttributes(self):
@property
def attributes(self):
return self._element.attrib

def _setAttributes(self, attributes):
@attributes.setter
def attributes(self, attributes):
el_attrib = self._element.attrib
el_attrib.clear()
if attributes:
Expand All @@ -75,19 +77,17 @@ def _setAttributes(self, attributes):
name = key
el_attrib[name] = value

attributes = property(_getAttributes, _setAttributes)

def _getChildNodes(self):
@property
def childNodes(self):
return self._childNodes

def _setChildNodes(self, value):
@childNodes.setter
def childNodes(self, value):
del self._element[:]
self._childNodes = []
for element in value:
self.insertChild(element)

childNodes = property(_getChildNodes, _setChildNodes)

def hasContent(self):
"""Return true if the node has children or text"""
return bool(self._element.text or len(self._element))
Expand Down Expand Up @@ -156,39 +156,39 @@ def __init__(self, data):
self._childNodes = []
self._flags = []

def _getData(self):
@property
def data(self):
return self._element.text

def _setData(self, value):
@data.setter
def data(self, value):
self._element.text = value

data = property(_getData, _setData)

class DocumentType(Element):
def __init__(self, name, publicId, systemId):
Element.__init__(self, "<!DOCTYPE>")
self._element.text = name
self.publicId = publicId
self.systemId = systemId

def _getPublicId(self):
@property
def publicId(self):
return self._element.get("publicId", "")

def _setPublicId(self, value):
@publicId.setter
def publicId(self, value):
if value is not None:
self._element.set("publicId", value)

publicId = property(_getPublicId, _setPublicId)

def _getSystemId(self):
@property
def systemId(self):
return self._element.get("systemId", "")

def _setSystemId(self, value):
@systemId.setter
def systemId(self, value):
if value is not None:
self._element.set("systemId", value)

systemId = property(_getSystemId, _setSystemId)

class Document(Element):
def __init__(self):
Element.__init__(self, "DOCUMENT_ROOT")
Expand Down
37 changes: 18 additions & 19 deletions html5lib/treebuilders/etree_lxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,10 @@ def appendChild(self, element):

last.addnext(element._element)

def _getChildNodes(self):
@property
def childNodes(self):
return self._childNodes

childNodes = property(_getChildNodes)


def testSerializer(element):
rv = []
Expand Down Expand Up @@ -233,26 +232,26 @@ def __init__(self, name, namespace):
builder.Element.__init__(self, name, namespace=namespace)
self._attributes = Attributes(self)

def _setName(self, name):
@property
def name(self):
return infosetFilter.fromXmlName(self._name)

@name.setter
def name(self, name):
self._name = infosetFilter.coerceElement(name)
self._element.tag = self._getETreeTag(
self._name, self._namespace)

def _getName(self):
return infosetFilter.fromXmlName(self._name)

name = property(_getName, _setName)

def _getAttributes(self):
@property
def attributes(self):
return self._attributes

def _setAttributes(self, value):
@attributes.setter
def attributes(self, value):
attributes = self.attributes
attributes.clear()
attributes.update(value)

attributes = property(_getAttributes, _setAttributes)

def insertText(self, data, insertBefore=None):
data = infosetFilter.coerceCharacters(data)
builder.Element.insertText(self, data, insertBefore)
Expand All @@ -268,14 +267,14 @@ def __init__(self, data):
data = infosetFilter.coerceComment(data)
builder.Comment.__init__(self, data)

def _setData(self, data):
data = infosetFilter.coerceComment(data)
self._element.text = data

def _getData(self):
@property
def data(self):
return self._element.text

data = property(_getData, _setData)
@data.setter
def data(self, data):
data = infosetFilter.coerceComment(data)
self._element.text = data

self.elementClass = Element
self.commentClass = Comment
Expand Down