diff --git a/.flake8 b/.flake8
new file mode 100644
index 00000000..b33811f1
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,2 @@
+[flake8]
+ignore = E501, W503 
\ No newline at end of file
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..80224f9e
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+tests/samples/* linguist-vendored
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
new file mode 100644
index 00000000..23f16106
--- /dev/null
+++ b/.github/workflows/python-package.yml
@@ -0,0 +1,40 @@
+# This workflow installs Python dependencies, then lints and tests the package on a variety of Python versions
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: Python package
+
+on:
+  push:
+    branches: [ "master" ]
+  pull_request:
+    branches: [ "master" ]
+
+jobs:
+  build:
+    # One job per interpreter; fail-fast is off so a failure on one version does not cancel the others.
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
+    # setup-python v5 runs on Node 20; v3 was built for the deprecated Node 16 action runtime.
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        python -m pip install flake8 pytest
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Test with pytest
+      run: |
+        pytest
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
new file mode 100644
index 00000000..bdaab28a
--- /dev/null
+++ b/.github/workflows/python-publish.yml
@@ -0,0 +1,39 @@
+# This workflow builds the package with `python -m build` and publishes it to PyPI when a release is published
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
+
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+name: Upload Python Package
+
+on:
+  release:
+    types: [published]
+
+permissions:
+  contents: read
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+    # checkout v4 / setup-python v5 match the test workflow and avoid the deprecated Node 16 runtime.
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.x'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install build
+    - name: Build package
+      run: python -m build
+    - name: Publish package
+      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
+      with:
+        user: __token__
+        password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/.gitignore b/.gitignore
index 0d20b648..b532e65e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,18 @@
 *.pyc
+__pycache__
+*.egg-info
+/build
+/dist
+/bin
+/include
+/lib
+/local
+/man
+nosetests.xml
+.coverage
+.tox
+.idea
+.cache
+/.noseids
+/.venv
+/poetry.lock
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 00000000..ea56f519
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,29 @@
+language: python
+os: linux
+cache: pip
+# Explicit job list; the nightly and PyPy jobs are allowed to fail without failing the build.
+matrix:
+  include:
+    - name: "Python 3.8 on Linux"
+      dist: xenial
+      python: "3.8"  # quoted so the version stays a string (an unquoted 3.10 would parse as 3.1)
+    - name: "Python 3.9 Nightly on Linux"
+      dist: bionic
+      python: nightly
+    - name: "Pypy 3 on Linux"
+      python: pypy3
+  allow_failures:
+    - python: nightly
+    - python: pypy3
+    - os: osx  # no macOS job is listed under include above
+# PIP defaults to pip3 unless it is already set in the environment.
+install:
+  - if [ $PIP ]; then true; else PIP=pip3; fi
+  - travis_retry $PIP install -U pip wheel tox-travis pytest-cov codecov
+  - travis_retry $PIP install -U -r requirements.txt -e ".[test]"
+
+script:
+  - tox
+
+after_success:
+  - codecov
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..261eeb9e
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..9caf08a5
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,73 @@
+# Makefile to help automate tasks. The first rule is `test`, so a plain `make` runs the tests.
+WD := $(shell pwd)
+PY := .venv/bin/python
+PIP := .venv/bin/pip
+PEP8 := .venv/bin/pep8
+NOSE := .venv/bin/nosetests
+TWINE := .venv/bin/twine
+
+# ###########
+# Tests rule!
+# ###########
+.PHONY: test
+test: venv develop $(NOSE)
+	$(NOSE) --with-id -s tests
+# nosetests has no recipe of its own; it is expected to be installed by `setup`.
+$(NOSE): setup
+
+# #######
+# INSTALL
+# #######
+.PHONY: all
+all: setup develop
+
+venv: .venv/bin/python
+# Install the dev requirements; "already satisfied" lines are filtered and "|| true" forces success.
+setup: venv
+	$(PIP) install -r requirements-dev.txt | grep -v "already satisfied" || true
+# Create .venv with python3's venv module, falling back to the virtualenv command.
+.venv/bin/python:
+	test -d .venv || which python3 && python3 -m venv .venv || virtualenv .venv
+
+.PHONY: clean
+clean:
+	rm -rf .venv
+# Install the package in development mode; the egg-link is presumably created by `setup.py develop`.
+develop: .venv/lib/python*/site-packages/readability-lxml.egg-link
+
+.venv/lib/python*/site-packages/readability-lxml.egg-link:
+	$(PY) setup.py develop
+
+
+# ###########
+# Development
+# ###########
+.PHONY: clean_all
+clean_all: clean  # alias for clean; the previous prerequisite, clean_venv, had no rule
+# Build the distributions with Poetry.
+.PHONY: build
+build:
+	poetry build
+
+# ###########
+# Deploy
+# ###########
+.PHONY: dist
+dist:
+	$(PY) -m pip install wheel
+	$(PY) setup.py sdist bdist_wheel
+	$(TWINE) check dist/*
+
+.PHONY: upload
+upload:
+	$(TWINE) upload dist/*
+# bump: edit the version by hand, copy it into the first version line of pyproject.toml, commit, tag, push tags.
+.PHONY: bump
+bump:
+	$(EDITOR) readability/__init__.py
+# The version is read by the shell because make expands the whole recipe before the editor runs.
+	VERSION=$$(grep "__version__" readability/__init__.py | cut -d'"' -f2) && test -n "$$VERSION" && \
+	sed -i '0,/version = ".*"/s//version = "'"$$VERSION"'"/' pyproject.toml && \
+	git commit -m "Bump version to $$VERSION" pyproject.toml readability/__init__.py && \
+	git tag "$$VERSION" && \
+	git push --tags
diff --git a/README b/README
deleted file mode 100644
index f7b86b35..00000000
--- a/README
+++ /dev/null
@@ -1,23 +0,0 @@
-This code is under the Apache License 2.0.  http://www.apache.org/licenses/LICENSE-2.0
-
-This is a python port of a ruby port of arc90's readability project
-
-http://lab.arc90.com/experiments/readability/
-
-Given a html document, it pulls out the main body text and cleans it up.
-
-Ruby port by starrhorne and iterationlabs
-Python port by gfxmonk
-
-This port uses BeautifulSoup for the HTML parsing. That means it can be
-a little slow, but will work on Google App Engine (unlike libxml-based
-libraries)
-
-
-**note**: I don't currently have any plans for using or improving this
-library, and it's far from perfect (slow, and almost certainly buggy).
-So if you do something cool with it or have a better tool that does
-the same job, please let me know and I can link to it from here.
-
-If you're looking for alternatives, here's the list so far:
- - http://www.minvolai.com/blog/decruft-arc90s-readability-in-python/
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..e09a515a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,67 @@
+[![PyPI version](https://img.shields.io/pypi/v/readability-lxml.svg)](https://pypi.python.org/pypi/readability-lxml)
+
+# python-readability
+
+Given an HTML document, extract and clean up the main body text and title.
+
+This is a Python port of a Ruby port of [arc90's Readability project](https://web.archive.org/web/20130519040221/http://www.readability.com/).
+
+## Installation
+
+It's easy using `pip`, just run:
+
+```bash
+$ pip install readability-lxml
+```
+
+As an alternative, you may also use conda to install, just run:
+
+```bash
+$ conda install -c conda-forge readability-lxml
+```
+
+## Usage
+
+```python
+>>> import requests
+>>> from readability import Document
+
+>>> response = requests.get('http://example.com')
+>>> doc = Document(response.content)
+>>> doc.title()
+'Example Domain'
+
+>>> doc.summary()
+"""<html><body><div><body id="readabilityBody">\n<div>\n    <h1>Example Domain</h1>\n
+<p>This domain is established to be used for illustrative examples in documents. You may
+use this\n    domain in examples without prior coordination or asking for permission.</p>
+\n    <p><a href="http://www.iana.org/domains/example">More information...</a></p>\n</div>
+\n</body>\n</div></body></html>"""
+```
+
+## Change Log
+- 0.8.4 Better CJK support, thanks @cdhigh
+- 0.8.3.1 Support for python 3.8 - 3.13
+- 0.8.3 We can now save all images via keep_all_images=True (default is to save 1 main image), thanks @botlabsDev
+- 0.8.2 Added article author(s) (thanks @mattblaha)
+- 0.8.1 Fixed processing of non-ascii HTMLs via regexps.
+- 0.8 Replaced XHTML output with HTML5 output in summary() call.
+- 0.7.1 Support for Python 3.7. Fixed a slowdown when processing documents with lots of spaces.
+- 0.7 Improved HTML5 tags handling. Fixed stripping unwanted HTML nodes (only first matching node was removed before).
+- 0.6 Finally a release which supports Python versions 2.6, 2.7, 3.3 - 3.6
+- 0.5 Preparing a release to support Python versions 2.6, 2.7, 3.3 and 3.4
+- 0.4 Added Videos loading and allowed more images per paragraph
+- 0.3 Added Document.encoding, positive\_keywords and negative\_keywords
+
+## Licensing
+
+This code is licensed under the [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
+
+## Thanks to
+
+- Latest [readability.js](https://github.com/MHordecki/readability-redux/blob/master/readability/readability.js)
+- Ruby port by starrhorne and iterationlabs
+- [Python port](https://github.com/gfxmonk/python-readability) by gfxmonk
+- [Decruft effort](https://web.archive.org/web/20110214150709/https://www.minvolai.com/blog/decruft-arc90s-readability-in-python/) to move to lxml
+- "BR to P" fix from readability.js which improves quality for smaller texts
+- GitHub users' contributions.
diff --git a/doc/__init__.py b/doc/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/doc/source/__init__.py b/doc/source/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/doc/source/api.rst b/doc/source/api.rst
new file mode 100644
index 00000000..b0e3bbbb
--- /dev/null
+++ b/doc/source/api.rst
@@ -0,0 +1,30 @@
+Reference
+=========
+
+.. automodule:: readability
+    :members:
+    :show-inheritance:
+
+.. automodule:: readability.browser
+    :members:
+    :show-inheritance:
+
+.. automodule:: readability.cleaners
+    :members:
+    :show-inheritance:
+
+.. automodule:: readability.debug
+    :members:
+    :show-inheritance:
+
+.. automodule:: readability.encoding
+    :members:
+    :show-inheritance:
+
+.. automodule:: readability.htmls
+    :members:
+    :show-inheritance:
+
+.. automodule:: readability.readability
+    :members:
+    :show-inheritance:
diff --git a/doc/source/conf.py b/doc/source/conf.py
new file mode 100644
index 00000000..e70cf9b3
--- /dev/null
+++ b/doc/source/conf.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python3
+#
+# readability documentation build configuration file, created by
+# sphinx-quickstart on Thu Mar 23 16:29:38 2017.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+# Put the directory two levels up (the repository root when run from doc/source) first on sys.path.
+sys.path.insert(0, os.path.abspath("../.."))
+# Imported after the path tweak; its __version__ is used below for version and release.
+import readability
+
+# -- General configuration ------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    "sphinx.ext.autodoc",
+    "sphinx.ext.doctest",
+    "sphinx.ext.intersphinx",
+    "sphinx.ext.todo",
+    "myst_parser",
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ["_templates"]
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+source_suffix = [".rst", ".md"]
+
+# The master toctree document.
+master_doc = "index"
+
+# General information about the project.
+project = "readability"
+copyright = "2020, Yuri Baburov"
+author = "Yuri Baburov"
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+
+# The short X.Y version.
+version = readability.__version__
+
+# The full version, including alpha/beta/rc tags.
+release = readability.__version__
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = "en"
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# These patterns also affect html_static_path and html_extra_path.
+exclude_patterns = []
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = "sphinx"
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = "sphinx_rtd_theme"
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#
+# html_theme_options = {}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = []  # the '_static' entry was commented out, leaving the list empty
+
+
+# -- Options for HTMLHelp output ------------------------------------------
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = "readabilitydoc"
+
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+    # The paper size ('letterpaper' or 'a4paper').
+    #
+    # 'papersize': 'letterpaper',
+    # The font size ('10pt', '11pt' or '12pt').
+    #
+    # 'pointsize': '10pt',
+    # Additional stuff for the LaTeX preamble.
+    #
+    # 'preamble': '',
+    # Latex figure (float) alignment
+    #
+    # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [(master_doc, "readability.tex", "Readability Documentation", "Yuri Baburov", "manual")]
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [(master_doc, "readability", "readability Documentation", [author], 1)]
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+    (
+        master_doc,
+        "readability",
+        "Readability Documentation",
+        author,
+        "readability",
+        "One line description of project.",
+        "Miscellaneous",
+    )
+]
+
+# Resolve cross-references to the Python standard library via intersphinx (needs an absolute base URL).
+intersphinx_mapping = {
+    "python": ("https://docs.python.org/3", None),
+}
diff --git a/doc/source/index.rst b/doc/source/index.rst
new file mode 100644
index 00000000..e3bce61d
--- /dev/null
+++ b/doc/source/index.rst
@@ -0,0 +1,14 @@
+.. include:: ../../README.md
+    :parser: myst_parser.sphinx_
+
+.. toctree::
+    :maxdepth: 2
+
+    api
+
+Indices and tables
+------------------
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..44992853
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,25 @@
+[tool.poetry]
+name = "readability-lxml"
+version = "0.8.4.1"
+description = "fast html to text parser (article readability tool) with python 3 support"
+authors = ["Yuri Baburov <burchik@gmail.com>"]
+license = "Apache-2.0"  # SPDX identifier, so Poetry can map it to the matching license classifier
+readme = "README.md"
+packages = [
+    { include = "readability" },
+]
+
+[tool.poetry.dependencies]
+python = ">=3.8.2,<3.14"
+chardet = "^5.2.0"
+cssselect = [
+    { version = "~1.2", markers = "python_version < '3.9'" },
+    { version = "~1.3", markers = "python_version >= '3.9'" }
+]
+lxml = {extras = ["html-clean"], version = "^5.4.0"}
+lxml-html-clean = {markers = "python_version < \"3.11\"", version = "^0.4.2"}
+
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
diff --git a/readability/BeautifulSoup.py b/readability/BeautifulSoup.py
deleted file mode 100644
index 34204e74..00000000
--- a/readability/BeautifulSoup.py
+++ /dev/null
@@ -1,2000 +0,0 @@
-"""Beautiful Soup
-Elixir and Tonic
-"The Screen-Scraper's Friend"
-http://www.crummy.com/software/BeautifulSoup/
-
-Beautiful Soup parses a (possibly invalid) XML or HTML document into a
-tree representation. It provides methods and Pythonic idioms that make
-it easy to navigate, search, and modify the tree.
-
-A well-formed XML/HTML document yields a well-formed data
-structure. An ill-formed XML/HTML document yields a correspondingly
-ill-formed data structure. If your document is only locally
-well-formed, you can use this library to find and process the
-well-formed part of it.
-
-Beautiful Soup works with Python 2.2 and up. It has no external
-dependencies, but you'll have more success at converting data to UTF-8
-if you also install these three packages:
-
-* chardet, for auto-detecting character encodings
-  http://chardet.feedparser.org/
-* cjkcodecs and iconv_codec, which add more encodings to the ones supported
-  by stock Python.
-  http://cjkpython.i18n.org/
-
-Beautiful Soup defines classes for two main parsing strategies:
-
- * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific
-   language that kind of looks like XML.
-
- * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
-   or invalid. This class has web browser-like heuristics for
-   obtaining a sensible parse tree in the face of common HTML errors.
-
-Beautiful Soup also defines a class (UnicodeDammit) for autodetecting
-the encoding of an HTML or XML document, and converting it to
-Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser.
-
-For more than you ever wanted to know about Beautiful Soup, see the
-documentation:
-http://www.crummy.com/software/BeautifulSoup/documentation.html
-
-Here, have some legalese:
-
-Copyright (c) 2004-2009, Leonard Richardson
-
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-  * Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-
-  * Redistributions in binary form must reproduce the above
-    copyright notice, this list of conditions and the following
-    disclaimer in the documentation and/or other materials provided
-    with the distribution.
-
-  * Neither the name of the the Beautiful Soup Consortium and All
-    Night Kosher Bakery nor the names of its contributors may be
-    used to endorse or promote products derived from this software
-    without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT.
-
-"""
-from __future__ import generators
-
-__author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "3.1.0.1"
-__copyright__ = "Copyright (c) 2004-2009 Leonard Richardson"
-__license__ = "New-style BSD"
-
-import codecs
-import markupbase
-import types
-import re
-from HTMLParser import HTMLParser, HTMLParseError
-try:
-    from htmlentitydefs import name2codepoint
-except ImportError:
-    name2codepoint = {}
-try:
-    set
-except NameError:
-    from sets import Set as set
-
-#These hacks make Beautiful Soup able to parse XML with namespaces
-markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match
-
-DEFAULT_OUTPUT_ENCODING = "utf-8"
-
-# First, the classes that represent markup elements.
-
-def sob(unicode, encoding):
-    """Returns either the given Unicode string or its encoding."""
-    if encoding is None:
-        return unicode
-    else:
-        return unicode.encode(encoding)
-
-class PageElement:
-    """Contains the navigational information for some part of the page
-    (either a tag or a piece of text)"""
-
-    def setup(self, parent=None, previous=None):
-        """Sets up the initial relations between this element and
-        other elements."""
-        self.parent = parent
-        self.previous = previous
-        self.next = None
-        self.previousSibling = None
-        self.nextSibling = None
-        if self.parent and self.parent.contents:
-            self.previousSibling = self.parent.contents[-1]
-            self.previousSibling.nextSibling = self
-
-    def replaceWith(self, replaceWith):
-        oldParent = self.parent
-        myIndex = self.parent.contents.index(self)
-        if hasattr(replaceWith, 'parent') and replaceWith.parent == self.parent:
-            # We're replacing this element with one of its siblings.
-            index = self.parent.contents.index(replaceWith)
-            if index and index < myIndex:
-                # Furthermore, it comes before this element. That
-                # means that when we extract it, the index of this
-                # element will change.
-                myIndex = myIndex - 1
-        self.extract()
-        oldParent.insert(myIndex, replaceWith)
-
-    def extract(self):
-        """Destructively rips this element out of the tree."""
-        if self.parent:
-            try:
-                self.parent.contents.remove(self)
-            except ValueError:
-                pass
-
-        #Find the two elements that would be next to each other if
-        #this element (and any children) hadn't been parsed. Connect
-        #the two.
-        lastChild = self._lastRecursiveChild()
-        nextElement = lastChild.next
-
-        if self.previous:
-            self.previous.next = nextElement
-        if nextElement:
-            nextElement.previous = self.previous
-        self.previous = None
-        lastChild.next = None
-
-        self.parent = None
-        if self.previousSibling:
-            self.previousSibling.nextSibling = self.nextSibling
-        if self.nextSibling:
-            self.nextSibling.previousSibling = self.previousSibling
-        self.previousSibling = self.nextSibling = None
-        return self
-
-    def _lastRecursiveChild(self):
-        "Finds the last element beneath this object to be parsed."
-        lastChild = self
-        while hasattr(lastChild, 'contents') and lastChild.contents:
-            lastChild = lastChild.contents[-1]
-        return lastChild
-
-    def insert(self, position, newChild):
-        if (isinstance(newChild, basestring)
-            or isinstance(newChild, unicode)) \
-            and not isinstance(newChild, NavigableString):
-            newChild = NavigableString(newChild)
-
-        position =  min(position, len(self.contents))
-        if hasattr(newChild, 'parent') and newChild.parent != None:
-            # We're 'inserting' an element that's already one
-            # of this object's children.
-            if newChild.parent == self:
-                index = self.find(newChild)
-                if index and index < position:
-                    # Furthermore we're moving it further down the
-                    # list of this object's children. That means that
-                    # when we extract this element, our target index
-                    # will jump down one.
-                    position = position - 1
-            newChild.extract()
-
-        newChild.parent = self
-        previousChild = None
-        if position == 0:
-            newChild.previousSibling = None
-            newChild.previous = self
-        else:
-            previousChild = self.contents[position-1]
-            newChild.previousSibling = previousChild
-            newChild.previousSibling.nextSibling = newChild
-            newChild.previous = previousChild._lastRecursiveChild()
-        if newChild.previous:
-            newChild.previous.next = newChild
-
-        newChildsLastElement = newChild._lastRecursiveChild()
-
-        if position >= len(self.contents):
-            newChild.nextSibling = None
-
-            parent = self
-            parentsNextSibling = None
-            while not parentsNextSibling:
-                parentsNextSibling = parent.nextSibling
-                parent = parent.parent
-                if not parent: # This is the last element in the document.
-                    break
-            if parentsNextSibling:
-                newChildsLastElement.next = parentsNextSibling
-            else:
-                newChildsLastElement.next = None
-        else:
-            nextChild = self.contents[position]
-            newChild.nextSibling = nextChild
-            if newChild.nextSibling:
-                newChild.nextSibling.previousSibling = newChild
-            newChildsLastElement.next = nextChild
-
-        if newChildsLastElement.next:
-            newChildsLastElement.next.previous = newChildsLastElement
-        self.contents.insert(position, newChild)
-
-    def append(self, tag):
-        """Appends the given tag to the contents of this tag."""
-        self.insert(len(self.contents), tag)
-
-    def findNext(self, name=None, attrs={}, text=None, **kwargs):
-        """Returns the first item that matches the given criteria and
-        appears after this Tag in the document."""
-        return self._findOne(self.findAllNext, name, attrs, text, **kwargs)
-
-    def findAllNext(self, name=None, attrs={}, text=None, limit=None,
-                    **kwargs):
-        """Returns all items that match the given criteria and appear
-        after this Tag in the document."""
-        return self._findAll(name, attrs, text, limit, self.nextGenerator,
-                             **kwargs)
-
-    def findNextSibling(self, name=None, attrs={}, text=None, **kwargs):
-        """Returns the closest sibling to this Tag that matches the
-        given criteria and appears after this Tag in the document."""
-        return self._findOne(self.findNextSiblings, name, attrs, text,
-                             **kwargs)
-
-    def findNextSiblings(self, name=None, attrs={}, text=None, limit=None,
-                         **kwargs):
-        """Returns the siblings of this Tag that match the given
-        criteria and appear after this Tag in the document."""
-        return self._findAll(name, attrs, text, limit,
-                             self.nextSiblingGenerator, **kwargs)
-    fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x
-
-    def findPrevious(self, name=None, attrs={}, text=None, **kwargs):
-        """Returns the first item that matches the given criteria and
-        appears before this Tag in the document."""
-        return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs)
-
-    def findAllPrevious(self, name=None, attrs={}, text=None, limit=None,
-                        **kwargs):
-        """Returns all items that match the given criteria and appear
-        before this Tag in the document."""
-        return self._findAll(name, attrs, text, limit, self.previousGenerator,
-                           **kwargs)
-    fetchPrevious = findAllPrevious # Compatibility with pre-3.x
-
-    def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs):
-        """Returns the closest sibling to this Tag that matches the
-        given criteria and appears before this Tag in the document."""
-        return self._findOne(self.findPreviousSiblings, name, attrs, text,
-                             **kwargs)
-
-    def findPreviousSiblings(self, name=None, attrs={}, text=None,
-                             limit=None, **kwargs):
-        """Returns the siblings of this Tag that match the given
-        criteria and appear before this Tag in the document."""
-        return self._findAll(name, attrs, text, limit,
-                             self.previousSiblingGenerator, **kwargs)
-    fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x
-
-    def findParent(self, name=None, attrs={}, **kwargs):
-        """Returns the closest parent of this Tag that matches the given
-        criteria."""
-        # NOTE: We can't use _findOne because findParents takes a different
-        # set of arguments.
-        r = None
-        l = self.findParents(name, attrs, 1)
-        if l:
-            r = l[0]
-        return r
-
-    def findParents(self, name=None, attrs={}, limit=None, **kwargs):
-        """Returns the parents of this Tag that match the given
-        criteria."""
-
-        return self._findAll(name, attrs, None, limit, self.parentGenerator,
-                             **kwargs)
-    fetchParents = findParents # Compatibility with pre-3.x
-
-    #These methods do the real heavy lifting.
-
-    def _findOne(self, method, name, attrs, text, **kwargs):
-        r = None
-        l = method(name, attrs, text, 1, **kwargs)
-        if l:
-            r = l[0]
-        return r
-
-    def _findAll(self, name, attrs, text, limit, generator, **kwargs):
-        "Iterates over a generator looking for things that match."
-
-        if isinstance(name, SoupStrainer):
-            strainer = name
-        else:
-            # Build a SoupStrainer
-            strainer = SoupStrainer(name, attrs, text, **kwargs)
-        results = ResultSet(strainer)
-        g = generator()
-        while True:
-            try:
-                i = g.next()
-            except StopIteration:
-                break
-            if i:
-                found = strainer.search(i)
-                if found:
-                    results.append(found)
-                    if limit and len(results) >= limit:
-                        break
-        return results
-
-    #These Generators can be used to navigate starting from both
-    #NavigableStrings and Tags.
-    def nextGenerator(self):
-        i = self
-        while i:
-            i = i.next
-            yield i
-
-    def nextSiblingGenerator(self):
-        i = self
-        while i:
-            i = i.nextSibling
-            yield i
-
-    def previousGenerator(self):
-        i = self
-        while i:
-            i = i.previous
-            yield i
-
-    def previousSiblingGenerator(self):
-        i = self
-        while i:
-            i = i.previousSibling
-            yield i
-
-    def parentGenerator(self):
-        i = self
-        while i:
-            i = i.parent
-            yield i
-
-    # Utility methods
-    def substituteEncoding(self, str, encoding=None):
-        encoding = encoding or "utf-8"
-        return str.replace("%SOUP-ENCODING%", encoding)
-
-    def toEncoding(self, s, encoding=None):
-        """Encodes an object to a string in some encoding, or to Unicode.
-        ."""
-        if isinstance(s, unicode):
-            if encoding:
-                s = s.encode(encoding)
-        elif isinstance(s, str):
-            if encoding:
-                s = s.encode(encoding)
-            else:
-                s = unicode(s)
-        else:
-            if encoding:
-                s  = self.toEncoding(str(s), encoding)
-            else:
-                s = unicode(s)
-        return s
-
-class NavigableString(unicode, PageElement):
-
-    def __new__(cls, value):
-        """Create a new NavigableString.
-
-        When unpickling a NavigableString, this method is called with
-        the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be
-        passed in to the superclass's __new__ or the superclass won't know
-        how to handle non-ASCII characters.
-        """
-        if isinstance(value, unicode):
-            return unicode.__new__(cls, value)
-        return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
-
-    def __getnewargs__(self):
-        return (unicode(self),)
-
-    def __getattr__(self, attr):
-        """text.string gives you text. This is for backwards
-        compatibility for Navigable*String, but for CData* it lets you
-        get the string without the CData wrapper."""
-        if attr == 'string':
-            return self
-        else:
-            raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
-
-    def encode(self, encoding=DEFAULT_OUTPUT_ENCODING):
-        return self.decode().encode(encoding)
-
-    def decodeGivenEventualEncoding(self, eventualEncoding):
-        return self
-
-class CData(NavigableString):
-
-    def decodeGivenEventualEncoding(self, eventualEncoding):
-        return u'<![CDATA[' + self + u']]>'
-
-class ProcessingInstruction(NavigableString):
-
-    def decodeGivenEventualEncoding(self, eventualEncoding):
-        output = self
-        if u'%SOUP-ENCODING%' in output:
-            output = self.substituteEncoding(output, eventualEncoding)
-        return u'<?' + output + u'?>'
-
-class Comment(NavigableString):
-    def decodeGivenEventualEncoding(self, eventualEncoding):
-        return u'<!--' + self + u'-->'
-
-class Declaration(NavigableString):
-    def decodeGivenEventualEncoding(self, eventualEncoding):
-        return u'<!' + self + u'>'
-
-class Tag(PageElement):
-
-    """Represents a found HTML tag with its attributes and contents."""
-
-    def _invert(h):
-        "Cheap function to invert a hash."
-        i = {}
-        for k,v in h.items():
-            i[v] = k
-        return i
-
-    XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'",
-                                      "quot" : '"',
-                                      "amp" : "&",
-                                      "lt" : "<",
-                                      "gt" : ">" }
-
-    XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS)
-
-    def _convertEntities(self, match):
-        """Used in a call to re.sub to replace HTML, XML, and numeric
-        entities with the appropriate Unicode characters. If HTML
-        entities are being converted, any unrecognized entities are
-        escaped."""
-        x = match.group(1)
-        if self.convertHTMLEntities and x in name2codepoint:
-            return unichr(name2codepoint[x])
-        elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS:
-            if self.convertXMLEntities:
-                return self.XML_ENTITIES_TO_SPECIAL_CHARS[x]
-            else:
-                return u'&%s;' % x
-        elif len(x) > 0 and x[0] == '#':
-            # Handle numeric entities
-            if len(x) > 1 and x[1] == 'x':
-                return unichr(int(x[2:], 16))
-            else:
-                return unichr(int(x[1:]))
-
-        elif self.escapeUnrecognizedEntities:
-            return u'&amp;%s;' % x
-        else:
-            return u'&%s;' % x
-
-    def __init__(self, parser, name, attrs=None, parent=None,
-                 previous=None):
-        "Basic constructor."
-
-        # We don't actually store the parser object: that lets extracted
-        # chunks be garbage-collected
-        self.parserClass = parser.__class__
-        self.isSelfClosing = parser.isSelfClosingTag(name)
-        self.name = name
-        if attrs == None:
-            attrs = []
-        self.attrs = attrs
-        self.contents = []
-        self.setup(parent, previous)
-        self.hidden = False
-        self.containsSubstitutions = False
-        self.convertHTMLEntities = parser.convertHTMLEntities
-        self.convertXMLEntities = parser.convertXMLEntities
-        self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities
-
-        def convert(kval):
-            "Converts HTML, XML and numeric entities in the attribute value."
-            k, val = kval
-            if val is None:
-                return kval
-            return (k, re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);",
-                              self._convertEntities, val))
-        self.attrs = map(convert, self.attrs)
-
-    def get(self, key, default=None):
-        """Returns the value of the 'key' attribute for the tag, or
-        the value given for 'default' if it doesn't have that
-        attribute."""
-        return self._getAttrMap().get(key, default)
-
-    def has_key(self, key):
-        return self._getAttrMap().has_key(key)
-
-    def __getitem__(self, key):
-        """tag[key] returns the value of the 'key' attribute for the tag,
-        and throws an exception if it's not there."""
-        return self._getAttrMap()[key]
-
-    def __iter__(self):
-        "Iterating over a tag iterates over its contents."
-        return iter(self.contents)
-
-    def __len__(self):
-        "The length of a tag is the length of its list of contents."
-        return len(self.contents)
-
-    def __contains__(self, x):
-        return x in self.contents
-
-    def __nonzero__(self):
-        "A tag is non-None even if it has no contents."
-        return True
-
-    def __setitem__(self, key, value):
-        """Setting tag[key] sets the value of the 'key' attribute for the
-        tag."""
-        self._getAttrMap()
-        self.attrMap[key] = value
-        found = False
-        for i in range(0, len(self.attrs)):
-            if self.attrs[i][0] == key:
-                self.attrs[i] = (key, value)
-                found = True
-        if not found:
-            self.attrs.append((key, value))
-        self._getAttrMap()[key] = value
-
-    def __delitem__(self, key):
-        "Deleting tag[key] deletes all 'key' attributes for the tag."
-        for item in self.attrs:
-            if item[0] == key:
-                self.attrs.remove(item)
-                #We don't break because bad HTML can define the same
-                #attribute multiple times.
-            self._getAttrMap()
-            if self.attrMap.has_key(key):
-                del self.attrMap[key]
-
-    def __call__(self, *args, **kwargs):
-        """Calling a tag like a function is the same as calling its
-        findAll() method. Eg. tag('a') returns a list of all the A tags
-        found within this tag."""
-        return apply(self.findAll, args, kwargs)
-
-    def __getattr__(self, tag):
-        #print "Getattr %s.%s" % (self.__class__, tag)
-        if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3:
-            return self.find(tag[:-3])
-        elif tag.find('__') != 0:
-            return self.find(tag)
-        raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag)
-
-    def __eq__(self, other):
-        """Returns true iff this tag has the same name, the same attributes,
-        and the same contents (recursively) as the given tag.
-
-        NOTE: right now this will return false if two tags have the
-        same attributes in a different order. Should this be fixed?"""
-        if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other):
-            return False
-        for i in range(0, len(self.contents)):
-            if self.contents[i] != other.contents[i]:
-                return False
-        return True
-
-    def __ne__(self, other):
-        """Returns true iff this tag is not identical to the other tag,
-        as defined in __eq__."""
-        return not self == other
-
-    def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING):
-        """Renders this tag as a string."""
-        return self.decode(eventualEncoding=encoding)
-
-    BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
-                                           + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
-                                           + ")")
-
-    def _sub_entity(self, x):
-        """Used with a regular expression to substitute the
-        appropriate XML entity for an XML special character."""
-        return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";"
-
-    def __unicode__(self):
-        return self.decode()
-
-    def __str__(self):
-        return self.encode()
-
-    def encode(self, encoding=DEFAULT_OUTPUT_ENCODING,
-               prettyPrint=False, indentLevel=0):
-        return self.decode(prettyPrint, indentLevel, encoding).encode(encoding)
-
-    def decode(self, prettyPrint=False, indentLevel=0,
-               eventualEncoding=DEFAULT_OUTPUT_ENCODING):
-        """Returns a string or Unicode representation of this tag and
-        its contents. To get Unicode, pass None for encoding."""
-
-        attrs = []
-        if self.attrs:
-            for key, val in self.attrs:
-                fmt = '%s="%s"'
-                if isString(val):
-                    if (self.containsSubstitutions
-                        and eventualEncoding is not None
-                        and '%SOUP-ENCODING%' in val):
-                        val = self.substituteEncoding(val, eventualEncoding)
-
-                    # The attribute value either:
-                    #
-                    # * Contains no embedded double quotes or single quotes.
-                    #   No problem: we enclose it in double quotes.
-                    # * Contains embedded single quotes. No problem:
-                    #   double quotes work here too.
-                    # * Contains embedded double quotes. No problem:
-                    #   we enclose it in single quotes.
-                    # * Embeds both single _and_ double quotes. This
-                    #   can't happen naturally, but it can happen if
-                    #   you modify an attribute value after parsing
-                    #   the document. Now we have a bit of a
-                    #   problem. We solve it by enclosing the
-                    #   attribute in single quotes, and escaping any
-                    #   embedded single quotes to XML entities.
-                    if '"' in val:
-                        fmt = "%s='%s'"
-                        if "'" in val:
-                            # TODO: replace with apos when
-                            # appropriate.
-                            val = val.replace("'", "&squot;")
-
-                    # Now we're okay w/r/t quotes. But the attribute
-                    # value might also contain angle brackets, or
-                    # ampersands that aren't part of entities. We need
-                    # to escape those to XML entities too.
-                    val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val)
-                if val is None:
-                    # Handle boolean attributes.
-                    decoded = key
-                else:
-                    decoded = fmt % (key, val)
-                attrs.append(decoded)
-        close = ''
-        closeTag = ''
-        if self.isSelfClosing:
-            close = ' /'
-        else:
-            closeTag = '</%s>' % self.name
-
-        indentTag, indentContents = 0, 0
-        if prettyPrint:
-            indentTag = indentLevel
-            space = (' ' * (indentTag-1))
-            indentContents = indentTag + 1
-        contents = self.decodeContents(prettyPrint, indentContents,
-                                       eventualEncoding)
-        if self.hidden:
-            s = contents
-        else:
-            s = []
-            attributeString = ''
-            if attrs:
-                attributeString = ' ' + ' '.join(attrs)
-            if prettyPrint:
-                s.append(space)
-            s.append('<%s%s%s>' % (self.name, attributeString, close))
-            if prettyPrint:
-                s.append("\n")
-            s.append(contents)
-            if prettyPrint and contents and contents[-1] != "\n":
-                s.append("\n")
-            if prettyPrint and closeTag:
-                s.append(space)
-            s.append(closeTag)
-            if prettyPrint and closeTag and self.nextSibling:
-                s.append("\n")
-            s = ''.join(s)
-        return s
-
-    def decompose(self):
-        """Recursively destroys the contents of this tree."""
-        contents = [i for i in self.contents]
-        for i in contents:
-            if isinstance(i, Tag):
-                i.decompose()
-            else:
-                i.extract()
-        self.extract()
-
-    def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING):
-        return self.encode(encoding, True)
-
-    def encodeContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
-                       prettyPrint=False, indentLevel=0):
-        return self.decodeContents(prettyPrint, indentLevel).encode(encoding)
-
-    def decodeContents(self, prettyPrint=False, indentLevel=0,
-                       eventualEncoding=DEFAULT_OUTPUT_ENCODING):
-        """Renders the contents of this tag as a string in the given
-        encoding. If encoding is None, returns a Unicode string.."""
-        s=[]
-        for c in self:
-            text = None
-            if isinstance(c, NavigableString):
-                text = c.decodeGivenEventualEncoding(eventualEncoding)
-            elif isinstance(c, Tag):
-                s.append(c.decode(prettyPrint, indentLevel, eventualEncoding))
-            if text and prettyPrint:
-                text = text.strip()
-            if text:
-                if prettyPrint:
-                    s.append(" " * (indentLevel-1))
-                s.append(text)
-                if prettyPrint:
-                    s.append("\n")
-        return ''.join(s)
-
-    #Soup methods
-
-    def find(self, name=None, attrs={}, recursive=True, text=None,
-             **kwargs):
-        """Return only the first child of this Tag matching the given
-        criteria."""
-        r = None
-        l = self.findAll(name, attrs, recursive, text, 1, **kwargs)
-        if l:
-            r = l[0]
-        return r
-    findChild = find
-
-    def findAll(self, name=None, attrs={}, recursive=True, text=None,
-                limit=None, **kwargs):
-        """Extracts a list of Tag objects that match the given
-        criteria.  You can specify the name of the Tag and any
-        attributes you want the Tag to have.
-
-        The value of a key-value pair in the 'attrs' map can be a
-        string, a list of strings, a regular expression object, or a
-        callable that takes a string and returns whether or not the
-        string matches for some custom definition of 'matches'. The
-        same is true of the tag name."""
-        generator = self.recursiveChildGenerator
-        if not recursive:
-            generator = self.childGenerator
-        return self._findAll(name, attrs, text, limit, generator, **kwargs)
-    findChildren = findAll
-
-    # Pre-3.x compatibility methods. Will go away in 4.0.
-    first = find
-    fetch = findAll
-
-    def fetchText(self, text=None, recursive=True, limit=None):
-        return self.findAll(text=text, recursive=recursive, limit=limit)
-
-    def firstText(self, text=None, recursive=True):
-        return self.find(text=text, recursive=recursive)
-
-    # 3.x compatibility methods. Will go away in 4.0.
-    def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
-                       prettyPrint=False, indentLevel=0):
-        if encoding is None:
-            return self.decodeContents(prettyPrint, indentLevel, encoding)
-        else:
-            return self.encodeContents(encoding, prettyPrint, indentLevel)
-
-
-    #Private methods
-
-    def _getAttrMap(self):
-        """Initializes a map representation of this tag's attributes,
-        if not already initialized."""
-        if not getattr(self, 'attrMap'):
-            self.attrMap = {}
-            for (key, value) in self.attrs:
-                self.attrMap[key] = value
-        return self.attrMap
-
-    #Generator methods
-    def recursiveChildGenerator(self):
-        if not len(self.contents):
-            raise StopIteration
-        stopNode = self._lastRecursiveChild().next
-        current = self.contents[0]
-        while current is not stopNode:
-            yield current
-            current = current.next
-
-    def childGenerator(self):
-        if not len(self.contents):
-            raise StopIteration
-        current = self.contents[0]
-        while current:
-            yield current
-            current = current.nextSibling
-        raise StopIteration
-
-# Next, a couple classes to represent queries and their results.
-class SoupStrainer:
-    """Encapsulates a number of ways of matching a markup element (tag or
-    text)."""
-
-    def __init__(self, name=None, attrs={}, text=None, **kwargs):
-        self.name = name
-        if isString(attrs):
-            kwargs['class'] = attrs
-            attrs = None
-        if kwargs:
-            if attrs:
-                attrs = attrs.copy()
-                attrs.update(kwargs)
-            else:
-                attrs = kwargs
-        self.attrs = attrs
-        self.text = text
-
-    def __str__(self):
-        if self.text:
-            return self.text
-        else:
-            return "%s|%s" % (self.name, self.attrs)
-
-    def searchTag(self, markupName=None, markupAttrs={}):
-        found = None
-        markup = None
-        if isinstance(markupName, Tag):
-            markup = markupName
-            markupAttrs = markup
-        callFunctionWithTagData = callable(self.name) \
-                                and not isinstance(markupName, Tag)
-
-        if (not self.name) \
-               or callFunctionWithTagData \
-               or (markup and self._matches(markup, self.name)) \
-               or (not markup and self._matches(markupName, self.name)):
-            if callFunctionWithTagData:
-                match = self.name(markupName, markupAttrs)
-            else:
-                match = True
-                markupAttrMap = None
-                for attr, matchAgainst in self.attrs.items():
-                    if not markupAttrMap:
-                         if hasattr(markupAttrs, 'get'):
-                            markupAttrMap = markupAttrs
-                         else:
-                            markupAttrMap = {}
-                            for k,v in markupAttrs:
-                                markupAttrMap[k] = v
-                    attrValue = markupAttrMap.get(attr)
-                    if not self._matches(attrValue, matchAgainst):
-                        match = False
-                        break
-            if match:
-                if markup:
-                    found = markup
-                else:
-                    found = markupName
-        return found
-
-    def search(self, markup):
-        #print 'looking for %s in %s' % (self, markup)
-        found = None
-        # If given a list of items, scan it for a text element that
-        # matches.
-        if isList(markup) and not isinstance(markup, Tag):
-            for element in markup:
-                if isinstance(element, NavigableString) \
-                       and self.search(element):
-                    found = element
-                    break
-        # If it's a Tag, make sure its name or attributes match.
-        # Don't bother with Tags if we're searching for text.
-        elif isinstance(markup, Tag):
-            if not self.text:
-                found = self.searchTag(markup)
-        # If it's text, make sure the text matches.
-        elif isinstance(markup, NavigableString) or \
-                 isString(markup):
-            if self._matches(markup, self.text):
-                found = markup
-        else:
-            raise Exception, "I don't know how to match against a %s" \
-                  % markup.__class__
-        return found
-
-    def _matches(self, markup, matchAgainst):
-        #print "Matching %s against %s" % (markup, matchAgainst)
-        result = False
-        if matchAgainst == True and type(matchAgainst) == types.BooleanType:
-            result = markup != None
-        elif callable(matchAgainst):
-            result = matchAgainst(markup)
-        else:
-            #Custom match methods take the tag as an argument, but all
-            #other ways of matching match the tag name as a string.
-            if isinstance(markup, Tag):
-                markup = markup.name
-            if markup is not None and not isString(markup):
-                markup = unicode(markup)
-            #Now we know that chunk is either a string, or None.
-            if hasattr(matchAgainst, 'match'):
-                # It's a regexp object.
-                result = markup and matchAgainst.search(markup)
-            elif (isList(matchAgainst)
-                  and (markup is not None or not isString(matchAgainst))):
-                result = markup in matchAgainst
-            elif hasattr(matchAgainst, 'items'):
-                result = markup.has_key(matchAgainst)
-            elif matchAgainst and isString(markup):
-                if isinstance(markup, unicode):
-                    matchAgainst = unicode(matchAgainst)
-                else:
-                    matchAgainst = str(matchAgainst)
-
-            if not result:
-                result = matchAgainst == markup
-        return result
-
-class ResultSet(list):
-    """A ResultSet is just a list that keeps track of the SoupStrainer
-    that created it."""
-    def __init__(self, source):
-        list.__init__([])
-        self.source = source
-
-# Now, some helper functions.
-
-def isList(l):
-    """Convenience method that works with all 2.x versions of Python
-    to determine whether or not something is listlike."""
-    return ((hasattr(l, '__iter__') and not isString(l))
-            or (type(l) in (types.ListType, types.TupleType)))
-
-def isString(s):
-    """Convenience method that works with all 2.x versions of Python
-    to determine whether or not something is stringlike."""
-    try:
-        return isinstance(s, unicode) or isinstance(s, basestring)
-    except NameError:
-        return isinstance(s, str)
-
-def buildTagMap(default, *args):
-    """Turns a list of maps, lists, or scalars into a single map.
-    Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and
-    NESTING_RESET_TAGS maps out of lists and partial maps."""
-    built = {}
-    for portion in args:
-        if hasattr(portion, 'items'):
-            #It's a map. Merge it.
-            for k,v in portion.items():
-                built[k] = v
-        elif isList(portion) and not isString(portion):
-            #It's a list. Map each item to the default.
-            for k in portion:
-                built[k] = default
-        else:
-            #It's a scalar. Map it to the default.
-            built[portion] = default
-    return built
-
-# Now, the parser classes.
-
-class HTMLParserBuilder(HTMLParser):
-
-    def __init__(self, soup):
-        HTMLParser.__init__(self)
-        self.soup = soup
-
-    # We inherit feed() and reset().
-
-    def handle_starttag(self, name, attrs):
-        if name == 'meta':
-            self.soup.extractCharsetFromMeta(attrs)
-        else:
-            self.soup.unknown_starttag(name, attrs)
-
-    def handle_endtag(self, name):
-        self.soup.unknown_endtag(name)
-
-    def handle_data(self, content):
-        self.soup.handle_data(content)
-
-    def _toStringSubclass(self, text, subclass):
-        """Adds a certain piece of text to the tree as a NavigableString
-        subclass."""
-        self.soup.endData()
-        self.handle_data(text)
-        self.soup.endData(subclass)
-
-    def handle_pi(self, text):
-        """Handle a processing instruction as a ProcessingInstruction
-        object, possibly one with a %SOUP-ENCODING% slot into which an
-        encoding will be plugged later."""
-        if text[:3] == "xml":
-            text = u"xml version='1.0' encoding='%SOUP-ENCODING%'"
-        self._toStringSubclass(text, ProcessingInstruction)
-
-    def handle_comment(self, text):
-        "Handle comments as Comment objects."
-        self._toStringSubclass(text, Comment)
-
-    def handle_charref(self, ref):
-        "Handle character references as data."
-        if self.soup.convertEntities:
-            data = unichr(int(ref))
-        else:
-            data = '&#%s;' % ref
-        self.handle_data(data)
-
-    def handle_entityref(self, ref):
-        """Handle entity references as data, possibly converting known
-        HTML and/or XML entity references to the corresponding Unicode
-        characters."""
-        data = None
-        if self.soup.convertHTMLEntities:
-            try:
-                data = unichr(name2codepoint[ref])
-            except KeyError:
-                pass
-
-        if not data and self.soup.convertXMLEntities:
-                data = self.soup.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref)
-
-        if not data and self.soup.convertHTMLEntities and \
-            not self.soup.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref):
-                # TODO: We've got a problem here. We're told this is
-                # an entity reference, but it's not an XML entity
-                # reference or an HTML entity reference. Nonetheless,
-                # the logical thing to do is to pass it through as an
-                # unrecognized entity reference.
-                #
-                # Except: when the input is "&carol;" this function
-                # will be called with input "carol". When the input is
-                # "AT&T", this function will be called with input
-                # "T". We have no way of knowing whether a semicolon
-                # was present originally, so we don't know whether
-                # this is an unknown entity or just a misplaced
-                # ampersand.
-                #
-                # The more common case is a misplaced ampersand, so I
-                # escape the ampersand and omit the trailing semicolon.
-                data = "&amp;%s" % ref
-        if not data:
-            # This case is different from the one above, because we
-            # haven't already gone through a supposedly comprehensive
-            # mapping of entities to Unicode characters. We might not
-            # have gone through any mapping at all. So the chances are
-            # very high that this is a real entity, and not a
-            # misplaced ampersand.
-            data = "&%s;" % ref
-        self.handle_data(data)
-
-    def handle_decl(self, data):
-        "Handle DOCTYPEs and the like as Declaration objects."
-        self._toStringSubclass(data, Declaration)
-
-    def parse_declaration(self, i):
-        """Treat a bogus SGML declaration as raw data. Treat a CDATA
-        declaration as a CData object."""
-        j = None
-        if self.rawdata[i:i+9] == '<![CDATA[':
-             k = self.rawdata.find(']]>', i)
-             if k == -1:
-                 k = len(self.rawdata)
-             data = self.rawdata[i+9:k]
-             j = k+3
-             self._toStringSubclass(data, CData)
-        else:
-            try:
-                j = HTMLParser.parse_declaration(self, i)
-            except HTMLParseError:
-                toHandle = self.rawdata[i:]
-                self.handle_data(toHandle)
-                j = i + len(toHandle)
-        return j
-
-
-class BeautifulStoneSoup(Tag):
-
-    """This class contains the basic parser and search code. It defines
-    a parser that knows nothing about tag behavior except for the
-    following:
-
-      You can't close a tag without closing all the tags it encloses.
-      That is, "<foo><bar></foo>" actually means
-      "<foo><bar></bar></foo>".
-
-    [Another possible explanation is "<foo><bar /></foo>", but since
-    this class defines no SELF_CLOSING_TAGS, it will never use that
-    explanation.]
-
-    This class is useful for parsing XML or made-up markup languages,
-    or when BeautifulSoup makes an assumption counter to what you were
-    expecting."""
-
-    SELF_CLOSING_TAGS = {}
-    NESTABLE_TAGS = {}
-    RESET_NESTING_TAGS = {}
-    QUOTE_TAGS = {}
-    PRESERVE_WHITESPACE_TAGS = []
-
-    MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'),
-                       lambda x: x.group(1) + ' />'),
-                      (re.compile('<!\s+([^<>]*)>'),
-                       lambda x: '<!' + x.group(1) + '>')
-                      ]
-
-    ROOT_TAG_NAME = u'[document]'
-
-    HTML_ENTITIES = "html"
-    XML_ENTITIES = "xml"
-    XHTML_ENTITIES = "xhtml"
-    # TODO: This only exists for backwards-compatibility
-    ALL_ENTITIES = XHTML_ENTITIES
-
-    # Used when determining whether a text node is all whitespace and
-    # can be replaced with a single space. A text node that contains
-    # fancy Unicode spaces (usually non-breaking) should be left
-    # alone.
-    STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, }
-
-    def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None,
-                 markupMassage=True, smartQuotesTo=XML_ENTITIES,
-                 convertEntities=None, selfClosingTags=None, isHTML=False,
-                 builder=HTMLParserBuilder):
-        """The Soup object is initialized as the 'root tag', and the
-        provided markup (which can be a string or a file-like object)
-        is fed into the underlying parser.
-
-        HTMLParser will process most bad HTML, and the BeautifulSoup
-        class has some tricks for dealing with some HTML that kills
-        HTMLParser, but Beautiful Soup can nonetheless choke or lose data
-        if your data uses self-closing tags or declarations
-        incorrectly.
-
-        By default, Beautiful Soup uses regexes to sanitize input,
-        avoiding the vast majority of these problems. If the problems
-        don't apply to you, pass in False for markupMassage, and
-        you'll get better performance.
-
-        The default parser massage techniques fix the two most common
-        instances of invalid HTML that choke HTMLParser:
-
-         <br/> (No space between name of closing tag and tag close)
-         <! --Comment--> (Extraneous whitespace in declaration)
-
-        You can pass in a custom list of (RE object, replace method)
-        tuples to get Beautiful Soup to scrub your input the way you
-        want."""
-
-        self.parseOnlyThese = parseOnlyThese
-        self.fromEncoding = fromEncoding
-        self.smartQuotesTo = smartQuotesTo
-        self.convertEntities = convertEntities
-        # Set the rules for how we'll deal with the entities we
-        # encounter
-        if self.convertEntities:
-            # It doesn't make sense to convert encoded characters to
-            # entities even while you're converting entities to Unicode.
-            # Just convert it all to Unicode.
-            self.smartQuotesTo = None
-            if convertEntities == self.HTML_ENTITIES:
-                self.convertXMLEntities = False
-                self.convertHTMLEntities = True
-                self.escapeUnrecognizedEntities = True
-            elif convertEntities == self.XHTML_ENTITIES:
-                self.convertXMLEntities = True
-                self.convertHTMLEntities = True
-                self.escapeUnrecognizedEntities = False
-            elif convertEntities == self.XML_ENTITIES:
-                self.convertXMLEntities = True
-                self.convertHTMLEntities = False
-                self.escapeUnrecognizedEntities = False
-        else:
-            self.convertXMLEntities = False
-            self.convertHTMLEntities = False
-            self.escapeUnrecognizedEntities = False
-
-        self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags)
-        self.builder = builder(self)
-        self.reset()
-
-        if hasattr(markup, 'read'):        # It's a file-type object.
-            markup = markup.read()
-        self.markup = markup
-        self.markupMassage = markupMassage
-        try:
-            self._feed(isHTML=isHTML)
-        except StopParsing:
-            pass
-        self.markup = None                 # The markup can now be GCed.
-        self.builder = None                # So can the builder.
-
-    def _feed(self, inDocumentEncoding=None, isHTML=False):
-        # Convert the document to Unicode.
-        markup = self.markup
-        if isinstance(markup, unicode):
-            if not hasattr(self, 'originalEncoding'):
-                self.originalEncoding = None
-        else:
-            dammit = UnicodeDammit\
-                     (markup, [self.fromEncoding, inDocumentEncoding],
-                      smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
-            markup = dammit.unicode
-            self.originalEncoding = dammit.originalEncoding
-            self.declaredHTMLEncoding = dammit.declaredHTMLEncoding
-        if markup:
-            if self.markupMassage:
-                if not isList(self.markupMassage):
-                    self.markupMassage = self.MARKUP_MASSAGE
-                for fix, m in self.markupMassage:
-                    markup = fix.sub(m, markup)
-                # TODO: We get rid of markupMassage so that the
-                # soup object can be deepcopied later on. Some
-                # Python installations can't copy regexes. If anyone
-                # was relying on the existence of markupMassage, this
-                # might cause problems.
-                del(self.markupMassage)
-        self.builder.reset()
-
-        self.builder.feed(markup)
-        # Close out any unfinished strings and close all the open tags.
-        self.endData()
-        while self.currentTag.name != self.ROOT_TAG_NAME:
-            self.popTag()
-
-    def isSelfClosingTag(self, name):
-        """Returns true iff the given string is the name of a
-        self-closing tag according to this parser."""
-        return self.SELF_CLOSING_TAGS.has_key(name) \
-               or self.instanceSelfClosingTags.has_key(name)
-
-    def reset(self):
-        Tag.__init__(self, self, self.ROOT_TAG_NAME)
-        self.hidden = 1
-        self.builder.reset()
-        self.currentData = []
-        self.currentTag = None
-        self.tagStack = []
-        self.quoteStack = []
-        self.pushTag(self)
-
-    def popTag(self):
-        tag = self.tagStack.pop()
-        # Tags with just one string-owning child get the child as a
-        # 'string' property, so that soup.tag.string is shorthand for
-        # soup.tag.contents[0]
-        if len(self.currentTag.contents) == 1 and \
-           isinstance(self.currentTag.contents[0], NavigableString):
-            self.currentTag.string = self.currentTag.contents[0]
-
-        #print "Pop", tag.name
-        if self.tagStack:
-            self.currentTag = self.tagStack[-1]
-        return self.currentTag
-
-    def pushTag(self, tag):
-        #print "Push", tag.name
-        if self.currentTag:
-            self.currentTag.contents.append(tag)
-        self.tagStack.append(tag)
-        self.currentTag = self.tagStack[-1]
-
-    def endData(self, containerClass=NavigableString):
-        if self.currentData:
-            currentData = u''.join(self.currentData)
-            if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and
-                not set([tag.name for tag in self.tagStack]).intersection(
-                    self.PRESERVE_WHITESPACE_TAGS)):
-                if '\n' in currentData:
-                    currentData = '\n'
-                else:
-                    currentData = ' '
-            self.currentData = []
-            if self.parseOnlyThese and len(self.tagStack) <= 1 and \
-                   (not self.parseOnlyThese.text or \
-                    not self.parseOnlyThese.search(currentData)):
-                return
-            o = containerClass(currentData)
-            o.setup(self.currentTag, self.previous)
-            if self.previous:
-                self.previous.next = o
-            self.previous = o
-            self.currentTag.contents.append(o)
-
-
-    def _popToTag(self, name, inclusivePop=True):
-        """Pops the tag stack up to and including the most recent
-        instance of the given tag. If inclusivePop is false, pops the tag
-        stack up to but *not* including the most recent instqance of
-        the given tag."""
-        #print "Popping to %s" % name
-        if name == self.ROOT_TAG_NAME:
-            return
-
-        numPops = 0
-        mostRecentTag = None
-        for i in range(len(self.tagStack)-1, 0, -1):
-            if name == self.tagStack[i].name:
-                numPops = len(self.tagStack)-i
-                break
-        if not inclusivePop:
-            numPops = numPops - 1
-
-        for i in range(0, numPops):
-            mostRecentTag = self.popTag()
-        return mostRecentTag
-
-    def _smartPop(self, name):
-
-        """We need to pop up to the previous tag of this type, unless
-        one of this tag's nesting reset triggers comes between this
-        tag and the previous tag of this type, OR unless this tag is a
-        generic nesting trigger and another generic nesting trigger
-        comes between this tag and the previous tag of this type.
-
-        Examples:
-         <p>Foo<b>Bar *<p>* should pop to 'p', not 'b'.
-         <p>Foo<table>Bar *<p>* should pop to 'table', not 'p'.
-         <p>Foo<table><tr>Bar *<p>* should pop to 'tr', not 'p'.
-
-         <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.
-         <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'
-         <td><tr><td> *<td>* should pop to 'tr', not the first 'td'
-        """
-
-        nestingResetTriggers = self.NESTABLE_TAGS.get(name)
-        isNestable = nestingResetTriggers != None
-        isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
-        popTo = None
-        inclusive = True
-        for i in range(len(self.tagStack)-1, 0, -1):
-            p = self.tagStack[i]
-            if (not p or p.name == name) and not isNestable:
-                #Non-nestable tags get popped to the top or to their
-                #last occurance.
-                popTo = name
-                break
-            if (nestingResetTriggers != None
-                and p.name in nestingResetTriggers) \
-                or (nestingResetTriggers == None and isResetNesting
-                    and self.RESET_NESTING_TAGS.has_key(p.name)):
-
-                #If we encounter one of the nesting reset triggers
-                #peculiar to this tag, or we encounter another tag
-                #that causes nesting to reset, pop up to but not
-                #including that tag.
-                popTo = p.name
-                inclusive = False
-                break
-            p = p.parent
-        if popTo:
-            self._popToTag(popTo, inclusive)
-
-    def unknown_starttag(self, name, attrs, selfClosing=0):
-        #print "Start tag %s: %s" % (name, attrs)
-        if self.quoteStack:
-            #This is not a real tag.
-            #print "<%s> is not real!" % name
-            attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs))
-            self.handle_data('<%s%s>' % (name, attrs))
-            return
-        self.endData()
-
-        if not self.isSelfClosingTag(name) and not selfClosing:
-            self._smartPop(name)
-
-        if self.parseOnlyThese and len(self.tagStack) <= 1 \
-               and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)):
-            return
-
-        tag = Tag(self, name, attrs, self.currentTag, self.previous)
-        if self.previous:
-            self.previous.next = tag
-        self.previous = tag
-        self.pushTag(tag)
-        if selfClosing or self.isSelfClosingTag(name):
-            self.popTag()
-        if name in self.QUOTE_TAGS:
-            #print "Beginning quote (%s)" % name
-            self.quoteStack.append(name)
-            self.literal = 1
-        return tag
-
-    def unknown_endtag(self, name):
-        #print "End tag %s" % name
-        if self.quoteStack and self.quoteStack[-1] != name:
-            #This is not a real end tag.
-            #print "</%s> is not real!" % name
-            self.handle_data('</%s>' % name)
-            return
-        self.endData()
-        self._popToTag(name)
-        if self.quoteStack and self.quoteStack[-1] == name:
-            self.quoteStack.pop()
-            self.literal = (len(self.quoteStack) > 0)
-
-    def handle_data(self, data):
-        self.currentData.append(data)
-
-    def extractCharsetFromMeta(self, attrs):
-        self.unknown_starttag('meta', attrs)
-
-
-class BeautifulSoup(BeautifulStoneSoup):
-
-    """This parser knows the following facts about HTML:
-
-    * Some tags have no closing tag and should be interpreted as being
-      closed as soon as they are encountered.
-
-    * The text inside some tags (ie. 'script') may contain tags which
-      are not really part of the document and which should be parsed
-      as text, not tags. If you want to parse the text as tags, you can
-      always fetch it and parse it explicitly.
-
-    * Tag nesting rules:
-
-      Most tags can't be nested at all. For instance, the occurance of
-      a <p> tag should implicitly close the previous <p> tag.
-
-       <p>Para1<p>Para2
-        should be transformed into:
-       <p>Para1</p><p>Para2
-
-      Some tags can be nested arbitrarily. For instance, the occurance
-      of a <blockquote> tag should _not_ implicitly close the previous
-      <blockquote> tag.
-
-       Alice said: <blockquote>Bob said: <blockquote>Blah
-        should NOT be transformed into:
-       Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah
-
-      Some tags can be nested, but the nesting is reset by the
-      interposition of other tags. For instance, a <tr> tag should
-      implicitly close the previous <tr> tag within the same <table>,
-      but not close a <tr> tag in another table.
-
-       <table><tr>Blah<tr>Blah
-        should be transformed into:
-       <table><tr>Blah</tr><tr>Blah
-        but,
-       <tr>Blah<table><tr>Blah
-        should NOT be transformed into
-       <tr>Blah<table></tr><tr>Blah
-
-    Differing assumptions about tag nesting rules are a major source
-    of problems with the BeautifulSoup class. If BeautifulSoup is not
-    treating as nestable a tag your page author treats as nestable,
-    try ICantBelieveItsBeautifulSoup, MinimalSoup, or
-    BeautifulStoneSoup before writing your own subclass."""
-
-    def __init__(self, *args, **kwargs):
-        if not kwargs.has_key('smartQuotesTo'):
-            kwargs['smartQuotesTo'] = self.HTML_ENTITIES
-        kwargs['isHTML'] = True
-        BeautifulStoneSoup.__init__(self, *args, **kwargs)
-
-    SELF_CLOSING_TAGS = buildTagMap(None,
-                                    ['br' , 'hr', 'input', 'img', 'meta',
-                                    'spacer', 'link', 'frame', 'base'])
-
-    PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea'])
-
-    QUOTE_TAGS = {'script' : None, 'textarea' : None}
-
-    #According to the HTML standard, each of these inline tags can
-    #contain another tag of the same type. Furthermore, it's common
-    #to actually use these tags this way.
-    NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup',
-                            'center']
-
-    #According to the HTML standard, these block tags can contain
-    #another tag of the same type. Furthermore, it's common
-    #to actually use these tags this way.
-    NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del']
-
-    #Lists can contain other lists, but there are restrictions.
-    NESTABLE_LIST_TAGS = { 'ol' : [],
-                           'ul' : [],
-                           'li' : ['ul', 'ol'],
-                           'dl' : [],
-                           'dd' : ['dl'],
-                           'dt' : ['dl'] }
-
-    #Tables can contain other tables, but there are restrictions.
-    NESTABLE_TABLE_TAGS = {'table' : [],
-                           'tr' : ['table', 'tbody', 'tfoot', 'thead'],
-                           'td' : ['tr'],
-                           'th' : ['tr'],
-                           'thead' : ['table'],
-                           'tbody' : ['table'],
-                           'tfoot' : ['table'],
-                           }
-
-    NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre']
-
-    #If one of these tags is encountered, all tags up to the next tag of
-    #this type are popped.
-    RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript',
-                                     NON_NESTABLE_BLOCK_TAGS,
-                                     NESTABLE_LIST_TAGS,
-                                     NESTABLE_TABLE_TAGS)
-
-    NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS,
-                                NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS)
-
-    # Used to detect the charset in a META tag; see start_meta
-    CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)
-
-    def extractCharsetFromMeta(self, attrs):
-        """Beautiful Soup can detect a charset included in a META tag,
-        try to convert the document to that charset, and re-parse the
-        document from the beginning."""
-        httpEquiv = None
-        contentType = None
-        contentTypeIndex = None
-        tagNeedsEncodingSubstitution = False
-
-        for i in range(0, len(attrs)):
-            key, value = attrs[i]
-            key = key.lower()
-            if key == 'http-equiv':
-                httpEquiv = value
-            elif key == 'content':
-                contentType = value
-                contentTypeIndex = i
-
-        if httpEquiv and contentType: # It's an interesting meta tag.
-            match = self.CHARSET_RE.search(contentType)
-            if match:
-                if (self.declaredHTMLEncoding is not None or
-                    self.originalEncoding == self.fromEncoding):
-                    # An HTML encoding was sniffed while converting
-                    # the document to Unicode, or an HTML encoding was
-                    # sniffed during a previous pass through the
-                    # document, or an encoding was specified
-                    # explicitly and it worked. Rewrite the meta tag.
-                    def rewrite(match):
-                        return match.group(1) + "%SOUP-ENCODING%"
-                    newAttr = self.CHARSET_RE.sub(rewrite, contentType)
-                    attrs[contentTypeIndex] = (attrs[contentTypeIndex][0],
-                                               newAttr)
-                    tagNeedsEncodingSubstitution = True
-                else:
-                    # This is our first pass through the document.
-                    # Go through it again with the encoding information.
-                    newCharset = match.group(3)
-                    if newCharset and newCharset != self.originalEncoding:
-                        self.declaredHTMLEncoding = newCharset
-                        self._feed(self.declaredHTMLEncoding)
-                        raise StopParsing
-                    pass
-        tag = self.unknown_starttag("meta", attrs)
-        if tag and tagNeedsEncodingSubstitution:
-            tag.containsSubstitutions = True
-
-
-class StopParsing(Exception):
-    pass
-
-class ICantBelieveItsBeautifulSoup(BeautifulSoup):
-
-    """The BeautifulSoup class is oriented towards skipping over
-    common HTML errors like unclosed tags. However, sometimes it makes
-    errors of its own. For instance, consider this fragment:
-
-     <b>Foo<b>Bar</b></b>
-
-    This is perfectly valid (if bizarre) HTML. However, the
-    BeautifulSoup class will implicitly close the first b tag when it
-    encounters the second 'b'. It will think the author wrote
-    "<b>Foo<b>Bar", and didn't close the first 'b' tag, because
-    there's no real-world reason to bold something that's already
-    bold. When it encounters '</b></b>' it will close two more 'b'
-    tags, for a grand total of three tags closed instead of two. This
-    can throw off the rest of your document structure. The same is
-    true of a number of other tags, listed below.
-
-    It's much more common for someone to forget to close a 'b' tag
-    than to actually use nested 'b' tags, and the BeautifulSoup class
-    handles the common case. This class handles the not-co-common
-    case: where you can't believe someone wrote what they did, but
-    it's valid HTML and BeautifulSoup screwed up by assuming it
-    wouldn't be."""
-
-    I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \
-     ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong',
-      'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b',
-      'big']
-
-    I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript']
-
-    NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS,
-                                I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,
-                                I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)
-
-class MinimalSoup(BeautifulSoup):
-    """The MinimalSoup class is for parsing HTML that contains
-    pathologically bad markup. It makes no assumptions about tag
-    nesting, but it does know which tags are self-closing, that
-    <script> tags contain Javascript and should not be parsed, that
-    META tags may contain encoding information, and so on.
-
-    This also makes it better for subclassing than BeautifulStoneSoup
-    or BeautifulSoup."""
-
-    RESET_NESTING_TAGS = buildTagMap('noscript')
-    NESTABLE_TAGS = {}
-
-class BeautifulSOAP(BeautifulStoneSoup):
-    """This class will push a tag with only a single string child into
-    the tag's parent as an attribute. The attribute's name is the tag
-    name, and the value is the string child. An example should give
-    the flavor of the change:
-
-    <foo><bar>baz</bar></foo>
-     =>
-    <foo bar="baz"><bar>baz</bar></foo>
-
-    You can then access fooTag['bar'] instead of fooTag.barTag.string.
-
-    This is, of course, useful for scraping structures that tend to
-    use subelements instead of attributes, such as SOAP messages. Note
-    that it modifies its input, so don't print the modified version
-    out.
-
-    I'm not sure how many people really want to use this class; let me
-    know if you do. Mainly I like the name."""
-
-    def popTag(self):
-        if len(self.tagStack) > 1:
-            tag = self.tagStack[-1]
-            parent = self.tagStack[-2]
-            parent._getAttrMap()
-            if (isinstance(tag, Tag) and len(tag.contents) == 1 and
-                isinstance(tag.contents[0], NavigableString) and
-                not parent.attrMap.has_key(tag.name)):
-                parent[tag.name] = tag.contents[0]
-        BeautifulStoneSoup.popTag(self)
-
-#Enterprise class names! It has come to our attention that some people
-#think the names of the Beautiful Soup parser classes are too silly
-#and "unprofessional" for use in enterprise screen-scraping. We feel
-#your pain! For such-minded folk, the Beautiful Soup Consortium And
-#All-Night Kosher Bakery recommends renaming this file to
-#"RobustParser.py" (or, in cases of extreme enterprisiness,
-#"RobustParserBeanInterface.class") and using the following
-#enterprise-friendly class aliases:
-class RobustXMLParser(BeautifulStoneSoup):
-    pass
-class RobustHTMLParser(BeautifulSoup):
-    pass
-class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup):
-    pass
-class RobustInsanelyWackAssHTMLParser(MinimalSoup):
-    pass
-class SimplifyingSOAPParser(BeautifulSOAP):
-    pass
-
-######################################################
-#
-# Bonus library: Unicode, Dammit
-#
-# This class forces XML data into a standard format (usually to UTF-8
-# or Unicode).  It is heavily based on code from Mark Pilgrim's
-# Universal Feed Parser. It does not rewrite the XML or HTML to
-# reflect a new encoding: that happens in BeautifulStoneSoup.handle_pi
-# (XML) and BeautifulSoup.start_meta (HTML).
-
-# Autodetects character encodings.
-# Download from http://chardet.feedparser.org/
-try:
-    import chardet
-#    import chardet.constants
-#    chardet.constants._debug = 1
-except ImportError:
-    chardet = None
-
-# cjkcodecs and iconv_codec make Python know about more character encodings.
-# Both are available from http://cjkpython.i18n.org/
-# They're built in if you use Python 2.4.
-try:
-    import cjkcodecs.aliases
-except ImportError:
-    pass
-try:
-    import iconv_codec
-except ImportError:
-    pass
-
-class UnicodeDammit:
-    """A class for detecting the encoding of a *ML document and
-    converting it to a Unicode string. If the source encoding is
-    windows-1252, can replace MS smart quotes with their HTML or XML
-    equivalents."""
-
-    # This dictionary maps commonly seen values for "charset" in HTML
-    # meta tags to the corresponding Python codec names. It only covers
-    # values that aren't in Python's aliases and can't be determined
-    # by the heuristics in find_codec.
-    CHARSET_ALIASES = { "macintosh" : "mac-roman",
-                        "x-sjis" : "shift-jis" }
-
-    def __init__(self, markup, overrideEncodings=[],
-                 smartQuotesTo='xml', isHTML=False):
-        self.declaredHTMLEncoding = None
-        self.markup, documentEncoding, sniffedEncoding = \
-                     self._detectEncoding(markup, isHTML)
-        self.smartQuotesTo = smartQuotesTo
-        self.triedEncodings = []
-        if markup == '' or isinstance(markup, unicode):
-            self.originalEncoding = None
-            self.unicode = unicode(markup)
-            return
-
-        u = None
-        for proposedEncoding in overrideEncodings:
-            u = self._convertFrom(proposedEncoding)
-            if u: break
-        if not u:
-            for proposedEncoding in (documentEncoding, sniffedEncoding):
-                u = self._convertFrom(proposedEncoding)
-                if u: break
-
-        # If no luck and we have auto-detection library, try that:
-        if not u and chardet and not isinstance(self.markup, unicode):
-            u = self._convertFrom(chardet.detect(self.markup)['encoding'])
-
-        # As a last resort, try utf-8 and windows-1252:
-        if not u:
-            for proposed_encoding in ("utf-8", "windows-1252"):
-                u = self._convertFrom(proposed_encoding)
-                if u: break
-
-        self.unicode = u
-        if not u: self.originalEncoding = None
-
-    def _subMSChar(self, match):
-        """Changes a MS smart quote character to an XML or HTML
-        entity."""
-        orig = match.group(1)
-        sub = self.MS_CHARS.get(orig)
-        if type(sub) == types.TupleType:
-            if self.smartQuotesTo == 'xml':
-                sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
-            else:
-                sub = '&'.encode() + sub[0].encode() + ';'.encode()
-        else:
-            sub = sub.encode()
-        return sub
-
-    def _convertFrom(self, proposed):
-        proposed = self.find_codec(proposed)
-        if not proposed or proposed in self.triedEncodings:
-            return None
-        self.triedEncodings.append(proposed)
-        markup = self.markup
-
-        # Convert smart quotes to HTML if coming from an encoding
-        # that might have them.
-        if self.smartQuotesTo and proposed.lower() in("windows-1252",
-                                                      "iso-8859-1",
-                                                      "iso-8859-2"):
-            smart_quotes_re = "([\x80-\x9f])"
-            smart_quotes_compiled = re.compile(smart_quotes_re)
-            markup = smart_quotes_compiled.sub(self._subMSChar, markup)
-
-        try:
-            # print "Trying to convert document to %s" % proposed
-            u = self._toUnicode(markup, proposed)
-            self.markup = u
-            self.originalEncoding = proposed
-        except Exception, e:
-            # print "That didn't work!"
-            # print e
-            return None
-        #print "Correct encoding: %s" % proposed
-        return self.markup
-
-    def _toUnicode(self, data, encoding):
-        '''Given a string and its encoding, decodes the string into Unicode.
-        %encoding is a string recognized by encodings.aliases'''
-
-        # strip Byte Order Mark (if present)
-        if (len(data) >= 4) and (data[:2] == '\xfe\xff') \
-               and (data[2:4] != '\x00\x00'):
-            encoding = 'utf-16be'
-            data = data[2:]
-        elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \
-                 and (data[2:4] != '\x00\x00'):
-            encoding = 'utf-16le'
-            data = data[2:]
-        elif data[:3] == '\xef\xbb\xbf':
-            encoding = 'utf-8'
-            data = data[3:]
-        elif data[:4] == '\x00\x00\xfe\xff':
-            encoding = 'utf-32be'
-            data = data[4:]
-        elif data[:4] == '\xff\xfe\x00\x00':
-            encoding = 'utf-32le'
-            data = data[4:]
-        newdata = unicode(data, encoding)
-        return newdata
-
-    def _detectEncoding(self, xml_data, isHTML=False):
-        """Given a document, tries to detect its XML encoding."""
-        xml_encoding = sniffed_xml_encoding = None
-        try:
-            if xml_data[:4] == '\x4c\x6f\xa7\x94':
-                # EBCDIC
-                xml_data = self._ebcdic_to_ascii(xml_data)
-            elif xml_data[:4] == '\x00\x3c\x00\x3f':
-                # UTF-16BE
-                sniffed_xml_encoding = 'utf-16be'
-                xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
-            elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
-                     and (xml_data[2:4] != '\x00\x00'):
-                # UTF-16BE with BOM
-                sniffed_xml_encoding = 'utf-16be'
-                xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
-            elif xml_data[:4] == '\x3c\x00\x3f\x00':
-                # UTF-16LE
-                sniffed_xml_encoding = 'utf-16le'
-                xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
-            elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
-                     (xml_data[2:4] != '\x00\x00'):
-                # UTF-16LE with BOM
-                sniffed_xml_encoding = 'utf-16le'
-                xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
-            elif xml_data[:4] == '\x00\x00\x00\x3c':
-                # UTF-32BE
-                sniffed_xml_encoding = 'utf-32be'
-                xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
-            elif xml_data[:4] == '\x3c\x00\x00\x00':
-                # UTF-32LE
-                sniffed_xml_encoding = 'utf-32le'
-                xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
-            elif xml_data[:4] == '\x00\x00\xfe\xff':
-                # UTF-32BE with BOM
-                sniffed_xml_encoding = 'utf-32be'
-                xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
-            elif xml_data[:4] == '\xff\xfe\x00\x00':
-                # UTF-32LE with BOM
-                sniffed_xml_encoding = 'utf-32le'
-                xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
-            elif xml_data[:3] == '\xef\xbb\xbf':
-                # UTF-8 with BOM
-                sniffed_xml_encoding = 'utf-8'
-                xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
-            else:
-                sniffed_xml_encoding = 'ascii'
-                pass
-        except:
-            xml_encoding_match = None
-        xml_encoding_re = '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode()
-        xml_encoding_match = re.compile(xml_encoding_re).match(xml_data)
-        if not xml_encoding_match and isHTML:
-            meta_re = '<\s*meta[^>]+charset=([^>]*?)[;\'">]'.encode()
-            regexp = re.compile(meta_re, re.I)
-            xml_encoding_match = regexp.search(xml_data)
-        if xml_encoding_match is not None:
-            xml_encoding = xml_encoding_match.groups()[0].decode(
-                'ascii').lower()
-            if isHTML:
-                self.declaredHTMLEncoding = xml_encoding
-            if sniffed_xml_encoding and \
-               (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode',
-                                 'iso-10646-ucs-4', 'ucs-4', 'csucs4',
-                                 'utf-16', 'utf-32', 'utf_16', 'utf_32',
-                                 'utf16', 'u16')):
-                xml_encoding = sniffed_xml_encoding
-        return xml_data, xml_encoding, sniffed_xml_encoding
-
-
-    def find_codec(self, charset):
-        return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \
-               or (charset and self._codec(charset.replace("-", ""))) \
-               or (charset and self._codec(charset.replace("-", "_"))) \
-               or charset
-
-    def _codec(self, charset):
-        if not charset: return charset
-        codec = None
-        try:
-            codecs.lookup(charset)
-            codec = charset
-        except (LookupError, ValueError):
-            pass
-        return codec
-
-    EBCDIC_TO_ASCII_MAP = None
-    def _ebcdic_to_ascii(self, s):
-        c = self.__class__
-        if not c.EBCDIC_TO_ASCII_MAP:
-            emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15,
-                    16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31,
-                    128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7,
-                    144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26,
-                    32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33,
-                    38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94,
-                    45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63,
-                    186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34,
-                    195,97,98,99,100,101,102,103,104,105,196,197,198,199,200,
-                    201,202,106,107,108,109,110,111,112,113,114,203,204,205,
-                    206,207,208,209,126,115,116,117,118,119,120,121,122,210,
-                    211,212,213,214,215,216,217,218,219,220,221,222,223,224,
-                    225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72,
-                    73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81,
-                    82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89,
-                    90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57,
-                    250,251,252,253,254,255)
-            import string
-            c.EBCDIC_TO_ASCII_MAP = string.maketrans( \
-            ''.join(map(chr, range(256))), ''.join(map(chr, emap)))
-        return s.translate(c.EBCDIC_TO_ASCII_MAP)
-
-    MS_CHARS = { '\x80' : ('euro', '20AC'),
-                 '\x81' : ' ',
-                 '\x82' : ('sbquo', '201A'),
-                 '\x83' : ('fnof', '192'),
-                 '\x84' : ('bdquo', '201E'),
-                 '\x85' : ('hellip', '2026'),
-                 '\x86' : ('dagger', '2020'),
-                 '\x87' : ('Dagger', '2021'),
-                 '\x88' : ('circ', '2C6'),
-                 '\x89' : ('permil', '2030'),
-                 '\x8A' : ('Scaron', '160'),
-                 '\x8B' : ('lsaquo', '2039'),
-                 '\x8C' : ('OElig', '152'),
-                 '\x8D' : '?',
-                 '\x8E' : ('#x17D', '17D'),
-                 '\x8F' : '?',
-                 '\x90' : '?',
-                 '\x91' : ('lsquo', '2018'),
-                 '\x92' : ('rsquo', '2019'),
-                 '\x93' : ('ldquo', '201C'),
-                 '\x94' : ('rdquo', '201D'),
-                 '\x95' : ('bull', '2022'),
-                 '\x96' : ('ndash', '2013'),
-                 '\x97' : ('mdash', '2014'),
-                 '\x98' : ('tilde', '2DC'),
-                 '\x99' : ('trade', '2122'),
-                 '\x9a' : ('scaron', '161'),
-                 '\x9b' : ('rsaquo', '203A'),
-                 '\x9c' : ('oelig', '153'),
-                 '\x9d' : '?',
-                 '\x9e' : ('#x17E', '17E'),
-                 '\x9f' : ('Yuml', ''),}
-
-#######################################################################
-
-
-#By default, act as an HTML pretty-printer.
-if __name__ == '__main__':
-    import sys
-    soup = BeautifulSoup(sys.stdin)
-    print soup.prettify()
diff --git a/readability/__init__.py b/readability/__init__.py
index f554b393..b36f021d 100644
--- a/readability/__init__.py
+++ b/readability/__init__.py
@@ -1,2 +1,3 @@
-from readability import Document, main
-from page_parser import ascii, Unparseable
+__version__ = "0.8.4.1"
+
+from .readability import Document
diff --git a/readability/browser.py b/readability/browser.py
new file mode 100644
index 00000000..42117a5a
--- /dev/null
+++ b/readability/browser.py
@@ -0,0 +1,21 @@
+def open_in_browser(html):
+    """
+    Write ``html`` (a str) to a fresh temporary ``.html`` file, encoded as
+    UTF-8 behind a charset meta tag, hand that file to ``webbrowser.open``
+    and return the ``file://`` URL that was opened.
+
+    The temporary file is never deleted: this is a debugging helper.
+    """
+    import os
+    import tempfile
+    import webbrowser
+
+    fd, path = tempfile.mkstemp(suffix=".html")
+    # The context manager closes the descriptor even when a write fails;
+    # the file itself is deliberately left behind for the browser.
+    with os.fdopen(fd, "wb") as out:
+        out.write(b"<meta charset='UTF-8' />")
+        out.write(html.encode("utf-8"))
+    url = "file://" + path.replace(os.path.sep, "/")
+    webbrowser.open(url)
+    return url
diff --git a/readability/cleaners.py b/readability/cleaners.py
new file mode 100644
index 00000000..e0b07260
--- /dev/null
+++ b/readability/cleaners.py
@@ -0,0 +1,55 @@
+# strip out a set of nuisance html attributes that can mess up rendering in RSS feeds
+import re
+try:
+    from lxml.html.clean import Cleaner
+except ImportError:
+    from lxml_html_clean import Cleaner
+
+# NOTE: entries are regex fragments, not globs; "on[a-z]+" matches onclick, onload, ...
+bad_attrs = ["width", "height", "style", "[-a-z]*color", "background[-a-z]*", "on[a-z]+"]
+single_quoted = "'[^']+'"
+double_quoted = '"[^"]+"'
+non_space = "[^ \"'>]+"
+htmlstrip = re.compile(
+    "<"  # open
+    "([^>]+) "  # prefix
+    "(?:%s) *" % ("|".join(bad_attrs),)  # undesirable attributes
+    + "= *(?:%s|%s|%s)" % (non_space, single_quoted, double_quoted)  # value
+    + "([^>]*)"  # postfix
+    ">",  # end
+    re.I,
+)
+
+
+def clean_attributes(html):
+    while htmlstrip.search(html):  # a pass strips at most one attribute per tag
+        html = htmlstrip.sub("<\\1\\2>", html)  # keep only prefix + postfix groups
+    return html
+
+
+def normalize_spaces(s):
+    """Collapse every run of whitespace in ``s`` into a single space and
+    strip both ends; a falsy ``s`` (None, "") yields ""."""
+    if not s:
+        return ""
+    return " ".join(s.split())
+
+
+html_cleaner = Cleaner(
+    scripts=True,
+    javascript=True,
+    comments=True,
+    style=True,
+    links=True,
+    meta=False,
+    add_nofollow=False,
+    page_structure=False,
+    processing_instructions=True,
+    embedded=False,
+    frames=False,
+    forms=False,
+    annoying_tags=False,
+    remove_tags=None,
+    remove_unknown_tags=False,
+    safe_attrs_only=False,
+)
diff --git a/readability/debug.py b/readability/debug.py
new file mode 100644
index 00000000..3bc81974
--- /dev/null
+++ b/readability/debug.py
@@ -0,0 +1,51 @@
+import re
+
+
+# FIXME: use with caution, can leak memory
+uids = {}
+uids_document = None
+
+
+def describe_node(node):  # -> short label shaped like tag#id.class1.class2
+    global uids  # module-level node -> number cache
+    if node is None:
+        return ""
+    if not hasattr(node, "tag"):  # not an element-like object: show its type
+        return "[%s]" % type(node)
+    name = node.tag
+    if node.get("id", ""):
+        name += "#" + node.get("id")
+    if node.get("class", "").strip():
+        name += "." + ".".join(node.get("class").split())
+    if name[:4] in ["div#", "div."]:
+        name = name[3:]  # "div#x" / "div.x" -> "#x" / ".x"
+    if name in ["tr", "td", "div", "p"]:  # bare tag with no id/class: add a number
+        uid = uids.get(node)
+        if uid is None:
+            uid = uids[node] = len(uids) + 1  # first sighting: next 1-based number
+        name += "{%02d}" % uid
+    return name
+
+
+def describe(node, depth=1):  # -> "parent>node" labels built with describe_node()
+    global uids, uids_document
+    doc = node.getroottree().getroot()
+    if doc != uids_document:  # different document: restart the numbering cache
+        uids = {}
+        uids_document = doc
+
+    # return repr(NodeRepr(node))
+    parent = ""
+    if depth and node.getparent() is not None:  # prefix up to `depth` ancestors
+        parent = describe(node.getparent(), depth=depth - 1) + ">"
+    return parent + describe_node(node)
+
+
+RE_COLLAPSE_WHITESPACES = re.compile(r"\s+", re.U)
+
+
+def text_content(elem, length=40):
+    """Text of ``elem`` with whitespace runs collapsed to one space; texts of
+    ``length`` characters or more are cut to ``length`` and get "..." appended."""
+    flat = RE_COLLAPSE_WHITESPACES.sub(" ", elem.text_content().replace("\r", ""))
+    return flat if len(flat) < length else flat[:length] + "..."
diff --git a/readability/encoding.py b/readability/encoding.py
new file mode 100644
index 00000000..08332df0
--- /dev/null
+++ b/readability/encoding.py
@@ -0,0 +1,64 @@
+import re
+try:
+    import cchardet as chardet
+except ImportError:
+    import chardet
+
+
+RE_CHARSET = re.compile(br'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
+RE_PRAGMA = re.compile(br'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
+RE_XML = re.compile(br'^<\?xml.*?encoding=["\']*(.+?)["\'>]')
+
+CHARSETS = {
+    "big5": "big5hkscs",
+    "gb2312": "gb18030",
+    "ascii": "utf-8",
+    "maccyrillic": "cp1251",
+    "win1251": "cp1251",
+    "win-1251": "cp1251",
+    "windows-1251": "cp1251",
+}
+
+
+def fix_charset(encoding):
+    """Overrides encoding when charset declaration
+       or charset determination is a subset of a larger
+       charset.  Created because of issues with Chinese websites"""
+    encoding = encoding.lower()
+    return CHARSETS.get(encoding, encoding)
+
+
+def get_encoding(page):
+    """Guess the codec name for ``page`` (raw bytes): declared charsets first, then chardet."""
+    declared_encodings = (
+        RE_CHARSET.findall(page) + RE_PRAGMA.findall(page) + RE_XML.findall(page)
+    )
+
+    # Try any declared encodings
+    for declared_encoding in declared_encodings:
+        try:
+            # The patterns above are bytes patterns (``page`` is bytes), so
+            # each match is bytes too. Decode blindly with ascii because no
+            # one should ever use non-ascii characters in the name of an
+            # encoding.
+            declared_encoding = declared_encoding.decode("ascii", "replace")
+
+            encoding = fix_charset(declared_encoding)
+            # Now let's decode the page
+            page.decode(encoding)
+            # It worked!
+            return encoding
+        except (LookupError, ValueError):  # ValueError covers UnicodeDecodeError
+            pass  # wrong or unknown codec name: try the next declaration
+
+    # Fallback to chardet if declared encodings fail
+    # Remove all HTML tags, and leave only text for chardet
+    text = re.sub(br'(\s*</?[^>]*>)+\s*', b' ', page).strip()
+    enc = 'utf-8'
+    if len(text) < 10:
+        return enc  # can't guess
+    res = chardet.detect(text)
+    enc = res["encoding"] or "utf-8"
+    # print '->', enc, "%.2f" % res['confidence']
+    enc = fix_charset(enc)
+    return enc
diff --git a/readability/htmls.py b/readability/htmls.py
new file mode 100644
index 00000000..d99a9f53
--- /dev/null
+++ b/readability/htmls.py
@@ -0,0 +1,161 @@
+from lxml.html import tostring
+import lxml.html
+import re
+
+from .cleaners import normalize_spaces, clean_attributes
+from .encoding import get_encoding
+
+utf8_parser = lxml.html.HTMLParser(encoding="utf-8")
+
+
+def build_doc(page):  # -> (lxml document, encoding name or None)
+    if isinstance(page, str):
+        encoding = None  # already text: no encoding was detected
+        decoded_page = page
+    else:
+        encoding = get_encoding(page) or "utf-8"
+        decoded_page = page.decode(encoding, "replace")  # bad bytes become U+FFFD
+
+    # XXX: we have to do .decode and .encode even for utf-8 pages to remove bad characters
+    doc = lxml.html.document_fromstring(
+        decoded_page.encode("utf-8", "replace"), parser=utf8_parser
+    )
+    return doc, encoding
+
+
+def js_re(src, pattern, flags, repl):  # JS-style src.replace(/pattern/flags, repl); "$1" -> "\1"
+    return re.compile(pattern, flags).sub(repl.replace("$", "\\"), src)
+
+
+def normalize_entities(cur_title):
+    """Swap typographic dashes, guillemets, NBSP and a few entities for ASCII."""
+    replacements = (
+        ("\u2014", "-"),
+        ("\u2013", "-"),
+        ("&mdash;", "-"),
+        ("&ndash;", "-"),
+        ("\u00A0", " "),
+        ("\u00AB", '"'),
+        ("\u00BB", '"'),
+        ("&quot;", '"'),
+    )
+    for needle, plain in replacements:
+        cur_title = cur_title.replace(needle, plain)
+
+    return cur_title
+
+
+def norm_title(title):
+    return normalize_entities(normalize_spaces(title))  # collapse whitespace, then map dashes/quotes
+
+
+def get_title(doc):
+    """Normalised text of the first <title>, or "[no-title]" if absent/empty."""
+    node = doc.find(".//title")
+    if node is not None and node.text:
+        return norm_title(node.text)
+    return "[no-title]"
+
+
+def get_author(doc):
+    """Content of the first <meta name="author">, or "[no-author]" if absent/empty."""
+    meta = doc.find(".//meta[@name='author']")
+    if meta is None:
+        return "[no-author]"
+    content = meta.get('content')
+    return content if content else "[no-author]"
+
+
+def add_match(collection, text, orig):
+    text = norm_title(text)
+    if len(text.split()) >= 2 and len(text) >= 15:  # at least 2 words and 15 chars
+        if text.replace('"', "") in orig.replace('"', ""):  # substring test ignores double quotes
+            collection.add(text)
+
+
+TITLE_CSS_HEURISTICS = [
+    "#title",
+    "#head",
+    "#heading",
+    ".pageTitle",
+    ".news_title",
+    ".title",
+    ".head",
+    ".heading",
+    ".contentheading",
+    ".small_header_red",
+]
+
+
+def shorten_title(doc):
+    title = doc.find(".//title")
+    if title is None or title.text is None or len(title.text) == 0:
+        return ""  # no usable <title>
+
+    title = orig = norm_title(title.text)  # orig stays the full title as fallback
+
+    candidates = set()
+
+    for item in [".//h1", ".//h2", ".//h3"]:
+        for e in list(doc.iterfind(item)):
+            if e.text:
+                add_match(candidates, e.text, orig)
+            if e.text_content():
+                add_match(candidates, e.text_content(), orig)
+
+    for item in TITLE_CSS_HEURISTICS:
+        for e in doc.cssselect(item):
+            if e.text:
+                add_match(candidates, e.text, orig)
+            if e.text_content():
+                add_match(candidates, e.text_content(), orig)
+
+    cjk = re.compile('[\u4e00-\u9fff]+')
+
+    if candidates:
+        title = sorted(candidates, key=len)[-1]  # longest collected candidate wins
+    else:
+        for delimiter in [" | ", " - ", " :: ", " / "]:
+            if delimiter in title:
+                parts = orig.split(delimiter)
+                p0 = parts[0]
+                pl = parts[-1]
+                if (len(p0.split()) >= 4) or (len(p0) >= 4 and cjk.search(p0)):
+                    title = p0
+                    break
+                elif (len(pl.split()) >= 4) or (len(pl) >= 4 and cjk.search(pl)):
+                    title = pl
+                    break
+        else:  # for-else: only runs when no delimiter branch hit `break`
+            if ": " in title:
+                p1 = orig.split(": ")[-1]
+                if (len(p1.split()) >= 4) or (len(p1) >= 4 and cjk.search(p1)):
+                    title = p1
+                else:
+                    title = orig.split(": ", 1)[1]  # everything after the first ": "
+
+    if cjk.search(title):
+        if not (4 <= len(title) < 100):  # Allow length >= 4, cap at 100
+            return orig
+    elif not 15 < len(title) < 150:  # otherwise keep only results of 16..149 chars
+        return orig
+
+    return title
+
+
+# is it necessary? Cleaner from LXML is initialized correctly in cleaners.py
+def get_body(doc):
+    """Serialise ``doc.body`` (or ``doc`` itself when there is no body) to a str
+    with <script>/<link>/<style> subtrees and nuisance attributes removed."""
+    for elem in doc.xpath(".//script | .//link | .//style"):
+        elem.drop_tree()
+    # Compare against None explicitly: an lxml element without child elements
+    # is falsy, so ``doc.body or doc`` wrongly fell back to the whole document
+    # for bodies that contain only text.
+    body = doc.body
+    # tostring() may hand back bytes, hence the decode below
+    # FIXME: isn't better to use tounicode?
+    raw_html = tostring(body if body is not None else doc)
+    if isinstance(raw_html, bytes):
+        raw_html = raw_html.decode()
+    return clean_attributes(raw_html)
diff --git a/readability/page_parser.py b/readability/page_parser.py
deleted file mode 100644
index 1c80ca70..00000000
--- a/readability/page_parser.py
+++ /dev/null
@@ -1,145 +0,0 @@
-import re
-from url_helpers import absolute_url
-from BeautifulSoup import BeautifulSoup, HTMLParseError, UnicodeDammit
-from logging import error
-
-__all__ = [
-	'Unparseable',
-	'parse',
-	'get_title',
-	'get_body',
-	'ascii']
-
-def debug(s): pass
-
-class Unparseable(ValueError):
-	pass
-
-def parse(raw_content, base_href=None, notify=lambda x: None):
-	for parse_method in _parse_methods():
-		try:
-			return parse_method(raw_content, base_href)
-		except HTMLParseError, e:
-			notify("parsing (%s) failed: %s" % (parse_method.__name__, e))
-			continue
-	raise Unparseable()
-
-def get_title(soup):
-	title = unicode(getattr(soup.title, 'string', ''))
-	if not title:
-		return None
-	return normalize_spaces(title)
-
-
-def get_body(soup):
-	[ elem.extract() for elem in soup.findAll(['script', 'link', 'style']) ]
-	raw_html = unicode(soup.body or soup)
-	cleaned = clean_attributes(raw_html)
-	try:
-		BeautifulSoup(cleaned)
-		return cleaned
-	except HTMLParseError:
-		error("cleansing broke html content: %s\n---------\n%s" % (raw_html,cleaned))
-		return raw_html
-
-def ascii(s):
-	return s.decode('ascii', 'ignore')
-
-class Replacement(object):
-	def __init__(self, desc, regex, replacement):
-		self.desc = desc
-		self.regex = regex
-		self.replacement = replacement
-	
-	def apply(self, content):
-#		# useful for debugging:
-#		try:
-#			print self. desc + ':' + str(self.regex.findall(content))
-#		except RuntimeError: pass
-		return self.regex.sub(self.replacement, content)
-
-def beautiful_soup(content, base_href):
-	soup = BeautifulSoup(content)
-	if base_href:
-		_fix_references(soup, base_href)
-	return soup
-
-
-def _make_absolute_links(soup, base_href):
-	for link in soup.findAll('a', attrs={'href':True}):
-		link['href'] = absolute_url(link['href'], base_href)
-
-def _make_absolute_images(soup, base_href):
-	for img in soup.findAll('img', attrs={'src':True}):
-		img['src'] = absolute_url(img['src'], base_href)
-
-def _fix_references(soup, base_href):
-	_make_absolute_links(soup, base_href)
-	_make_absolute_images(soup, base_href)
-
-# a bunch of regexes to hack around lousy html
-dodgy_regexes = (
-	Replacement('javascript',
-		regex=re.compile('<script.*?</script[^>]*>', re.DOTALL | re.IGNORECASE),
-		replacement=''),
-
-	Replacement('double double-quoted attributes',
-		regex=re.compile('(="[^"]+")"+'),
-		replacement='\\1'),
-
-	Replacement('unclosed tags',
-		regex = re.compile('(<[a-zA-Z]+[^>]*)(<[a-zA-Z]+[^<>]*>)'),
-		replacement='\\1>\\2'),
-
-	Replacement('unclosed (numerical) attribute values',
-		regex = re.compile('(<[^>]*[a-zA-Z]+\s*=\s*"[0-9]+)( [a-zA-Z]+="\w+"|/?>)'),
-		replacement='\\1"\\2'),
-	)
-	
-
-# helpers for parsing
-def normalize_spaces(s):
-	"""replace any sequence of whitespace
-	characters with a single space"""
-	return ' '.join(s.split())
-
-def _remove_crufty_html(content):
-	for replacement in dodgy_regexes:
-		content = replacement.apply(content)
-	return content
-
-def _parse_methods():
-	def unicode_cleansed(content, base_href):
-		content = UnicodeDammit(content, isHTML=True).markup
-		cleaned = _remove_crufty_html(content)
-		debug("Cleaned content: %s" % (cleaned,))
-		return beautiful_soup(cleaned, base_href)
-
-	def ascii_cleansed(content, base_href):
-		content = ascii(content)
-		cleaned = _remove_crufty_html(content)
-		debug("Cleaned content: %s" % (cleaned,))
-		return beautiful_soup(cleaned, base_href)
-
-	return (
-		beautiful_soup,
-		unicode_cleansed,
-		ascii_cleansed)
-
-# strip out a set of nuisance html attributes that can mess up rendering in RSS feeds
-bad_attrs = ['width','height','style','[-a-z]*color','background[-a-z]*']
-single_quoted = "'[^']+'"
-double_quoted = '"[^"]+"'
-non_space = '[^ "\'>]+'
-htmlstrip = re.compile("<" # open
-	"([^>]+) " # prefix
-	"(?:%s) *" % ('|'.join(bad_attrs),) + # undesirable attributes
-	'= *(?:%s|%s|%s)' % (non_space, single_quoted, double_quoted) + # value
-	"([^>]*)"  # postfix
-	">"        # end
-, re.I)
-def clean_attributes(html):
-	while htmlstrip.search(html):
-		html = htmlstrip.sub('<\\1\\2>', html)
-	return html
-
diff --git a/readability/readability.py b/readability/readability.py
index 9c5f5c6c..c5739056 100755
--- a/readability/readability.py
+++ b/readability/readability.py
@@ -1,343 +1,747 @@
 #!/usr/bin/env python
-from BeautifulSoup import NavigableString
-from page_parser import parse, get_title, get_body, Unparseable
 import logging
 import re
-
-REGEXES = { 'unlikelyCandidatesRe': re.compile('combx|comment|disqus|foot|header|menu|meta|nav|rss|shoutbox|sidebar|sponsor',re.I),
-	'okMaybeItsACandidateRe': re.compile('and|article|body|column|main',re.I),
-	'positiveRe': re.compile('article|body|content|entry|hentry|page|pagination|post|text',re.I),
-	'negativeRe': re.compile('combx|comment|contact|foot|footer|footnote|link|media|meta|promo|related|scroll|shoutbox|sponsor|tags|widget',re.I),
-	'divToPElementsRe': re.compile('<(a|blockquote|dl|div|img|ol|p|pre|table|ul)',re.I),
-	'replaceBrsRe': re.compile('(<br[^>]*>[ \n\r\t]*){2,}',re.I),
-	'replaceFontsRe': re.compile('<(\/?)font[^>]*>',re.I),
-	'trimRe': re.compile('^\s+|\s+$/'),
-	'normalizeRe': re.compile('\s{2,}/'),
-	'killBreaksRe': re.compile('(<br\s*\/?>(\s|&nbsp;?)*){1,}/'),
-	'videoRe': re.compile('http:\/\/(www\.)?(youtube|vimeo)\.com', re.I),
+import sys
+import urllib.request
+import urllib.parse
+import urllib.error
+
+from lxml.etree import tostring
+from lxml.etree import tounicode
+from lxml.etree import _ElementTree
+from lxml.html import document_fromstring
+from lxml.html import fragment_fromstring
+from lxml.html import HtmlElement
+
+from .cleaners import clean_attributes
+from .cleaners import html_cleaner
+from .htmls import build_doc
+from .htmls import get_body
+from .htmls import get_title
+from .htmls import get_author
+from .htmls import shorten_title
+from .debug import describe, text_content
+
+
+log = logging.getLogger("readability.readability")
+
+REGEXES = {
+    "unlikelyCandidatesRe": re.compile(
+        r"combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter",
+        re.I,
+    ),
+    "okMaybeItsACandidateRe": re.compile(r"and|article|body|column|main|shadow", re.I),
+    "positiveRe": re.compile(
+        r"article|body|content|entry|hentry|main|page|pagination|post|text|blog|story",
+        re.I,
+    ),
+    "negativeRe": re.compile(
+        r"combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget",
+        re.I,
+    ),
+    "divToPElementsRe": re.compile(
+        r"<(a|blockquote|dl|div|img|ol|p|pre|table|ul)", re.I
+    ),
+    # 'replaceBrsRe': re.compile(r'(<br[^>]*>[ \n\r\t]*){2,}',re.I),
+    # 'replaceFontsRe': re.compile(r'<(\/?)font[^>]*>',re.I),
+    # 'trimRe': re.compile(r'^\s+|\s+$/'),
+    # 'normalizeRe': re.compile(r'\s{2,}/'),
+    # 'killBreaksRe': re.compile(r'(<br\s*\/?>(\s|&nbsp;?)*){1,}/'),
+    "videoRe": re.compile(r"https?:\/\/(www\.)?(youtube|vimeo)\.com", re.I),
+    # skipFootnoteLink:      /^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i,
 }
 
-from collections import defaultdict
-def describe(node):
-	if not hasattr(node, 'name'):
-		return "[text]"
-	return "%s#%s.%s" % (
-		node.name, node.get('id', ''), node.get('class',''))
 
-def _text(node):
-	return " ".join(node.findAll(text=True))
+class Unparseable(ValueError):
+    pass
+
+
+def to_int(x):
+    """Convert a size string to int: "px" is dropped, "em" is multiplied by 12; falsy input gives None."""
+    if not x:
+        return None
+    x = x.strip()
+    for suffix, factor in (("px", 1), ("em", 12)):
+        if x.endswith(suffix):
+            return int(x[:-2]) * factor
+    return int(x)
+
+
+def clean(text):
+    """Cap huge whitespace runs, collapse whitespace around newlines, turn tabs/space runs into a space, strip."""
+    # Many spaces make the following regexes run forever
+    capped = re.sub(r"\s{255,}", " " * 255, text)
+    unwrapped = re.sub(r"\s*\n\s*", "\n", capped)
+    return re.sub(r"\t|[ \t]{2,}", " ", unwrapped).strip()
+
+
+def text_length(i):
+    return len(clean(i.text_content() or ""))
+
+
+def compile_pattern(elements):
+    """Compile a regex, list/tuple or comma-separated str/bytes of keywords (blanks skipped); None if none left."""
+    if not elements:
+        return None
+    elif isinstance(elements, re.Pattern):
+        return elements
+    elif isinstance(elements, bytes):
+        elements = str(elements, "utf-8")
+    if isinstance(elements, str):
+        elements = elements.split(",")
+    if not isinstance(elements, (list, tuple)):
+        raise Exception(f"Unknown type for the pattern: {type(elements)}")
+    words = [re.escape(w) for w in (x.strip() for x in elements) if w]  # "" would match everything
+    return re.compile("|".join(words), re.U) if words else None
+
 
 class Document:
-	TEXT_LENGTH_THRESHOLD = 25
-	RETRY_LENGTH = 250
-
-	def __init__(self, input, notify=None, **options):
-		self.input = input
-		self.options = defaultdict(lambda: None)
-		for k, v in options.items():
-			self.options[k] = v
-		self.notify = notify or logging.info
-		self.html = None
-
-	def _html(self, force=False):
-		if force or self.html is None:
-			self.html = parse(self.input, self.options['url'], notify=self.notify)
-		return self.html
-	
-	def content(self):
-		return get_body(self._html())
-	
-	def title(self):
-		return get_title(self._html())
-
-	def summary(self):
-		try:
-			ruthless = True
-			while True:
-				self._html(True)
-				[i.extract() for i in self.tags(self.html, 'script', 'style')]
-
-				if ruthless: self.remove_unlikely_candidates()
-				self.transform_misused_divs_into_paragraphs()
-				candidates = self.score_paragraphs(self.options.get('min_text_length', self.TEXT_LENGTH_THRESHOLD))
-				best_candidate = self.select_best_candidate(candidates)
-				if best_candidate:
-					article = self.get_article(candidates, best_candidate)
-				else:
-					if ruthless:
-						ruthless = False
-						self.debug("ended up stripping too much - going for a safer parse")
-						# try again
-						continue
-					else:
-						article = self.html.find('body') or self.html
-
-				cleaned_article = self.sanitize(article, candidates)
-				of_acceptable_length = len(cleaned_article or '') >= (self.options['retry_length'] or self.RETRY_LENGTH)
-				if ruthless and not of_acceptable_length:
-					ruthless = False
-					continue # try again
-				else:
-					return cleaned_article
-		except StandardError, e:
-			logging.exception('error getting summary:')
-			raise Unparseable(str(e))
-
-	def get_article(self, candidates, best_candidate):
-		# Now that we have the top candidate, look through its siblings for content that might also be related.
-		# Things like preambles, content split by ads that we removed, etc.
-
-		sibling_score_threshold = max([10, best_candidate['content_score'] * 0.2])
-		output = parse("<div/>")
-		for sibling in best_candidate['elem'].parent.contents:
-			if isinstance(sibling, NavigableString): continue
-			append = False
-			if sibling is best_candidate['elem']:
-				append = True
-			sibling_key = HashableElement(sibling)
-			if sibling_key in candidates and candidates[sibling_key]['content_score'] >= sibling_score_threshold:
-				append = True
-
-			if sibling.name == "p":
-				link_density = self.get_link_density(sibling)
-				node_content = sibling.string or ""
-				node_length = len(node_content)
-
-				if node_length > 80 and link_density < 0.25:
-					append = True
-				elif node_length < 80 and link_density == 0 and re.search('\.( |$)', node_content):
-					append = True
-
-			if append:
-				output.append(sibling)
-
-		if not output: output.append(best_candidate)
-		return output
-
-	def select_best_candidate(self, candidates):
-		sorted_candidates = sorted(candidates.values(), key=lambda x: x['content_score'], reverse=True)
-		self.debug("Top 5 candidates:")
-		for candidate in sorted_candidates[:5]:
-			elem = candidate['elem']
-			self.debug("Candidate %s with score %s" % (describe(elem), candidate['content_score']))
-
-		if len(sorted_candidates) == 0:
-			return None
-		best_candidate = sorted_candidates[0]
-		self.debug("Best candidate %s with score %s" % (describe(best_candidate['elem']), best_candidate['content_score']))
-		return best_candidate
-
-	def get_link_density(self, elem):
-		link_length = len("".join([i.text or "" for i in elem.findAll("a")]))
-		text_length = len(_text(elem))
-		return float(link_length) / max(text_length, 1)
-
-	def score_paragraphs(self, min_text_length):
-		candidates = {}
-		elems = self.tags(self.html, "p","td")
-
-		for elem in elems:
-			parent_node = elem.parent
-			grand_parent_node = parent_node.parent
-			parent_key = HashableElement(parent_node)
-			grand_parent_key = HashableElement(grand_parent_node)
-
-			inner_text = _text(elem)
-
-			# If this paragraph is less than 25 characters, don't even count it.
-			if (not inner_text) or len(inner_text) < min_text_length:
-				continue
-
-			if parent_key not in candidates:
-				candidates[parent_key] = self.score_node(parent_node)
-			if grand_parent_node and grand_parent_key not in candidates:
-				candidates[grand_parent_key] = self.score_node(grand_parent_node)
-
-			content_score = 1
-			content_score += len(inner_text.split(','))
-			content_score += min([(len(inner_text) / 100), 3])
-
-			candidates[parent_key]['content_score'] += content_score
-			if grand_parent_node:
-				candidates[grand_parent_key]['content_score'] += content_score / 2.0
-
-		# Scale the final candidates score based on link density. Good content should have a
-		# relatively small link density (5% or less) and be mostly unaffected by this operation.
-		for elem, candidate in candidates.items():
-			candidate['content_score'] *= (1 - self.get_link_density(elem))
-			self.debug("candidate %s scored %s" % (describe(elem), candidate['content_score']))
-
-		return candidates
-
-	def class_weight(self, e):
-		weight = 0
-		if e.get('class', None):
-			if REGEXES['negativeRe'].search(e['class']):
-				weight -= 25
-
-			if REGEXES['positiveRe'].search(e['class']):
-				weight += 25
-
-		if e.get('id', None):
-			if REGEXES['negativeRe'].search(e['id']):
-				weight -= 25
-
-			if REGEXES['positiveRe'].search(e['id']):
-				weight += 25
-
-		return weight
-
-	def score_node(self, elem):
-		content_score = self.class_weight(elem)
-		name = elem.name.lower()
-		if name == "div":
-			content_score += 5
-		elif name == "blockquote":
-			content_score += 3
-		elif name == "form":
-			content_score -= 3
-		elif name == "th":
-			content_score -= 5
-		return { 'content_score': content_score, 'elem': elem }
-
-	def debug(self, *a):
-		if self.options['debug']:
-			logging.debug(*a)
-
-	def remove_unlikely_candidates(self):
-		for elem in self.html.findAll():
-			s = "%s%s" % (elem.get('class', ''), elem.get('id', ''))
-			if REGEXES['unlikelyCandidatesRe'].search(s) and (not REGEXES['okMaybeItsACandidateRe'].search(s)) and elem.name != 'body':
-				self.debug("Removing unlikely candidate - %s" % (s,))
-				elem.extract()
-
-	def transform_misused_divs_into_paragraphs(self):
-		for elem in self.html.findAll():
-			if elem.name.lower() == "div":
-				# transform <div>s that do not contain other block elements into <p>s
-				if REGEXES['divToPElementsRe'].search(''.join(map(unicode, elem.contents))):
-					self.debug("Altering div(#%s.%s) to p" % (elem.get('id', ''), elem.get('class', '')))
-					elem.name = "p"
-
-	def tags(self, node, *tag_names):
-		for tag_name in tag_names:
-			for e in node.findAll(tag_name):
-				yield e
-
-	def sanitize(self, node, candidates):
-		for header in self.tags(node, "h1", "h2", "h3", "h4", "h5", "h6"):
-			if self.class_weight(header) < 0 or self.get_link_density(header) > 0.33: header.extract()
-
-		for elem in self.tags(node, "form", "iframe"):
-			elem.extract()
-
-		# Conditionally clean <table>s, <ul>s, and <div>s
-		for el in self.tags(node, "table", "ul", "div"):
-			weight = self.class_weight(el)
-			el_key = HashableElement(el)
-			if el_key in candidates:
-				content_score = candidates[el_key]['content_score']
-			else:
-				content_score = 0
-			name = el.name
-
-			if weight + content_score < 0:
-				el.extract()
-				self.debug("Conditionally cleaned %s with weight %s and content score %s because score + content score was less than zero." %
-					(describe(el), weight, content_score))
-			elif len(_text(el).split(",")) < 10:
-				counts = {}
-				for kind in ['p', 'img', 'li', 'a', 'embed', 'input']:
-					counts[kind] = len(el.findAll(kind))
-				counts["li"] -= 100
-
-				content_length = len(_text(el)) # Count the text length excluding any surrounding whitespace
-				link_density = self.get_link_density(el)
-				to_remove = False
-				reason = ""
-
-				if counts["img"] > counts["p"]:
-					reason = "too many images"
-					to_remove = True
-				elif counts["li"] > counts["p"] and name != "ul" and name != "ol":
-					reason = "more <li>s than <p>s"
-					to_remove = True
-				elif counts["input"] > (counts["p"] / 3):
-					reason = "less than 3x <p>s than <input>s"
-					to_remove = True
-				elif content_length < (self.options.get('min_text_length', self.TEXT_LENGTH_THRESHOLD)) and (counts["img"] == 0 or counts["img"] > 2):
-					reason = "too short a content length without a single image"
-					to_remove = True
-				elif weight < 25 and link_density > 0.2:
-					reason = "too many links for its weight (#{weight})"
-					to_remove = True
-				elif weight >= 25 and link_density > 0.5:
-					reason = "too many links for its weight (#{weight})"
-					to_remove = True
-				elif (counts["embed"] == 1 and content_length < 75) or counts["embed"] > 1:
-					reason = "<embed>s with too short a content length, or too many <embed>s"
-					to_remove = True
-
-				if to_remove:
-					self.debug("Conditionally cleaned %s#%s.%s with weight %s and content score %s because it has %s." %
-						(el.name, el.get('id',''), el.get('class', ''), weight, content_score, reason))
-					el.extract()
-
-		for el in ([node] + node.findAll()):
-			if not (self.options['attributes']):
-				el.attrMap = {}
-
-		return unicode(node)
-
-class HashableElement():
-	def __init__(self, node):
-		self.node = node
-		self._path = None
-
-	def _get_path(self):
-		if self._path is None:
-			reverse_path = []
-			node = self.node
-			while node:
-				node_id = (node.name, tuple(node.attrs), node.string)
-				reverse_path.append(node_id)
-				node = node.parent
-			self._path = tuple(reverse_path)
-		return self._path
-	path = property(_get_path)
-
-	def __hash__(self):
-		return hash(self.path)
-
-	def __eq__(self, other):
-		return self.path == other.path
-
-	def __getattr__(self, name):
-		return getattr(self.node, name)
+    """Class to build a etree document out of html."""
+
+    def __init__(
+        self,
+        input,
+        positive_keywords=None,
+        negative_keywords=None,
+        url=None,
+        min_text_length=25,
+        retry_length=250,
+        xpath=False,
+        handle_failures="discard",
+    ):
+        """Generate the document
+
+        :param input: string of the html content, or an already parsed lxml tree/element.
+        :param positive_keywords: regex, list or comma-separated string of patterns in classes and ids
+        :param negative_keywords: regex, list or comma-separated string of patterns in classes and ids
+        :param url: if set, used as the base URL to make the document's links absolute.
+        :param min_text_length: Tunable. Set to a higher value for more precise detection of longer texts.
+        :param retry_length: Tunable. Set to a lower value for better detection of very small texts.
+        :param xpath: If set to True, adds x="..." attribute to each HTML node,
+        containing xpath path pointing to original document path (allows to
+        reconstruct selected summary in original document).
+        :param handle_failures: Passed to `lxml` to control how link-resolution failures are handled.
+        Supported options = ["discard", "ignore", None]
+
+        Examples:
+            positive_keywords=["news-item", "block"]
+            positive_keywords="news-item, block"
+            positive_keywords=re.compile("news|block")
+            negative_keywords=["mysidebar", "related", "ads"]
+
+        The Document class is not re-entrant: create a new Document() for each HTML file to process.
+
+        API methods:
+        .title() -- full title
+        .short_title() -- cleaned up title
+        .content() -- full content
+        .summary() -- cleaned up content
+        """
+        self.input = input
+        self.html = None
+        self.encoding = None
+        self.positive_keywords = compile_pattern(positive_keywords)
+        self.negative_keywords = compile_pattern(negative_keywords)
+        self.url = url
+        self.min_text_length = min_text_length
+        self.retry_length = retry_length
+        self.xpath = xpath
+        self.handle_failures = handle_failures
+
+    def _html(self, force=False):
+        """Return the parsed tree, (re)parsing self.input when forced or not parsed yet."""
+        if force or self.html is None:
+            self.html = self._parse(self.input)
+            if self.xpath:
+                root = self.html.getroottree()
+                for i in self.html.iter():  # iter() replaces the deprecated getiterator()
+                    i.attrib["x"] = root.getpath(i)
+        return self.html
+
+    def _parse(self, input):
+        """Build a cleaned lxml document from `input`, set self.encoding and resolve its links."""
+        if isinstance(input, (_ElementTree, HtmlElement)):
+            doc = input
+            self.encoding = 'utf-8'
+        else:
+            doc, self.encoding = build_doc(input)
+        doc = html_cleaner.clean_html(doc)
+        base_href = self.url
+        if base_href:
+            # trying to guard against bad links like <a href="http://[http://...">
+            try:
+                # such support is added in lxml 3.3.0
+                doc.make_links_absolute(
+                    base_href,
+                    resolve_base_href=True,
+                    handle_failures=self.handle_failures,
+                )
+            except TypeError:  # make_links_absolute() got an unexpected keyword argument 'handle_failures'
+                # lxml < 3.3.0: retry WITHOUT the unsupported keyword; passing
+                # it again would only raise the same TypeError a second time.
+                doc.make_links_absolute(
+                    base_href,
+                    resolve_base_href=True,
+                )
+        else:
+            doc.resolve_base_href(handle_failures=self.handle_failures)
+        return doc
+
+    def content(self):
+        """Returns document body"""
+        return get_body(self._html(True))
+
+    def title(self):
+        """Returns document title"""
+        return get_title(self._html(True))
+
+    def author(self):
+        """Returns document author"""
+        return get_author(self._html(True))
+
+    def short_title(self):
+        """Returns cleaned up document title"""
+        return shorten_title(self._html(True))
+
+    def get_clean_html(self):
+        """
+        An internal method, which can be overridden in subclasses, for example,
+        to disable or to improve DOM-to-text conversion in .summary() method
+        """
+        return clean_attributes(tounicode(self.html, method="html"))
+
+    def summary(self, html_partial=False, keep_all_images=False):
+        """
+        Given a HTML file, extracts the text of the article.
+
+        :param html_partial: return only the div of the document, don't wrap
+                             in html and body tags.
+        :param keep_all_images: Keep all images in summary.
+
+        Warning: It mutates internal DOM representation of the HTML document,
+        so it is better to call other API methods before this one.
+        """
+        try:
+            ruthless = True
+            while True:
+                self._html(True)
+                for i in self.tags(self.html, "script", "style"):
+                    i.drop_tree()
+                for i in self.tags(self.html, "body"):
+                    i.set("id", "readabilityBody")
+                if ruthless:
+                    self.remove_unlikely_candidates()
+                self.transform_misused_divs_into_paragraphs()
+                candidates = self.score_paragraphs()
+
+                best_candidate = self.select_best_candidate(candidates)
+
+                if best_candidate:
+                    article = self.get_article(
+                        candidates, best_candidate, html_partial=html_partial
+                    )
+                else:
+                    if ruthless:
+                        log.info("ruthless removal did not work. ")
+                        ruthless = False
+                        log.debug(
+                                "ended up stripping too much - "
+                                "going for a safer _parse"
+                        )
+                        # try again
+                        continue
+                    else:
+                        log.debug(
+                                "Ruthless and lenient parsing did not work. "
+                                "Returning raw html"
+                        )
+                        article = self.html.find("body")
+                        if article is None:
+                            article = self.html
+                cleaned_article = self.sanitize(article, candidates, keep_all_images)
+
+                article_length = len(cleaned_article or "")
+                retry_length = self.retry_length
+                of_acceptable_length = article_length >= retry_length
+                if ruthless and not of_acceptable_length:
+                    ruthless = False
+                    # Loop through and try again.
+                    continue
+                else:
+                    return cleaned_article
+        except Exception as e:
+            log.exception("error getting summary: ")
+            raise Unparseable(str(e)).with_traceback(sys.exc_info()[2])
+
+    def get_article(self, candidates, best_candidate, html_partial=False):
+        """Build the article tree from the best candidate and its siblings.
+
+        A sibling is kept when it is the best candidate itself, when its
+        candidate score reaches max(10, 20% of the best score), or when it is
+        a <p> whose leading text is over 80 chars with link density < 0.25, or
+        at most 80 chars, link-free and containing a period before space/end.
+        Returns a bare <div> if html_partial, else a full html document.
+        """
+        # Look through the top candidate's siblings for related content:
+        # things like preambles, content split by ads that we removed, etc.
+        sibling_score_threshold = max([10, best_candidate["content_score"] * 0.2])
+        if html_partial:
+            output = fragment_fromstring("<div/>")
+            container = output
+        else:
+            # document_fromstring wraps the div: html->body->div. Siblings go
+            # into that inner div, not directly into the document root.
+            output = document_fromstring("<div/>")
+            container = output[0][0]
+        best_elem = best_candidate["elem"]
+        parent = best_elem.getparent()
+        # list(parent) snapshots the children (getchildren() is deprecated);
+        # appending below moves nodes out of parent while we iterate.
+        siblings = list(parent) if parent is not None else [best_elem]
+        for sibling in siblings:
+            append = sibling is best_elem
+            if (
+                sibling in candidates
+                and candidates[sibling]["content_score"] >= sibling_score_threshold
+            ):
+                append = True
+
+            if sibling.tag == "p":
+                link_density = self.get_link_density(sibling)
+                node_content = sibling.text or ""
+                node_length = len(node_content)
+
+                if node_length > 80 and link_density < 0.25:
+                    append = True
+                elif (
+                    node_length <= 80
+                    and link_density == 0
+                    and re.search(r"\.( |$)", node_content)
+                ):
+                    append = True
+
+            if append:
+                container.append(sibling)
+        return output
+
+    def select_best_candidate(self, candidates):
+        if not candidates:
+            return None
+
+        sorted_candidates = sorted(
+            candidates.values(), key=lambda x: x["content_score"], reverse=True
+        )
+        for candidate in sorted_candidates[:5]:
+            elem = candidate["elem"]
+            log.debug("Top 5 : {:6.3f} {}".format(candidate["content_score"], describe(elem)))
+
+        best_candidate = sorted_candidates[0]
+        return best_candidate
+
+    def get_link_density(self, elem):
+        link_length = 0
+        for i in elem.findall(".//a"):
+            link_length += text_length(i)
+        # if len(elem.findall(".//div") or elem.findall(".//p")):
+        #    link_length = link_length
+        total_length = text_length(elem)
+        return float(link_length) / max(total_length, 1)
+
+    def score_paragraphs(self):
+        MIN_LEN = self.min_text_length
+        candidates = {}
+        ordered = []
+        for elem in self.tags(self._html(), "p", "pre", "td"):
+            parent_node = elem.getparent()
+            if parent_node is None:
+                continue
+            grand_parent_node = parent_node.getparent()
+
+            inner_text = clean(elem.text_content() or "")
+            inner_text_len = len(inner_text)
+
+            # If this paragraph is less than 25 characters
+            # don't even count it.
+            if inner_text_len < MIN_LEN:
+                continue
+
+            if parent_node not in candidates:
+                candidates[parent_node] = self.score_node(parent_node)
+                ordered.append(parent_node)
+
+            if grand_parent_node is not None and grand_parent_node not in candidates:
+                candidates[grand_parent_node] = self.score_node(grand_parent_node)
+                ordered.append(grand_parent_node)
+
+            content_score = 1
+            content_score += len(inner_text.split(","))
+            content_score += min((inner_text_len / 100), 3)
+            # if elem not in candidates:
+            #    candidates[elem] = self.score_node(elem)
+
+            # WTF? candidates[elem]['content_score'] += content_score
+            candidates[parent_node]["content_score"] += content_score
+            if grand_parent_node is not None:
+                candidates[grand_parent_node]["content_score"] += content_score / 2.0
+
+        # Scale the final candidates score based on link density. Good content
+        # should have a relatively small link density (5% or less) and be
+        # mostly unaffected by this operation.
+        for elem in ordered:
+            candidate = candidates[elem]
+            ld = self.get_link_density(elem)
+            score = candidate["content_score"]
+            log.debug(
+                "Branch %6.3f %s link density %.3f -> %6.3f"
+                % (score, describe(elem), ld, score * (1 - ld))
+            )
+            candidate["content_score"] *= 1 - ld
+
+        return candidates
+
+    def class_weight(self, e):
+        weight = 0
+        for feature in [e.get("class", None), e.get("id", None)]:
+            if feature:
+                if REGEXES["negativeRe"].search(feature):
+                    weight -= 25
+
+                if REGEXES["positiveRe"].search(feature):
+                    weight += 25
+
+                if self.positive_keywords and self.positive_keywords.search(feature):
+                    weight += 25
+
+                if self.negative_keywords and self.negative_keywords.search(feature):
+                    weight -= 25
+
+        if self.positive_keywords and self.positive_keywords.match("tag-" + e.tag):
+            weight += 25
+
+        if self.negative_keywords and self.negative_keywords.match("tag-" + e.tag):
+            weight -= 25
+
+        return weight
+
+    def score_node(self, elem):
+        content_score = self.class_weight(elem)
+        name = elem.tag.lower()
+        if name in ["div", "article"]:
+            content_score += 5
+        elif name in ["pre", "td", "blockquote"]:
+            content_score += 3
+        elif name in ["address", "ol", "ul", "dl", "dd", "dt", "li", "form", "aside"]:
+            content_score -= 3
+        elif name in [
+            "h1",
+            "h2",
+            "h3",
+            "h4",
+            "h5",
+            "h6",
+            "th",
+            "header",
+            "footer",
+            "nav",
+        ]:
+            content_score -= 5
+        return {"content_score": content_score, "elem": elem}
+
+    def remove_unlikely_candidates(self):
+        for elem in self.html.findall(".//*"):
+            s = "{} {}".format(elem.get("class", ""), elem.get("id", ""))
+            if len(s) < 2:
+                continue
+            if (
+                REGEXES["unlikelyCandidatesRe"].search(s)
+                and (not REGEXES["okMaybeItsACandidateRe"].search(s))
+                and elem.tag not in ["html", "body"]
+            ):
+                log.debug("Removing unlikely candidate - %s" % describe(elem))
+                elem.drop_tree()
+
+    def transform_misused_divs_into_paragraphs(self):
+        """Retag block-free <div>s as <p>; in remaining <div>s wrap loose text in <p>s and drop <br>s."""
+        for elem in self.tags(self.html, "div"):
+            # transform <div>s that do not contain other block elements into
+            # <p>s
+            # FIXME: The current implementation ignores all descendants that
+            # are not direct children of elem
+            # This results in incorrect results in case there is an <img>
+            # buried within an <a> for example
+            # Serialise the children straight to text: str() on bytes would
+            # search the b'...' repr and trips BytesWarning under `python -b`.
+            children_markup = "".join(tostring(s, encoding="unicode") for s in elem)
+            if not REGEXES["divToPElementsRe"].search(children_markup):
+                # log.debug("Altering %s to p" % (describe(elem)))
+                elem.tag = "p"
+
+        for elem in self.tags(self.html, "div"):
+            if elem.text and elem.text.strip():
+                p = fragment_fromstring("<p/>")
+                p.text = elem.text
+                elem.text = None
+                elem.insert(0, p)
+                # print "Appended "+tounicode(p)+" to "+describe(elem)
+
+            for pos, child in reversed(list(enumerate(elem))):
+                if child.tail and child.tail.strip():
+                    p = fragment_fromstring("<p/>")
+                    p.text = child.tail
+                    child.tail = None
+                    elem.insert(pos + 1, p)
+                    # print "Inserted "+tounicode(p)+" to "+describe(elem)
+                if child.tag == "br":
+                    # print 'Dropped <br> at '+describe(elem)
+                    child.drop_tree()
+
+    def tags(self, node, *tag_names):
+        for tag_name in tag_names:
+            yield from node.findall(".//%s" % tag_name)
+
+    def reverse_tags(self, node, *tag_names):
+        for tag_name in tag_names:
+            yield from reversed(node.findall(".//%s" % tag_name))
+
+    def sanitize(self, node, candidates, keep_all_images=False):
+        """Strip elements that look like boilerplate and return the cleaned HTML.
+
+        Headers, forms, textareas and iframes are filtered first. Then every
+        ``table``/``ul``/``div``/``aside``/``header``/``footer``/``section``
+        under ``node`` is checked (last match first for each tag) against a
+        set of heuristics: combined score, tag counts, text length and link
+        density.
+
+        :param node: element whose subtree is cleaned in place.
+        :param candidates: mapping from element to a dict holding at least a
+            ``"content_score"`` entry; elements missing from it score 0.
+        :param keep_all_images: when true, a ``div`` containing at least one
+            ``img`` skips the removal heuristics (it can still be dropped
+            earlier for a negative combined weight and score).
+        :return: the result of ``self.get_clean_html()`` after ``node`` has
+            been stored in ``self.html``.
+        """
+        MIN_LEN = self.min_text_length
+        # Headers with a negative class weight or that are mostly links go away.
+        for header in self.tags(node, "h1", "h2", "h3", "h4", "h5", "h6"):
+            if self.class_weight(header) < 0 or self.get_link_density(header) > 0.33:
+                header.drop_tree()
+
+        for elem in self.tags(node, "form", "textarea"):
+            elem.drop_tree()
+
+        # Only iframes whose src matches the video pattern are kept.
+        for elem in self.tags(node, "iframe"):
+            if "src" in elem.attrib and REGEXES["videoRe"].search(elem.attrib["src"]):
+                elem.text = "VIDEO"  # ADD content to iframe text node to force <iframe></iframe> proper output
+            else:
+                elem.drop_tree()
+
+        # Elements explicitly rescued by the sibling check below; never re-examined.
+        allowed = {}
+        # Conditionally clean <table>s, <ul>s, and <div>s
+        for el in self.reverse_tags(
+            node, "table", "ul", "div", "aside", "header", "footer", "section"
+        ):
+            if el in allowed:
+                continue
+            weight = self.class_weight(el)
+            if el in candidates:
+                content_score = candidates[el]["content_score"]
+                # print '!',el, '-> %6.3f' % content_score
+            else:
+                content_score = 0
+            tag = el.tag
+
+            if weight + content_score < 0:
+                log.debug(
+                    "Removed %s with score %6.3f and weight %-3s"
+                    % (describe(el), content_score, weight,)
+                )
+                el.drop_tree()
+            elif el.text_content().count(",") < 10:
+                counts = {}
+                for kind in ["p", "img", "li", "a", "embed", "input"]:
+                    counts[kind] = len(el.findall(".//%s" % kind))
+                # Bias the <li> count so a list needs more than 100 extra items
+                # to outweigh the paragraphs.
+                counts["li"] -= 100
+                counts["input"] -= len(el.findall('.//input[@type="hidden"]'))
+
+                # Count the text length excluding any surrounding whitespace
+                content_length = text_length(el)
+                link_density = self.get_link_density(el)
+                parent_node = el.getparent()
+                # NOTE(review): from here on content_score holds the parent's
+                # score whenever there is a parent, and that is the value the
+                # "Removed ..." log line below reports - confirm this is intended.
+                if parent_node is not None:
+                    if parent_node in candidates:
+                        content_score = candidates[parent_node]["content_score"]
+                    else:
+                        content_score = 0
+                # if parent_node is not None:
+                # pweight = self.class_weight(parent_node) + content_score
+                # pname = describe(parent_node)
+                # else:
+                # pweight = 0
+                # pname = "no parent"
+                to_remove = False
+                reason = ""
+
+                if keep_all_images and el.tag == 'div' and counts["img"] >= 1:
+                    continue
+                if counts["p"] and counts["img"] > 1 + counts["p"] * 1.3:
+                    reason = "too many images (%s)" % counts["img"]
+                    to_remove = True
+                elif counts["li"] > counts["p"] and tag not in ("ol", "ul"):
+                    reason = "more <li>s than <p>s"
+                    to_remove = True
+                elif counts["input"] > (counts["p"] / 3):
+                    reason = "less than 3x <p>s than <input>s"
+                    to_remove = True
+                elif content_length < MIN_LEN and counts["img"] == 0:
+                    reason = (
+                        "too short content length %s without a single image"
+                        % content_length
+                    )
+                    to_remove = True
+                elif content_length < MIN_LEN and counts["img"] > 2:
+                    reason = (
+                        "too short content length %s and too many images"
+                        % content_length
+                    )
+                    to_remove = True
+                elif weight < 25 and link_density > 0.2:
+                    reason = "too many links {:.3f} for its weight {}".format(
+                        link_density,
+                        weight,
+                    )
+                    to_remove = True
+                elif weight >= 25 and link_density > 0.5:
+                    reason = "too many links {:.3f} for its weight {}".format(
+                        link_density,
+                        weight,
+                    )
+                    to_remove = True
+                elif (counts["embed"] == 1 and content_length < 75) or counts[
+                    "embed"
+                ] > 1:
+                    reason = (
+                        "<embed>s with too short content length, or too many <embed>s"
+                    )
+                    to_remove = True
+                elif not content_length:
+                    reason = "no content"
+                    to_remove = True
+                    #                if el.tag == 'div' and counts['img'] >= 1 and to_remove:
+                    #                    imgs = el.findall('.//img')
+                    #                    valid_img = False
+                    #                    log.debug(tounicode(el))
+                    #                    for img in imgs:
+                    #
+                    #                        height = img.get('height')
+                    #                        text_length = img.get('text_length')
+                    #                        log.debug ("height %s text_length %s" %(repr(height), repr(text_length)))
+                    #                        if to_int(height) >= 100 or to_int(text_length) >= 100:
+                    #                            valid_img = True
+                    #                            log.debug("valid image" + tounicode(img))
+                    #                            break
+                    #                    if valid_img:
+                    #                        to_remove = False
+                    #                        log.debug("Allowing %s" %el.text_content())
+                    #                        for desnode in self.tags(el, "table", "ul", "div"):
+                    #                            allowed[desnode] = True
+
+                    # find x non empty preceding and succeeding siblings
+                    # (this rescue check only runs on the "no content" branch)
+                    i, j = 0, 0
+                    x = 1
+                    siblings = []
+                    for sib in el.itersiblings():
+                        # log.debug(sib.text_content())
+                        sib_content_length = text_length(sib)
+                        if sib_content_length:
+                            # Count the sibling; the original "i = +1" assigned
+                            # a constant and only worked because x == 1.
+                            i += 1
+                            siblings.append(sib_content_length)
+                            if i == x:
+                                break
+                    for sib in el.itersiblings(preceding=True):
+                        # log.debug(sib.text_content())
+                        sib_content_length = text_length(sib)
+                        if sib_content_length:
+                            j += 1
+                            siblings.append(sib_content_length)
+                            if j == x:
+                                break
+                    # log.debug(str_(siblings))
+                    # An empty element surrounded by long text is kept, and its
+                    # nested containers are exempted from later checks.
+                    if siblings and sum(siblings) > 1000:
+                        to_remove = False
+                        log.debug("Allowing %s" % describe(el))
+                        for desnode in self.tags(el, "table", "ul", "div", "section"):
+                            allowed[desnode] = True
+
+                if to_remove:
+                    log.debug(
+                        "Removed %6.3f %s with weight %s cause it has %s."
+                        % (content_score, describe(el), weight, reason)
+                    )
+                    # print tounicode(el)
+                    # log.debug("pname %s pweight %.3f" %(pname, pweight))
+                    el.drop_tree()
+                else:
+                    log.debug(
+                        "Not removing %s of length %s: %s"
+                        % (describe(el), content_length, text_content(el))
+                    )
+
+        self.html = node
+        return self.get_clean_html()
+
 
 def main():
-	import sys
-	from optparse import OptionParser
-	parser = OptionParser(usage="%prog: [options] [file]")
-	parser.add_option('-v', '--verbose', action='store_true')
-	parser.add_option('-u', '--url', help="use URL instead of a local file")
-	(options, args) = parser.parse_args()
-	
-	if not (len(args) == 1 or options.url):
-		parser.print_help()
-		sys.exit(1)
-	logging.basicConfig(level=logging.DEBUG)
-
-	file = None
-	if options.url:
-		import urllib
-		file = urllib.urlopen(options.url)
-	else:
-		file = open(args[0])
-	try:
-		print Document(file.read(), debug=options.verbose).summary().encode('ascii','ignore')
-	finally:
-		file.close()
-
-if __name__ == '__main__':
-	main()
+    """Command-line entry point: extract the readable part of an HTML page.
+
+    The HTML is read from the single positional file argument, or fetched
+    from ``--url`` when that option is given. A ``Document`` is built from it
+    and its short title and summary are either printed or, with
+    ``--browser``, opened in a browser. Prints the help text and exits with
+    status 1 when neither a file nor a URL is supplied.
+    """
+    VERBOSITY = {1: logging.WARNING, 2: logging.INFO, 3: logging.DEBUG}
+
+    from optparse import OptionParser
+
+    parser = OptionParser(usage="%prog: [options] [file]")
+    # Each -v raises the verbosity by one step (see VERBOSITY above).
+    parser.add_option("-v", "--verbose", action="count", default=0)
+    parser.add_option(
+        "-b", "--browser", default=None, action="store_true", help="open in browser"
+    )
+    parser.add_option(
+        "-l", "--log", default=None, help="save logs into file (appended)"
+    )
+    parser.add_option(
+        "-u", "--url", default=None, help="use URL instead of a local file"
+    )
+    # NOTE(review): --xpath is parsed but never read in this function.
+    parser.add_option("-x", "--xpath", default=None, help="add original xpath")
+    parser.add_option(
+        "-p",
+        "--positive-keywords",
+        default=None,
+        help="positive keywords (comma-separated)",
+        action="store",
+    )
+    parser.add_option(
+        "-n",
+        "--negative-keywords",
+        default=None,
+        help="negative keywords (comma-separated)",
+        action="store",
+    )
+    (options, args) = parser.parse_args()
+
+    if options.verbose:
+        logging.basicConfig(
+            # More than three -v flags has no dedicated level: treat it as
+            # the most verbose one instead of failing with a KeyError.
+            level=VERBOSITY.get(options.verbose, logging.DEBUG),
+            filename=options.log,
+            format="%(asctime)s: %(levelname)s: %(message)s (at %(filename)s: %(lineno)d)",
+        )
+
+    if not (len(args) == 1 or options.url):
+        parser.print_help()
+        sys.exit(1)
+
+    file = None
+    if options.url:
+        # Send an explicit User-Agent header with the request.
+        headers = {"User-Agent": "Mozilla/5.0"}
+        request = urllib.request.Request(options.url, None, headers)
+        file = urllib.request.urlopen(request)
+    else:
+        file = open(args[0])
+    try:
+        doc = Document(
+            file.read(),
+            url=options.url,
+            positive_keywords=options.positive_keywords,
+            negative_keywords=options.negative_keywords,
+        )
+        if options.browser:
+            from .browser import open_in_browser
+
+            result = "<h2>" + doc.short_title() + "</h2><br/>" + doc.summary()
+            open_in_browser(result)
+        else:
+            result = "Title:" + doc.short_title() + "\n" + doc.summary()
+            print(result)
+    finally:
+        # Close the local file or HTTP response whether or not parsing succeeded.
+        file.close()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/readability/url_helpers.py b/readability/url_helpers.py
deleted file mode 100644
index 8234c8dd..00000000
--- a/readability/url_helpers.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import logging
-from urlparse import urlparse
-
-def host_for_url(url):
-	"""
-	>>> host_for_url('http://base/whatever/fdsh')
-	'base'
-	>>> host_for_url('invalid')
-	"""
-	host = urlparse(url)[1]
-	if not host:
-		logging.error("could not extract host from URL: %r" % (url,))
-		return None
-	return host
-
-def absolute_url(url, base_href):
-	"""
-	>>> absolute_url('foo', 'http://base/whatever/ooo/fdsh')
-	'http://base/whatever/ooo/foo'
-
-	>>> absolute_url('foo/bar/', 'http://base')
-	'http://base/foo/bar/'
-
-	>>> absolute_url('/foo/bar', 'http://base/whatever/fdskf')
-	'http://base/foo/bar'
-
-	>>> absolute_url('\\n/foo/bar', 'http://base/whatever/fdskf')
-	'http://base/foo/bar'
-
-	>>> absolute_url('http://localhost/foo', 'http://base/whatever/fdskf')
-	'http://localhost/foo'
-	"""
-	url = url.strip()
-	proto = urlparse(url)[0]
-	if proto:
-		return url
-
-	base_url_parts = urlparse(base_href)
-	base_server = '://'.join(base_url_parts[:2])
-	if url.startswith('/'):
-		return base_server + url
-	else:
-		path = base_url_parts[2]
-		if '/' in path:
-			path = path.rsplit('/', 1)[0] + '/'
-		else:
-			path = '/'
-		return base_server + path + url
-
-if __name__ == '__main__':
-	import doctest
-	doctest.testmod()
\ No newline at end of file
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 00000000..996bbfc0
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,3 @@
+nose
+twine
+flake8
\ No newline at end of file
diff --git a/setup.py b/setup.py
index a0274492..a88e8185 100755
--- a/setup.py
+++ b/setup.py
@@ -1,22 +1,70 @@
 #!/usr/bin/env python
-from distutils.core import setup
+
+import codecs
+import os
+import re
+from setuptools import setup
+
+# Optional accelerators, installable through the "speed" extra.
+speed_deps = ["cchardet"]
+
+# Maps each extras_require name to the packages it pulls in.
+extras = {
+    "speed": speed_deps,
+}
+
+# Adapted from https://github.com/pypa/pip/blob/master/setup.py
+def find_version(*file_paths):
+    """Return the ``__version__`` string found in the file at ``file_paths``."""
+    base_dir = os.path.abspath(os.path.dirname(__file__))
+    target = os.path.join(base_dir, *file_paths)
+    # Intentionally *not* adding an encoding option to open, See:
+    #   https://github.com/pypa/virtualenv/issues/201#issuecomment-3145690
+    with codecs.open(target, "r") as handle:
+        contents = handle.read()
+    pattern = r"^__version__ = ['\"]([^'\"]*)['\"]"
+    found = re.search(pattern, contents, re.M)
+    if found is None:
+        raise RuntimeError("Unable to find version string.")
+
+    return found.group(1)
 
 
 setup(
-    name="python-readability",
-    author="Tim Cuthbertson",
-    author_email="tim3d.junk+github@gmail.com",
-    description="python port of arc90's readability bookmarklet",
-    long_description=open("README").read(),
+    name="readability-lxml",
+    version=find_version("readability", "__init__.py"),
+    author="Yuri Baburov",
+    author_email="burchik@gmail.com",
+    description="fast html to text parser (article readability tool) with python 3 support",
+    test_suite="tests.test_article_only",
+    long_description=open("README.md").read(),
+    long_description_content_type="text/markdown",
     license="Apache License 2.0",
-    url="http://github.com/gfxmonk/python-readability",
-    packages=[
-        "readability",
+    url="http://github.com/buriy/python-readability",
+    packages=["readability"],
+    install_requires=[
+        "chardet",
+        "lxml[html_clean]",
+        "lxml-html-clean; python_version < '3.11'",
+        "cssselect"
     ],
+    extras_require=extras,
     classifiers=[
         "Environment :: Web Environment",
         "Intended Audience :: Developers",
         "Operating System :: OS Independent",
+        "Topic :: Text Processing :: Indexing",
+        "Topic :: Utilities",
+        "Topic :: Internet",
+        "Topic :: Software Development :: Libraries :: Python Modules",
         "Programming Language :: Python",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
+        "Programming Language :: Python :: 3.13",
+        "Programming Language :: Python :: Implementation :: PyPy",
     ],
 )
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/samples/si-game.sample.html b/tests/samples/si-game.sample.html
new file mode 100644
index 00000000..fab4f4fe
--- /dev/null
+++ b/tests/samples/si-game.sample.html
@@ -0,0 +1,762 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+   <html>
+   <head>
+   <meta http-equiv="content-type" content="text/html; charset=iso-8859-1">
+   <a href="http://github.com/baseball/mlb/teams/tigers/">
+      <title>Detroit Tigers vs. Kansas City Royals - Preview - April 16, 2012</title></a><meta name="description" content="Tigers-Royals preview for game played on April 16, 2012">
+   <meta name="keywords" content="Detroit Tigers, Kansas City Royals, preview, mlb, baseball, si.com">
+   <script type="text/javascript">
+   var SPORTID = "MLB";
+   var PATH = "/baseball/mlb/scoreboards/2012/04/16/";
+   var FEEDNAME = "scoreboard.dat";
+   isViewcast = true;
+   var searchString = document.location.href;
+   </script>
+   <link rel="stylesheet" type="text/css" href="http://i.cdn.turner.com/si/.e/css/pkg/global_41/129.css"/>
+<script type="text/javascript" language="JavaScript" src="http://i.cdn.turner.com/si/.e/js/4.1/global/lib/jquery-1.5.2.min.js"></script>
+<script language="JavaScript" type="text/javascript" src="http://i.cdn.turner.com/si/.e/js/pkg/global/593.js"></script>
+<script src="http://img.timeinc.net/shared/static/js/tii_ads.js"></script><script>var adConfig=new TiiAdConfig('3475.si2');adConfig.setRevSciTracking(true);</script>
+
+<!--[if IE 9]>
+<link rel="stylesheet" type="text/css" href="http://i.cdn.turner.com/si/.e/css/4.1/ie9.css" />
+<![endif]-->
+<link rel="stylesheet" type="text/css" href="http://i.cdn.turner.com/si/.element/css/4.1/gameflash.css"/>
+<link rel="stylesheet" type="text/css" href="http://i.cdn.turner.com/si/.element/css/4.1/miniscores.css"/>
+<script language="javascript" type="text/javascript">
+	function hidediv() {
+			if (document.getElementById) { // DOM3 = IE5, NS6
+					document.getElementById('cnngCommentsBox').className = 'cnngCommentsBoxOff';
+			}
+			else {
+					if (document.layers) { // Netscape 4
+							document.cnngCommentsBox.className = 'cnngCommentsBoxOff';
+					}
+					else { // IE 4
+							document.all.cnngCommentsBox.className = 'cnngCommentsBoxOff';
+					}
+			}
+	}
+	function showdiv() {
+			if (document.getElementById) { // DOM3 = IE5, NS6
+					document.getElementById('cnngCommentsBox').className = 'cnngCommentsBox';
+			}
+			else {
+					if (document.layers) { // Netscape 4
+							document.cnngCommentsBox.className = 'cnngCommentsBox';
+					}
+					else { // IE 4
+							document.all.cnngCommentsBox.className = 'cnngCommentsBox';
+					}
+			}
+	}
+function siVideoBegin(cvpInstance, videoId) {  }
+function siVideoPlay(cvpInstance, videoId) {
+	var cvpData = cvpInstance.getContentEntry(videoId);
+	var cvpObject = window.JSON.parse(cvpData);
+	jQuery('#cnnCVPRecapDetails').show();
+	jQuery('#cvpHeadline').html(cvpObject.headline);
+	jQuery('#cvpDescription').html(cvpObject.description);
+	jQuery('#cvpSource').html(cvpObject.source);
+}
+
+function siVideoPlayHead(cvpInstance, playheadTime, totalDuration) { }
+
+function siVideoAdStarted(cvpInstance, videoId) { }
+
+function siVideoTrackingAdCountdown(seconds) { }
+
+function siVideoComplete(cvpInstance, videoId) { }
+
+function siVideoPause(cvpInstance, videoId, paused) { }
+
+function siVideoSeek() { }
+</script>
+<script language="JavaScript" src="http://github.com/.element/js/4.1/ads/sasd_ads.js"></script>
+<script src="http://i.cdn.turner.com/si/.element/js/4.1/global/lib/iframe_ad_factory.js"></script><script>iframeAdFactory.url = '/si_adspaces/4.0/iframe.html';
+window.setInterval(function(){ iframeAdFactory.refresh() }, 45000);</script>
+
+
+<script type="text/javascript">
+var adFactory = new TiiAdFactory(adConfig, "mlb/gameflashpage");
+iframeAdFactory.queryString = 'TiiAdConfig=3475.si2&adConfigPairs=' + '&TiiAdFactory=' + encodeURIComponent('mlb/gameflashpage') + '&adFactoryPairs=' + '&paramPairs=' + encodeURIComponent('sport=mlb');
+if (TiiAdsIsDebugMode()) { iframeAdFactory.queryString += '&debugads=y'; }
+</script>
+<link rel="stylesheet" type="text/css" href="http://z.cdn.turner.com/si/.element/css/4.1/gameflash_mlb.css"/>
+<script type="text/javascript" src="http://z.cdn.turner.com/si/.element/js/4.1/global/lib/jquery-1.4.2.min.js"></script>
+
+<link rel="stylesheet" type="text/css" href="http://z.cdn.turner.com/si/.element/ssi/scoreboards/4.2/css/scoreticker-master.css"/>
+<script type="text/javascript" src="http://z.cdn.turner.com/si/.element/ssi/gameflash/4.2/football/nfl/js/jquery.jsonp-2.1.4.min.js"></script>
+<script type="text/javascript" src="http://z.cdn.turner.com/si/.element/ssi/scoreboards/4.2/js/scoreticker-master.js"></script>
+<script type="text/javascript" src="http://z.cdn.turner.com/si/.element/ssi/scoreboards/4.2/js/scoreticker-mlb.js"></script>
+
+
+
+
+
+   </head>
+   <body>
+   <!--[if IE 6]><div class="ie"><div class="ie6"><![endif]--><!--[if IE 7]><div class="ie"><div class="ie7"><![endif]--><!--[if
+   IE 8]><div class="ie"><div class="ie8"><![endif]-->
+   <div class="cnnPage">
+   		
+   <!-- start contentHeader-->
+   <style>
+DIV.cnnSearch { padding:5px 0; }
+DIV.cnnSearch DIV.cnnRight { padding:4px 0; }
+DIV.cnnSearch DIV.cnnLeft { margin:0;padding:0; }
+DIV.cnnSearch DIV.cnnLeft LI { float:left;margin:0;padding:0 5px 0 0; }
+DIV.cnnSearch DIV.cnnLeft LI A { display:block;margin:0;padding:0; }
+DIV.cnnSearch DIV.cnnLeft LI IMG { vertical-align:bottom; }
+DIV.cnnSearch DIV.cnnLeft LI DL { margin:0;padding:0;position:relative;z-index:999999; }
+DIV.cnnSearch DIV.cnnLeft LI DT { margin:0;padding:0; }
+DIV.cnnSearch DIV.cnnLeft LI DD { left:-999em;margin:0;padding:0 3px 0 1px;position:absolute;top:23px; }
+DIV.cnnSearch DIV.cnnLeft LI DL.cnnOver DD,
+DIV.cnnSearch DIV.cnnLeft LI DL:hover DD { left:auto; }
+
+DIV.cnnBanner { height:auto; }
+DIV.cnnBannerSection DIV.cnnLeft { width:auto; }
+DIV.cnnBannerSection DIV.cnnLeft A { display:inline;height:auto;width:auto; }
+
+DIV.cnnBanner { background:transparent url('http://i.cdn.turner.com/si/.element/img/4.1/sect/global/topper.gif') no-repeat top right;position:relative;text-align:left;width:1000px; }
+.ie6 DIV.cnnBanner { width:1000px; }
+DIV.cnnBanner DIV IMG { display:block; }
+DIV.cnnBannerSection { height:99px;position:absolute;left:243px;top:0px;width:757px; }
+DIV.cnnBannerSection TD.col0 { display:none; }
+DIV.cnnBannerSection DIV.cnn_border { display:none; }
+DIV.cnnBannerSection IMG { display:inline;float:left; }
+DIV.cnnBannerSection DIV.cnnLeft { float:left; }
+DIV.cnnBannerSection DIV.cnnLeft IMG { float:none; }
+DIV.cnnBannerSection DIV.cnnRight { float:right;margin:8px 6px 0 0; }
+DIV.cnnBannerSection DIV.cnn_header { color:#000;font:bold 50px georgia;line-height:58px;padding:6px 10px 0 0; }
+DIV.cnnBannerSection DIV.cnn_header SPAN { font-size:10px;color:#ccc; }
+DIV.cnnBannerSection DIV.cnn_header A { color:#000; }
+DIV.cnnBannerSection DIV.cnn_header UL { color:#ccc;float:right;font-size:10px;line-height:12px;margin-top:36px; }
+.ie DIV.cnnBannerSection DIV.cnn_header UL { margin-top:-21px; }
+DIV.cnnBannerSection DIV.cnn_header UL LI { border-left:1px solid #ccc;float:left;padding:0 4px; }
+DIV.cnnBannerSection DIV.cnn_header UL LI#cnnItem0 { border:0; }
+DIV.cnnBannerSection DIV.cnn_header UL LI#cnnItem2 DIV.cnn_more { font:normal 9px arial; }
+DIV.cnnBannerSection DIV.cnn_header UL LI#cnnItem2 DIV.cnn_more A { font:normal 9px arial; }
+DIV.cnnBannerSection DIV.cnn_header UL LI DIV.cnn_rollover { background-image:url('http://i.cdn.turner.com/si/.e1d/img/4.0/global/pixels/blank_pixel.gif');display:none;padding:10px 0 9px 0;left:103px;position:absolute;width:654px; }
+.ie DIV.cnnBannerSection DIV.cnn_header UL LI DIV.cnn_rollover { top:55px; }
+DIV.cnnBannerSection DIV.cnn_header UL LI.cnnOver .cnn_rollover,
+DIV.cnnBannerSection DIV.cnn_header UL LI:hover .cnn_rollover { display:block; }
+DIV.cnnBannerSection DIV.cnn_more { color:#2e373c;font-size:10px;padding:2px 0 0 0; }
+DIV.cnnBannerSection DIV.cnn_more A { color:#fff;font-weight:bold; }
+DIV.cnnBannerSection DIV.cnn_more A:hover { color:#e7e7e7; }
+DIV.cnnBannerSection DIV.cnn_more DIV { display:none;color:#ccc;line-height:12px; }
+DIV.cnnBannerSection DIV.cnn_more DIV SPAN A { font:9px arial;font-weight:normal; }
+DIV.cnnBannerSection DIV.cnn_header DIV.cnn_more A { font-family:arial; }
+
+DIV.cnnGameScores { background:#6f7f8b;border-bottom:11px solid #384d5e; }
+</style>
+
+<!-- start personalize -->
+<div class="cnnPersonalize"><div><div><script>cnn_writePresonalizeBar();</script></div></div></div>
+<!-- end personalize -->
+
+<!-- start searchbar -->
+<div class="cnnSearch">
+	<div class="cnnLeft"><ul>
+	<li class="cnnItem0" id="cnnCM1"><dl><script type="text/javascript">
+/* script for 50/50 split */
+/*var min=1;
+var max=2;
+x = Math.floor(Math.random() * (max - min + 1)) + min;
+if(x/2 == 1) {
+  document.write('<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1006340.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe0"><img src="http://i.cdn.turner.com/si/2011_images/cm/WS11_btn_champ_STL.png" alt="Get the Cardinals Championship Package" title="Get the Cardinals Championship Package"/></a></dt>');
+  document.write('<dd><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1006340.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe1"><img src="http://i.cdn.turner.com/si/2011_images/cm/WS11_dropdown_STL.png" alt="Get the Cardinals Championship Package" title="Get the Cardinals Championship Package"/></a></dd>');
+} else {
+  document.write('<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1007180.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe0"><img src="http://i.cdn.turner.com/si/2011_images/cm/EA-N4S-TheRun-btn.png" alt="Get Need for Speed 12 FREE" title="Get Need for Speed 12 FREE"/></a></dt>');
+  document.write('<dd><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1007180.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe1"><img src="http://i.cdn.turner.com/si/2011_images/cm/EA-N4S-TheRun-SI-dropdown.jpg" alt="Get Need for Speed 12 FREE" title="Get Need for Speed 12 FREE"/></a></dd>');
+}
+*/
+</script>
+
+
+<!--Kentucky-->
+<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1009459.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe0"><img src="http://i.cdn.turner.com/si/2012_images/cm/si-btn-champ-kentucky.png" alt="Get the Wildcats Championship Package" title="Get the Wildcats Championship Package"/></a></dt>
+<dd><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1009459.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe1"><img src="http://i.cdn.turner.com/si/2012_images/cm/si-dropdown-kentucky.png" alt="Get the Wildcats Championship Package" title="Get the Wildcats Championship Package"/></a></dd>
+
+<!--original generic sub buttons, changed on 10.26.11 for world series-->
+<!--<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1005085.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe0"><img src="http://i.cdn.turner.com/si/2011_images/cm/si-btn-EA-MADDEN12.png" alt="Get EA Sports Madden NFL 12 Free!" title="Get EA Sports Madden NFL 12 Free!"/></a></dt>
+<dd><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1005085.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe1"><img src="http://i.cdn.turner.com/si/2011_images/cm/si-dropdown-EA-MADDEN12.jpg" alt="Get EA Sports Madden NFL 12 Free!" title="Get EA Sports Madden NFL 12 Free!"/></a></dd>
+-->
+
+<script><!--
+/*
+if (cnnPage.isHomepage) {
+	var button = $e('cnn_cm_subscribe0');
+	button.href = 'https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1004340.html';
+	button = $e('cnn_cm_subscribe1');
+	button.href = 'https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1004340.html';
+}
+*/
+//--></script>
+</dl></a></li>
+	<li class="cnnItem1"><dl><script type="text/javascript">
+var min=1;
+var max=2;
+x = Math.floor(Math.random() * (max - min + 1)) + min;
+/*turning off 50/50 for now*/
+/*if(x/2 == 1) {
+  document.write('<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe2"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/cm/button_subscribe_si_red.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dt>');
+  document.write('<dd><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe4"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/cm/dropdown_subscribe_si_red.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dd>');
+} else {*/
+  document.write('<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe2"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/cm/button_subscribe_si_red.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dt>');
+  document.write('<dd style="margin-left:-79px"><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe4"><img src="http://i.cdn.turner.com/si/2012_images/cm/bn_2osi16579_290x162_v1.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dd>');
+//}
+</script>
+</dl></li>
+	<li class="cnnItem2"><dl><!--Default ROS
+<a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1001406.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe3"><img src="http://i.cdn.turner.com/si/2012_images/cm/si-btn3_170x30_sigift.png" alt="Give the Gift of SI" title="Give the Gift of SI"/></a>
+-->
+
+<script type="text/javascript">
+/*var min=1;
+var max=2;
+x = Math.floor(Math.random() * (max - min + 1)) + min;
+if(x/2 == 1) {
+  document.write('<dt><a href="https://subscription.si.com/storefront/Give-the-Gift-of-Sports-Illustrated/site/si-donor0411jacket.html?xid=sirosheader&link=1001406" target="_blank" rel="nofollow" id="cnn_cm_subscribe0"><img src="http://i.cdn.turner.com/si/2011_images/cm/170x30.png" alt="Give the Gift of SI" title="Give the Gift of SI"/></a></dt>');
+  document.write('<dd><a href="https://subscription.si.com/storefront/Give-the-Gift-of-Sports-Illustrated/site/si-donor0411jacket.html?xid=sirosheader&link=1001406" target="_blank" rel="nofollow" id="cnn_cm_subscribe1"><img src="http://i.cdn.turner.com/si/2011_images/cm/170X110.jpg" alt="Give the Gift of SI" title="Give the Gift of SI"/></a></dd>');
+} else {
+  document.write('<dt><a href="http://www.si.com/swim2012" target="_blank" rel="nofollow" id="cnn_cm_subscribe0"><img src="http://i.cdn.turner.com/si/2012_images/cm/si-btn_swim.png" alt="Swimsuit 2012" title="Swimsuit 2012"/></a></dt>');
+  document.write('<dd><a href="http://www.si.com/swim2012" target="_blank" rel="nofollow" id="cnn_cm_subscribe1"><img src="http://i.cdn.turner.com/si/2012_images/cm/SWIM_2012_dropdown.png" alt="Swimsuit 2012" title="Swimsuit 2012"/></a></dd>');
+*/
+</script>
+
+<!--MLB2K 2012-->
+<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1009469.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe0"><img src="http://i.cdn.turner.com/si/2012_images/cm/si-btn3_MLB2K12.png" alt="Get MLB 2K 12 FREE" title="Get MLB 2K 12 FREE"/></a></dt>
+<dd><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1009469.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe1"><img src="http://i.cdn.turner.com/si/2012_images/cm/si-MLB2K12-dropdown.jpg" alt="Get MLB 2K 12 FREE" title="Get MLB 2K 12 FREE"/></a></dd>
+
+<!--swimsuit 2012-->
+<!--
+<dt><a href="http://sportsillustrated.cnn.com/swim2012" target="_blank" rel="nofollow" id="cnn_cm_subscribe0"><img src="http://i.cdn.turner.com/si/2012_images/cm/si-btn_swim.png" alt="Swimsuit 2012" title="Swimsuit 2012"/></a></dt>
+<dd><a href="http://sportsillustrated.cnn.com/swim2012" target="_blank" rel="nofollow" id="cnn_cm_subscribe1"><img src="http://i.cdn.turner.com/si/2012_images/cm/SWIM_2012_dropdown.png" alt="Swimsuit 2012" title="Swimsuit 2012"/></a></dd>
+-->
+</dl></li>
+</ul>
+</div>
+	<div class="cnnRight"><form method="get" action="http://sportsillustrated.cnn.com/search/" name="cm_search"><input type="text" name="text" class="cnnLeft"/><input type="image" src="http://i.cdn.turner.com/si/.element/img/4.1/global/search.gif" alt="Search" title="Search" class="cnnRight"/></form></div>
+</div>
+<!-- end searchbar -->
+
+<!-- start banner -->
+<div class="cnnBanner">
+	<div><a href="http://github.com/"><img src="http://i.cdn.turner.com/si/.element/img/4.1/sect/global/logo2.png" alt="SI.com Home" title="SI.com Home"/></a>
+</div>
+	<div class="cnnBannerSection">
+		<div class="cnnLeft"><a href="http://github.com/baseball/mlb/"><img src="http://i.cdn.turner.com/si/.element/img/4.0/sect/baseball/mlb/icon.jpg"/></a></div>
+		<div class="cnn_header"><a href="http://github.com/baseball/mlb/">MLB GAMEFLASH</a></div>
+		<div class="cnn_more" style="font-size:9px;"><a href="http://github.com/baseball/mlb/scoreboards/today/">Scores</a> | <a href="http://github.com/baseball/mlb/teams/">Teams</a> | <a href="http://github.com/baseball/mlb/players/">Players</a> | <a href="http://github.com/fantasy/player_news/mlb/">Player News</a> | <a href="http://github.com/baseball/mlb/standings/">Standings</a> | <a href="http://github.com/baseball/mlb/probables/today/">Probables</a> | <a href="http://github.com/baseball/mlb/schedules/weekly/today/">Schedules</a> | <a href="http://github.com/baseball/mlb/stats/">Stats</a> | <a href="http://github.com/baseball/mlb/transactions/">Transactions</a> | <a href="http://github.com/baseball/mlb/injuries/">Injuries</a> | <a href="http://www.ticketcity.com/mlb-tickets.html" target="_blank" rel="nofollow">Tickets</a> | <a href="http://mlb.mlb.com/mlb/subscriptions/index.jsp?product=si&vbID=simlbtv_test" target="_blank" rel="nofollow">MLB.TV</a>
+</div>
+	</div>
+</div>
+<div class="cnnClear"></div>
+<!-- end banner -->
+
+<style>
+/*
+DIV.cnnTopnav LI A { color:#000;display:block;padding:0 16px 0 16px!important; }
+DIV.cnnTopnav LI A { color:#000;display:block;padding:0 23px 0 22px!important; }
+*/
+DIV.cnnTopnav LI A { color:#000;display:block;padding:0 11px 0 11px!important; }
+DIV.cnnTopnav LI.cnnFirst { padding-left:0px; }
+</style>
+<div class="cnnTopnav">
+	<ul>
+		<li class="cnnFirst"><a href="http://github.com/extramustard/?eref=sinav">EXTRA MUSTARD</a></li>
+		<li><a href="http://www.fannation.com/?eref=sinav">FANNATION</a></li>
+		<li><a href="http://github.com/multimedia/photo_gallery/?eref=sinav">PHOTOS</a></li>
+		<li><a href="http://github.com/swimsuit/?eref=sinav">SWIMSUIT</a></li>
+		<li><a href="http://github.com/fantasy/?eref=sinav">FANTASY</a></li>
+		<li><a href="http://github.com/magazine/sportsman/?eref=sinav">SPORTSMAN</a></li>
+		<li><a href="http://www.sportsillustratedeverywhere.com/">MAGAZINE</a></li>
+		<li><a href="http://github.com/sifk/?eref=sinav">SI KIDS</a></li>
+		<li><a href="http://github.com/highschool/?eref=sinav">HIGH SCHOOL</a></li>
+		<li><a href="http://github.com/behindthemic/?eref=sinav">BEHIND THE MIC</a></li>
+		<li><a href="http://www.twackle.com/" target="_blank" rel="nofollow">TWACKLE</a></li>
+		<!--<li><a href="http://www.maxpreps.com/national/national.htm?eref=sinav" target="_blank" rel="nofollow">MAXPREPS</a></li>-->
+	</ul>
+</div>
+<!-- end topnav -->
+
+<style>
+	.ie6 #cnnBotnav LI#cnnBotnav0 { width:49px; } /* NFL */
+	.ie6 #cnnBotnav LI#cnnBotnav1 { width:150px; } /* COLLEGE FOOTBALL */
+	.ie6 #cnnBotnav LI#cnnBotnav2 { width:50px; } /* MLB */
+	.ie6 #cnnBotnav LI#cnnBotnav3 { width:51px; } /* NBA */
+	.ie6 #cnnBotnav LI#cnnBotnav4 { width:101px; } /* COLLEGE BB */
+	.ie6 #cnnBotnav LI#cnnBotnav5 { width:58px; } /* GOLF */
+	.ie6 #cnnBotnav LI#cnnBotnav6 { width:50px; } /* NHL */
+	.ie6 #cnnBotnav LI#cnnBotnav7 { width:74px; } /* RACING */
+	.ie6 #cnnBotnav LI#cnnBotnav8 { width:74px; } /* SOCCER */
+	.ie6 #cnnBotnav LI#cnnBotnav9 { width:121px; } /* MMA & BOXING */
+	.ie6 #cnnBotnav LI#cnnBotnav11 { width:73px; } /* TENNIS */
+	.ie6 #cnnBotnav LI#cnnBotnav12 { width:63px; } /* MORE */
+	.ie6 #cnnBotnav LI#cnnBotnav13 { width:74px; } /* VIDEO */
+	#cnnBotnav LI#cnnBotnav0 STRONG { width:49px; } /* NFL */
+	#cnnBotnav LI#cnnBotnav1 STRONG { width:150px; } /* COLLEGE FOOTBALL */
+	#cnnBotnav LI#cnnBotnav2 STRONG { width:50px; } /* MLB */
+	#cnnBotnav LI#cnnBotnav3 STRONG { width:51px; } /* NBA */
+	#cnnBotnav LI#cnnBotnav4 STRONG { width:101px; } /* COLLEGE BB */
+	#cnnBotnav LI#cnnBotnav5 STRONG { width:58px; } /* GOLF */
+	#cnnBotnav LI#cnnBotnav6 STRONG { width:50px; } /* NHL */
+	#cnnBotnav LI#cnnBotnav7 STRONG { width:74px; } /* RACING */
+	#cnnBotnav LI#cnnBotnav8 STRONG { width:74px; } /* SOCCER */
+	#cnnBotnav LI#cnnBotnav9 STRONG { width:121px; } /* MMA & BOXING */
+	#cnnBotnav LI#cnnBotnav11 STRONG { width:73px; } /* TENNIS */
+	#cnnBotnav LI#cnnBotnav12 STRONG { width:63px; } /* MORE */
+	#cnnBotnav LI#cnnBotnav13 STRONG { width:74px; } /* VIDEO */
+
+/* realignment */
+	#cnnBotnav LI#cnnBotnav11:hover UL,
+	#cnnBotnav LI#cnnBotnav11 LI.cnnOver UL { margin-left:0; } /* width of subnav minus width of TENNIS minus width of MORE minus 2 lines */
+	#cnnBotnav LI#cnnBotnav12:hover UL,
+	#cnnBotnav LI#cnnBotnav12 LI.cnnOver UL { margin-left:-41px; } /* width of subnav minus width of MORE minus 1 line */
+	#cnnBotnav LI#cnnBotnav13:hover UL,
+	#cnnBotnav LI#cnnBotnav13 LI.cnnOver UL { margin-left:-93px; width:168px; } /* width of subnav minus width of MORE minus 1 line */
+	#cnnBotnav LI#cnnBotnav13 UL LI { width:168px; }
+</style>
+<!-- start botnav -->
+<div class="cnnBotnav">
+	<div>
+		<ul id="cnnBotnav" style="height:29px;overflow:hidden;">
+			<li id="cnnBotnav0" nav="nfl">
+				<a href="http://github.com/football/nfl/?eref=sinav"><strong>NFL</strong></a>
+			</li>
+			<li id="cnnBotnav1" nav="ncaaf">
+				<a href="http://github.com/football/ncaa/?eref=sinav"><strong>COLLEGE FOOTBALL</strong></a>
+			</li>
+			<li id="cnnBotnav2" nav="mlb">
+				<a href="http://github.com/baseball/mlb/?eref=sinav"><strong>MLB</strong></a>
+			</li>
+			<li id="cnnBotnav3" nav="nba">
+				<a href="http://github.com/basketball/nba/?eref=sinav"><strong>NBA</strong></a>
+			</li>
+			<li id="cnnBotnav4" nav="ncaabb">
+				<a href="http://github.com/basketball/ncaa/?eref=sinav"><strong>COLLEGE BB</strong></a>
+			</li>
+			<li id="cnnBotnav5" nav="golf">
+				<a href="http://www.golf.com/?eref=sinav"><strong>GOLF</strong></a>
+			</li>
+			<li id="cnnBotnav6" nav="nhl">
+				<a href="http://github.com/hockey/nhl/?eref=sinav"><strong>NHL</strong></a>
+			</li>
+			<li id="cnnBotnav7" nav="racing">
+				<a href="http://github.com/racing/?eref=sinav"><strong>RACING</strong></a>
+			</li>
+			<li id="cnnBotnav8" nav="soccer">
+				<a href="http://github.com/soccer/?eref=sinav"><strong>SOCCER</strong></a>
+			</li>
+			<li id="cnnBotnav9" nav="boxmma">
+				<a href="http://github.com/mma/?eref=sinav"><strong>MMA &amp; BOXING</strong></a>
+			</li>
+			<li id="cnnBotnav11" nav="tennis">
+				<a href="http://github.com/tennis/?eref=sinav"><strong>TENNIS</strong></a>
+			</li>
+			<li id="cnnBotnav12" nav="more">
+				<a href="http://github.com/more/?eref=sinav"><strong>MORE</strong></a>
+			</li>
+			<li id="cnnBotnav13" nav="video">
+				<a href="http://github.com/video/?eref=sinav"><strong>VIDEO</strong></a>
+			</li>
+		</ul>
+	</div>
+</div>
+<!-- end botnav -->
+
+
+<div class="cnnViewerAd"><script type="text/javascript">iframeAdFactory.getAd('i_728x90', 728, 90, new Array('728x90','101x1'), true);</script></div>
+
+<!-- start scoreboard ticker -->
+<div id="scoreticker" class="stMLB">
+
+	<div id="stScrollWrap">
+		<a href="" class="stScrollControl left disabled"></a>
+		<a href="" class="stScrollControl right"></a>
+		<div id="stScroller"></div>
+	</div>
+	
+</div>
+<!-- end scoreboard ticker -->
+
+   <!-- end contentHeader-->
+   
+   <!-- start scoreboard -->
+   <div class="cnngScoreboardNoLastPlay">
+      <div class="cnngScoreboard">
+         <div class="cnnLeft">
+            <div>&nbsp;
+               						
+            </div>
+            <table border="0" cellpadding="0" cellspacing="0">
+               <tr class="cnnRow0">
+                  <td class="cnnCol0">&nbsp;</td>
+                  <td class="cnnCol1">1</td>
+                  <td class="cnnCol2">2</td>
+                  <td class="cnnCol3">3</td>
+                  <td class="cnnCol4">4</td>
+                  <td class="cnnCol5">5</td>
+                  <td class="cnnCol6">6</td>
+                  <td class="cnnCol7">7</td>
+                  <td class="cnnCol8">8</td>
+                  <td class="cnnCol9">9</td>
+                  <td class="cnnColR">R</td>
+                  <td class="cnnColH">H</td>
+                  <td class="cnnColE">E</td>
+               </tr>
+               <tr class="cnnRow1">
+                  <td class="cnnCol0"><a href="http://github.com/baseball/mlb/teams/tigers/">TIGERS</a></td>
+                  <td class="cnnCol1">&nbsp;</td>
+                  <td class="cnnCol2">&nbsp;</td>
+                  <td class="cnnCol3">&nbsp;</td>
+                  <td class="cnnCol4">&nbsp;</td>
+                  <td class="cnnCol5">&nbsp;</td>
+                  <td class="cnnCol6">&nbsp;</td>
+                  <td class="cnnCol7">&nbsp;</td>
+                  <td class="cnnCol8">&nbsp;</td>
+                  <td class="cnnCol9">&nbsp;</td>
+                  <td class="cnnColR">&nbsp;</td>
+                  <td class="cnnColH">&nbsp;</td>
+                  <td class="cnnColE">&nbsp;</td>
+               </tr>
+               <tr class="cnnRow2">
+                  <td class="cnnCol0"><a href="http://github.com/baseball/mlb/teams/royals/">ROYALS</a></td>
+                  <td class="cnnCol1">&nbsp;</td>
+                  <td class="cnnCol2">&nbsp;</td>
+                  <td class="cnnCol3">&nbsp;</td>
+                  <td class="cnnCol4">&nbsp;</td>
+                  <td class="cnnCol5">&nbsp;</td>
+                  <td class="cnnCol6">&nbsp;</td>
+                  <td class="cnnCol7">&nbsp;</td>
+                  <td class="cnnCol8">&nbsp;</td>
+                  <td class="cnnCol9">&nbsp;</td>
+                  <td class="cnnColR">&nbsp;</td>
+                  <td class="cnnColH">&nbsp;</td>
+                  <td class="cnnColE">&nbsp;</td>
+               </tr>
+            </table>
+         </div>
+         <div class="cnnRight">
+            <ol>
+               <li class="cnnItem4">8:10 PM ET
+                  						
+               </li>
+            </ol>
+            <ul>
+               <li class="cnnItem0"><strong>Tigers</strong><a href="http://github.com/baseball/mlb/players/7590/"><img src="http://i.cdn.turner.com/si/.e1d/img/4.0/global/baseball/mlb/players/7590_small.jpg" border="0" width="50" height="76" alt="Verlander" title="Verlander"></a><a href="http://github.com/baseball/mlb/players/7590/">
+                     <div class="cnnLine0">Verlander</div>
+                     <div class="cnnLine4">0-1</div>
+                     <div class="cnnLine5">2.2&nbsp;ERA</div>
+                     <div class="cnnLine6">&nbsp;</div>
+                     <div class="cnnLine7">&nbsp;</div></a></li>
+               <li class="cnnItem1"><strong>Royals</strong><a href="http://github.com/baseball/mlb/players/8932/"><img src="http://i.cdn.turner.com/si/.e1d/img/4.0/global/baseball/mlb/players/8932_small.jpg" border="0" width="50" height="76" alt="Duffy" title="Duffy"></a><a href="http://github.com/baseball/mlb/players/8932//">
+                     <div class="cnnLine0">Duffy</div>
+                     <div class="cnnLine4">1-0</div>
+                     <div class="cnnLine5">0&nbsp;ERA</div>
+                     <div class="cnnLine6">&nbsp;</div>
+                     <div class="cnnLine7">&nbsp;</div></a></li>
+            </ul>
+         </div>
+      </div>
+   </div>
+   <!-- end scoreboard -->
+   
+   <!-- start navbar -->
+   <div class="cnngNavbar">
+      <table border="0" cellpadding="0" cellspacing="0">
+         <tr>
+            <td class="cnnCol0"><span>PREVIEW</span></td>
+            <td class="cnnCol0"><a href="http://github.com/40630_matchup.html">MATCHUP</a></td></li>
+            <td class="cnnCol3"><a href="http://github.com/40630_fancomment.html">FAN COMMENTS</a></td>
+         </tr>
+      </table>
+   </div>
+   <!-- end navbar -->
+   
+   <!-- start content -->
+   <div class="cnngContent">
+   	<div class="cnngPreview">
+   		<div class="cnnLeft">
+   			
+   <!-- REAPFINDREPLACE:20120515:/.element/ssi/story/4.1/wires/ap/expired_story.html:/baseball/mlb/gameflash/2012/04/16/40630_preview.html-->
+   <h1>Tigers-Royals Preview</h1>
+   <p>
+      
+      <span class="cnnDataLinked"><a href="http://github.com/baseball/mlb/players/7590/index.html">Justin Verlander</a></span>
+      has pitched well in each of his first two starts, though he doesn't have a win to show for those efforts.
+      
+   </p>
+   <p>
+      He hasn't had much trouble earning victories against the 
+      <span class="cnnDataLinked"><a href="http://github.com/baseball/mlb/teams/royals/index.html">Kansas City Royals</a></span>
+      .
+      
+   </p>
+   <p>
+      Verlander looks to continue his mastery of the Royals when the 
+      <span class="cnnDataLinked"><a href="http://github.com/baseball/mlb/teams/tigers/index.html">Detroit Tigers</a></span>
+      visit Kauffman Stadium in the opener of a three-game series Monday night.
+      
+   </p>
+   <p>
+      The reigning AL 
+      <span class="cnnDataLinked"><a href="http://github.com/baseball/mlb/players/49534/index.html">Cy Young</a></span>
+      winner and MVP had a 2-0 lead through eight innings in both of his outings, but the Tigers weren't able to hold the lead.
+      
+   </p>
+   <p>Verlander (0-1, 2.20 ERA) allowed two hits before running into trouble in the ninth against Tampa Bay on Wednesday, getting
+      charged with four runs in 8 1-3 innings of a 4-2 defeat.
+   </p>"Once a couple guys got on, really the first time I've cranked it up like that - and lost a little bit of my consistency that
+   I'd had all day," Verlander said. "It's inexcusable. This loss rests solely on my shoulders." 
+   <p>The right-hander did his part in his opening-day start against Boston on April 5, allowing two hits before the bullpen faltered.
+      Detroit ended up winning 3-2 with a run in the bottom of the ninth, though Verlander didn't earn a decision.
+   </p>
+   <p>That hasn't been the case in his last four starts against the Royals, winning each with a 1.82 ERA. Verlander is 13-2 with
+      a 2.40 ERA in 19 career starts versus Kansas City, and another win will give him more victories than he has against any other
+      team. He's also beaten Cleveland 13 times.
+   </p>
+   <p>Verlander is 8-2 with a 1.82 ERA lifetime at Kauffman Stadium, where the Royals (3-6) were swept in a three-game series against
+      the Indians with Sunday's 13-7 loss.
+   </p>
+   <p>
+      
+      <span class="cnnDataLinked"><a href="http://github.com/baseball/mlb/players/7634/index.html">Billy Butler</a></span>
+      , who is 14 for 39 (.359) with two homers off Verlander, had an RBI single and is hitting .364 with four doubles and a homer
+      during a five-game hitting streak.
+      
+   </p>
+   <p>
+      Royals pitchers allowed seven home runs, 17 extra-base hits and 32 runs in the series, and manager 
+      <span class="cnnDataLinked"><a href="http://github.com/baseball/mlb/players/1716/index.html">Ned Yost</a></span>
+      turned to outfielder 
+      <span class="cnnDataLinked"><a href="http://github.com/baseball/mlb/players/7899/index.html">Mitch Maier</a></span>
+      in the ninth to pitched a scoreless inning Sunday.
+      
+   </p>"Let's hope it doesn't happen again," Maier said. "I don't like to be put in that situation, but we needed an inning." 
+   <p>
+      Kansas City will look to bounce back with the help of another solid outing from 
+      <span class="cnnDataLinked"><a href="http://github.com/baseball/mlb/players/8932/index.html">Danny Duffy</a></span>
+      (1-0, 0.00), who allowed one hit and struck out eight in six innings of a 3-0 win over Oakland on Tuesday.
+      
+   </p>
+   <p>The left-hander will be seeking his first win against Detroit after going 0-2 with a 5.63 ERA in three starts versus the Tigers
+      as a rookie.
+   </p>
+   <p>
+      
+      <span class="cnnDataLinked"><a href="http://github.com/baseball/mlb/players/7129/index.html">Gerald Laird</a></span>
+      was a triple short of the cycle and helped the Tigers (6-3) salvage the finale of a three-game series with a 5-2 victory over
+      Chicago on Sunday.
+      
+   </p>
+   <p>
+      
+      <span class="cnnDataLinked"><a href="http://github.com/baseball/mlb/players/8419/index.html">Rick Porcello</a></span>
+      allowed one run in 7 2-3 innings to give Detroit's starting rotation its first victory.
+      
+   </p>"All the other starters have pitched well," Porcello said. "It's just the way it's happened so far." 
+   <p>Verlander allowed three runs in seven innings of a 4-3 win over the Royals on Aug. 6, beating Duffy, who gave up three runs
+      over five.
+   </p>
+   <!-- /REAPFINDREPLACE:20120515:/.element/ssi/story/4.1/wires/ap/expired_story.html:/baseball/mlb/gameflash/2012/04/16/40630_preview.html-->
+   			<p class="cnnLast">
+   				<a href="http://biz.stats.com/" target="new">&#169; 2011 STATS LLC <img src="http://i.a.cnn.net/si/images/STATSlogo.gif" align="absmiddle" alt="STATS, Inc"></a>
+
+   			</p>
+   		</div>
+   		<div class="cnnRight">
+   			
+   			<div class="cnngCommentsBox" id="cnngCommentsBox">
+   				<div class="cnngComments">
+   					<div class="cnnHolder">						
+   						<div id="fanComments">
+   							<iframe src="http://www.fannation.com/gameday/gameflash_game_comments/320416107?sport_id=2" width="397" height="390" marginwidth="0" scrolling="no" frameborder="0"></iframe>
+   						</div>
+   					</div>
+   				</div>
+   				<div class="cnn_footer">
+   					<div class="cnngToggleOn"><a href="/service/javascript:hidediv();">TURN COMMENTS <span>OFF</span></a></div>
+   					<div class="cnngToggleOff"><a href="/service/javascript:showdiv();">TURN COMMENTS <span>ON</span></a></div>
+   				</div>
+   			</div>
+   		</div>
+   	</div>
+   </div>
+   <!-- end content -->
+   
+   <!-- start contentFooter -->
+   <div class="cnnWideSL"><script type="text/javascript">adsonar_placementId=1488671;adsonar_pid=769769;adsonar_ps=-1;adsonar_zw=978;adsonar_zh=150;</script><script>cnnad_createSL();</script></div>
+<!-- start footerbox -->
+<div class="cnnFooterBox">
+	<div class="cnnHolder">
+		<div class="cnnRight">
+			<dl>
+				<dt><a href="http://github.com/"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/footer_logo.jpg" alt="SI.com" title="SI.com"/></a></dt>
+				<dd><span>Hot Topics:</span>   <a href="http://github.com/2012/writers/peter_king/04/16/countdown/index.html" title="Peter King: MMQB"class="cnnFirst">Peter King: MMQB</a>   <a href="http://nhl-red-light.si.com/2012/04/16/mayhem-reigns-in-stanley-cup-playoffs/" title="NHL Playoffs" target="new" >NHL Playoffs</a>   <a href="http://github.com/2012/writers/george_schroeder/04/16/arkansas-football-petrino/index.html" title="Bobby Petrino">Bobby Petrino</a>   <a href="http://github.com/2012/baseball/mlb/04/16/valentine.youkilis.ap/index.html" title="Bobby Valentine">Bobby Valentine</a>   <a href="http://github.com/2012/writers/michael_mccann/04/16/roger.clemens.trial.preview/index.html" title="Roger Clemens">Roger Clemens</a>   <a href="http://github.com/2012/baseball/mlb/04/16/power.rankings/index.html" title="MLB Power Rankings">MLB Power Rankings</a>   <a href="http://github.com/2012/writers/richard_rothschild/04/13/jackie.robinson/index.html" title="Jackie Robinson">Jackie Robinson</a> </dd>
+			</dl>
+			<div class="cnnClear"></div>
+			<ul>
+				<li><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002348.html" target="_blank" rel="nofollow">SUBSCRIBE TO SI</a></li>
+				<li><a href="http://www.sportsillustratedeverywhere.com/" target="_blank" rel="nofollow">DIGITAL EDITION</a></li>
+				<li><a href="http://github.com/mobile/">SI MOBILE</a></li>
+				<li><a href="http://github.com/2010/about_us/jobs/">JOBS</a></li>
+				<li><a href="http://github.com/sitemap/">SITE MAP</a></li>
+				<li><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1003862.html" target="_blank" rel="nofollow">GIVE THE GIFT OF SI</a></li>
+				<li><a href="http://sipictures.com/" target="_blank" rel="nofollow">SI PICTURE SALES</a></li>
+				<li><a href="http://www.sportsillustratedsnapshot.com/" target="_blank" rel="nofollow">PICTURES OF THE DAY</a></li>
+				<li><a href="http://github.com/about_us/">ABOUT US</a></li>
+				<li><a href="http://simediakit.com/" target="_blank" rel="nofollow">SI MEDIA KITS</a></li>
+				<li><a href="http://www.sicovers.com/default.aspx?utm_source=sicom&utm_medium=ftr&utm_campaign=icrefer&xid=siftr" target="_blank" rel="nofollow">SI COVER COLLECTION</a></li>
+				<li><a href="http://sicustomerservice.com/" target="_blank" rel="nofollow">SI CUSTOMER SERVICE</a></li>
+				<li><a href="http://github.com/2008/magazine/si.books/">SI BOOKS</a></li>
+				<li><a href="http://github.com/about_us/feedback/">CONTACT US</a></li>
+				<li><a href="http://github.com/services/rss/">ADD RSS HEADLINE</a></li>
+			</ul>
+			<div class="cnnClear"></div>
+			<div class="cnnCopyright">
+				<style>
+				.cnnFooterBox .cnnHolder { overflow:hidden; }
+				.cnnFooterBox .cnnRight DIV.cnnCopyright { line-height:16px;padding-top:2px;text-align:left; }
+				.cnnFooterBox .cnnRight DIV.cnnCopyright IMG { float:left;margin:0 6px 14px 0; }
+				.cnnFooterBox .cnnRight DIV.cnnCopyright IMG#cnnFooterAdOpt { float:none;margin:0 0 0 6px;vertical-align:bottom; }
+				</style>
+				<img src="http://i.cdn.turner.com//si/.element/img/4.1/global/logo_footer_turner.png" alt="Turner - SI Digital"/> 
+				<script type="text/javascript">if( ( ( document.location.pathname ).indexOf( '/basketball/nba' ) >= 0 ) || ( ( document.location.pathname ).indexOf( '/video/nba' ) == 0 ) ) { document.write( 'TM & &#169; 2012 Turner Broadcasting System, Inc. A Time Warner Company. All Rights Reserved. SI.com is part of CNN Digital Network, which is part of the Turner Digital Network.' ); } else { document.write( 'TM & &#169; 2012 Turner Broadcasting System, Inc. A Time Warner Company. All Rights Reserved. SI.com is part of CNN Digital Network, which is part of the Turner Digital Network.' ); }</script><noscript>TM & &#169; 2012 Turner Broadcasting System, Inc. A Time Warner Company. All Rights Reserved. SI.com is part of CNN Digital Network, which is part of the Turner Digital Network.</noscript>
+				<br/> <a href="http://github.com/interactive_legal.html" rel="nofollow">Terms</a> under which this service is provided to you. Read our <a href="http://github.com/privacy/" rel="nofollow">privacy guidelines</a>, <a href="https://subscription.timeinc.com/storefront/privacy/si/generic_privacy_new.html?dnp-source=E#california" rel="nofollow">your California privacy rights</a>, and <a href="http://subscription-assets.timeinc.com/prod/assets/themes/magazines/default/template-resources/html/legal/ti-corp-behavioral.html">ad choices<img src="http://i.cdn.turner.com/si/.element/img/4.1/global/logo_adchoices.gif" id="cnnFooterAdOpt"/></a>.
+			</div>
+		</div>
+		<div class="cnnLeft"><a href="http://sportsillustrated.cnn.com/vault/cover/featured/11730/index.htm?xid=sivcoverhome"><img style="vertical-align:bottom;" title="SI Cover" alt="SI Cover" src="http://i.cdn.turner.com/si/si_online/covers/images/2012/0416_thumb.jpg"></a><a href="http://www.sportsillustratedeverywhere.com/?xid=sivcoverhome"><img style="vertical-align:bottom;" src="http://i.cdn.turner.com/sivault/.element/img/1.0/read_all_articles_96x12.gif" alt="Read All Articles" border="0" width="96" height="12"></a><a href="http://www.sicovers.com/ils.aspx?p=SPR20120416golf&utm_source=sivault&utm_medium=inet&utm_campain=icrefer%20&xid=sivcoverhome" target="_blank"><img style="vertical-align:bottom;" src="http://i.cdn.turner.com/sivault/.element/img/1.0/buy_cover_reprint.gif" alt="Buy Cover Reprint" border="0" width="96" height="12"></a>
+</div>
+	</div>
+</div>
+
+<!-- end footerbox -->
+
+<!-- start searchbar -->
+<div class="cnnSearchFooter">
+	<div class="cnnCenter"><form method="get" action="http://sportsillustrated.cnn.com/search/" name="footer_search"><input id="searchInputFooter" type="text" name="text" class="cnnLeft"/><input type="image" src="http://i.cdn.turner.com/si/.element/img/4.1/global/search.gif" alt="Search" title="Search" class="cnnRight"/></form></div>
+</div>
+
+<!-- end searchbar -->
+
+<!--START OF PAGELINKS.JS-->
+<script language="Javascript">// Post Processing code to update links with tracking references
+
+var url = window.location.href.toString(); // full URL of the current page
+url = url.replace(/http:\/\/[^\/]*/, ''); // drop the first "http://host" run (the pattern does not match https://)
+url = url.replace(/\?.*$/, ''); // drop the query string, from the first "?" to the end
+
+// All links on page
+var links = document.getElementsByTagName('a');
+
+for (var i=0; i < links.length; i++) {
+	var link = links[i];
+	if (link.href); else continue; // empty-statement idiom: skip anchors whose href is falsy
+	if (link.href.indexOf('.html/')>0) { siLog.debug('Fix trail slash - ',link.href); link.href = link.href.replace(/\.html\//,'.html'); } // log, then rewrite the first ".html/" to ".html"
+	if (!cnnPage.isHomepage) { // tracking erefs are only added when this flag is falsy
+		// Loop through links, add erefs where expected
+		if (link.href.indexOf('http://www.fannation.com/') == 0) { // href begins with this prefix
+			cnnAddQ( link, 'eref=fromSI' ); // cnnAddQ is a function declaration further down in this same script (hoisted)
+		}
+		if (url != '/' && link.href.indexOf('/vault') > 0) { // page path is not "/", and "/vault" occurs past index 0 of the href
+			cnnAddQ( link, 'eref=sisf' );
+		} 
+		if (url.indexOf('/danpatrick') != 0 && link.href.indexOf('/danpatrick') > 0 && link.href.indexOf('.mp3') < 0) { // page path does not start with /danpatrick; href contains it and has no ".mp3"
+			cnnAddQ( link, 'eref=fromSI' );
+		}
+	}
+	if (link.innerHTML == link.getAttribute('title')) { // title merely repeats the link's inner markup
+		link.setAttribute('title',''); // so blank it out
+	}
+}
+
+function cnnAddQ (link, add) { // appends the query fragment `add` (e.g. 'eref=fromSI') to link.href in place
+	if (link.href.toLowerCase().indexOf('javascript') == -1) { // hrefs containing "javascript" (any case) are left untouched
+		if (link.href.indexOf('?') > 0) link.href = link.href + '&' + add; // a "?" is already present past index 0: join with "&"
+		else link.href = link.href + '?' + add; // otherwise start the query string with "?"
+	}
+} // NOTE(review): `add` is concatenated at the very end of href, i.e. after any "#fragment" the href may carry
+
+// Add whitespace to cnnClear
+var breaks = $c('cnnClear','div'); // NOTE(review): `breaks` is not referenced again in the rest of this inline script
+
+/* Homepage */
+if (cnnPage.isHomepage) {
+	cnnTagHPLinks(); 
+	/* iPad */
+	if(navigator.userAgent.indexOf('iPad')>-1) { // user-agent string contains "iPad"
+		$e('cnnShareRow_mobile').href='http://ax.itunes.apple.com/WebObjects/MZStore.'
+		+'woa/wa/browserRedirect?url=itms%253A%252F%252Fax.itunes.apple.com%252FWebObj'
+		+'ects%252FMZStore.woa%252Fwa%252FviewSoftware%253Fid%253D329510739%2526mt%253D8'; // three literals concatenated into a single href value
+	}
+	/* Poll frame height issue */
+	if ($e('cnnPollFrame')) { $e('cnnPollFrame').setAttribute('height','169'); } // only when the lookup returns something truthy
+}</script>
+<!--END OF PAGELINKS.JS-->
+
+</div>
+<div><!-- move tracking out of cnnpage -->
+<!-- ADBP/JSMD -->
+<!-- ADBP Meta Data -->
+<script type="text/javascript" src="http://i.cdn.turner.com/si/.e/js/4.1/global/jsmd/metadata.js"></script>
+<!-- /ADBP Meta Data -->	
+
+<!-- JSMD Code --> 
+<script language="JavaScript" type="text/javascript" src="http://i.cdn.turner.com/si/.element/js/4.1/global/jsmd/jsmd.js"></script> 
+<script language="JavaScript"> 
+<!-- $pathname is defined in metadata.js
+if($pathname.indexOf("/.element/ssi/ads.iframes/") == -1 && $pathname.indexOf("/doubleclick/dartiframe.html") == -1) {
+	var jsmd=_jsmd.init();
+	if(document.referrer !== window.location.href){
+		jsmd.send();
+	}
+}
+//-->
+</script> 
+<!-- / End JSMD Code -->
+<!-- /ADBP/JSMD -->
+</div>
+
+<div style="font-size:1px;line-height:1px;">
+<div><img src="http://github.com/cookie.crumb" width="1" height="1"></div>
+</div>
+
+<img src="http://i.cdn.turner.com/si/.e/img/4.0/global/pixels/blank_pixel.gif" alt="" id="TargetImageDE" name="TargetImageDE" onload="cnnad_getDEAdHeadCookie(this)" height="1" width="1">
+
+<script language="JavaScript">
+	siTracking.init();
+</script>
+<script language="JavaScript">
+	//ADM
+	cnnad_sendADMData();
+	cnnad_ugsync();
+</script>
+
+<!-- TIIAD -->
+<script type="text/javascript">
+function siQuantcast() // builds the label string that is assigned to _qoptions.labels right after this function
+{
+	var lb = "Time Inc News Business and Sports,Sports Illustrated"; // fixed base label
+	var lb_ch = (jsmd.get("m:page.section[0]") ? jsmd.get("m:page.section[0]") : ""); // falsy lookup becomes ""; NOTE(review): jsmd is only assigned inside a conditional in an earlier script on this page
+	lb+=(lb_ch != null && typeof(lb_ch) == "string" && lb_ch.length > 0) ? "." + lb_ch:""; // append "." + section only when it is a non-empty string
+	return lb;
+}
+_qoptions={
+	qacct:"p-5dyPa639IrgIw",
+	labels:siQuantcast()
+};
+</script>
+<script type="text/javascript" src="http://edge.quantserve.com/quant.js"></script>
+<noscript><img src="http://pixel.quantserve.com/pixel/p-5dyPa639IrgIw.gif?labels=Time%20Inc%20News%20Business%20and%20Sports,Sports%20Illustrated" style="display: none;" border="0" height="1" width="1" alt="Quantcast"/></noscript> 
+<script src="http://js.revsci.net/gateway/gw.js?csid=H07710&auto=t" type="text/javascript"></script>
+<!-- /TIIAD -->
+
+<script src="http://i.cdn.turner.com/si/.e1d/js/4.1/global/pagelinks.js" type="text/javascript"></script>
+<script src="http://i.cdn.turner.com/si/.e1d/js/4.1/global/subnav.js" type="text/javascript"></script>
+
+   <!-- end contentFooter -->
+   
+   <!--[if IE 6]></div></div><![endif]--><!--[if IE 7]></div></div><![endif]--><!--[if IE 8]></div></div><![endif]-->
+   </body>
+   </html>
\ No newline at end of file
diff --git a/tests/samples/summary-keep-all-images.sample.html b/tests/samples/summary-keep-all-images.sample.html
new file mode 100644
index 00000000..127683fc
--- /dev/null
+++ b/tests/samples/summary-keep-all-images.sample.html
@@ -0,0 +1,29 @@
+<!DOCTYPE html>
+<html lang="en">
+<head></head>
+<body>
+<h2>
+    <span>
+        H2 Headline H2 Headline H2 Headline H2 Headline H2 Headline H2 Headline H2 Headline H2 Headline H2 Headline H2 Headline
+    </span>
+</h2>
+<p>
+    <spa>
+        Text Text Text Text Text Text Text Text Text Text
+    </spa>
+</p>
+<div>
+    <span>
+        <a>
+            <img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAIAAAACUFjqAAABhGlDQ1BJQ0MgcHJvZmlsZQAAKJF9kT1Iw1AUhU9TpSLVDnYQcchQnSyIFXHUKhShQqgVWnUweekfNDEkKS6OgmvBwZ/FqoOLs64OroIg+APiLjgpukiJ9yWFFjFeeLyP8+45vHcfIDSqTLO6xgFNt81MKinm8iti6BUBhBFBPxIys4xZSUrDt77uqZvqLs6z/Pv+rD61YDEgIBLPMMO0ideJpzZtg/M+cZSVZZX4nHjMpAsSP3Jd8fiNc8llgWdGzWxmjjhKLJY6WOlgVjY14knimKrplC/kPFY5b3HWqjXWuid/YbigLy9xndYwUljAIiSIUFBDBVXYiNOuk2IhQ+dJH/+Q65fIpZCrAkaOeWxAg+z6wf/g92ytYmLCSwonge4Xx/kYAUK7QLPuON/HjtM8AYLPwJXe9m80gOlP0uttLXYERLaBi+u2puwBlzvA4JMhm7IrBWkJxSLwfkbflAcGboHeVW9urXOcPgBZmlX6Bjg4BEZLlL3m8+6ezrn929Oa3w9e03KfJqsuOAAAAAlwSFlzAAAuIwAALiMBeKU/dgAAAAd0SU1FB+kBDA8PKt1W5MYAAAAZdEVYdENvbW1lbnQAQ3JlYXRlZCB3aXRoIEdJTVBXgQ4XAAAAFUlEQVQY02P8x+rFgBswMeAFI1UaAJ65AWFYB2G5AAAAAElFTkSuQmCC"
+            />
+         </a>
+    </span>
+</div>
+<p>
+    <spa>
+        Text Text Text Text Text Text Text Text Text Text
+    </spa>
+</p>
+</body>
+</html>
\ No newline at end of file
diff --git a/tests/samples/the-hurricane-rubin-carter-denzel-washington.html b/tests/samples/the-hurricane-rubin-carter-denzel-washington.html
new file mode 100644
index 00000000..4ebddd04
--- /dev/null
+++ b/tests/samples/the-hurricane-rubin-carter-denzel-washington.html
@@ -0,0 +1,1470 @@
+
+<!DOCTYPE html>
+<html id="js-context" class="js-off is-not-modern id--signed-out" data-page-path="/film/2014/apr/24/the-hurricane-rubin-carter-denzel-washington">
+<head>
+<meta charset="utf-8"/>
+<title>The Hurricane: the facts of Rubin Carter's life story are beaten to a pulp | Film | The Guardian</title>
+<meta http-equiv="X-UA-Compatible" content="IE=Edge"/>
+<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
+<meta name="format-detection" content="telephone=no"/>
+<meta name="HandheldFriendly" content="True"/>
+<link rel="dns-prefetch" href="http://assets.guim.co.uk/"/>
+<link rel="dns-prefetch" href="http://i.guim.co.uk/"/>
+<link rel="dns-prefetch" href="http://api.nextgen.guardianapps.co.uk/"/>
+<link rel="dns-prefetch" href="http://hits.theguardian.com/"/>
+<link rel="dns-prefetch" href="http://j.ophan.co.uk/"/>
+<link rel="dns-prefetch" href="http://ophan.theguardian.com/"/>
+<link rel="dns-prefetch" href="http://oas.theguardian.com/"/>
+<link rel="dns-prefetch" href="http://beacon.guim.co.uk/"/>
+<link rel="shortcut icon" type="image/png" href="http://assets.guim.co.uk/images/favicons/79d7ab5a729562cebca9c6a13c324f0e/32x32.ico"/>
+<link rel="apple-touch-icon" sizes="152x152" href="http://assets.guim.co.uk/images/favicons/451963ac2e23633472bf48e2856d3f04/152x152.png"/>
+<link rel="apple-touch-icon" sizes="144x144" href="http://assets.guim.co.uk/images/favicons/1a3f98d8491f8cfdc224089b785da86b/144x144.png"/>
+<link rel="apple-touch-icon" sizes="120x120" href="http://assets.guim.co.uk/images/favicons/cf23080600002e50f5869c72f5a904bd/120x120.png"/>
+<link rel="apple-touch-icon" sizes="114x114" href="http://assets.guim.co.uk/images/favicons/f438f6041a4c1d0289e6debd112880c2/114x114.png"/>
+<link rel="apple-touch-icon" sizes="72x72" href="http://assets.guim.co.uk/images/favicons/b5050517955e7cf1e493ccc53e64ca05/72x72.png"/>
+<link rel="apple-touch-icon-precomposed" href="http://assets.guim.co.uk/images/favicons/4fd650035a2cebafea4e210990874c64/57x57.png"/>
+<link rel="canonical" href="http://www.theguardian.com/film/2014/apr/24/the-hurricane-rubin-carter-denzel-washington"/>
+<meta name="apple-mobile-web-app-title" content="Guardian"/>
+<meta name="application-name" content="The Guardian"/>
+<meta name="msapplication-TileColor" content="#005689"/>
+<meta name="msapplication-TileImage" content="http://assets.guim.co.uk/images/favicons/f06f6996e193d1ddcd614ea852322d25/windows_tile_144_b.png"/>
+<link rel="publisher" href="https://plus.google.com/113000071431138202574"/>
+<meta name="author" content="Alex von Tunzelmann"/>
+<meta name="description" content="Denzel Washington&#x27;s compelling performance gives Norman Jewison&#x27;s biopic punch – despite its many inaccuracies, writes Alex von Tunzelmann"/>
+<meta name="thumbnail" content="//i.guim.co.uk/static/w-620/h--/q-95/sys-images/Arts/Arts_/Pictures/2014/4/23/1398270516995/The-Hurricane-007.jpg"/>
+<meta name="keywords" content="Sport,Film,Denzel Washington"/>
+<meta name="news_keywords" content="Sport,Film,Denzel Washington"/>
+<meta property="og:url" content="http://www.theguardian.com/film/2014/apr/24/the-hurricane-rubin-carter-denzel-washington"/>
+<meta property="article:author" content="http://www.theguardian.com/profile/alexvontunzelmann"/>
+<meta property="og:description" content="Denzel Washington&#x27;s compelling performance gives Norman Jewison&#x27;s biopic punch – despite its many inaccuracies, writes Alex von Tunzelmann"/>
+<meta property="og:image" content="http://static.guim.co.uk/sys-images/Arts/Arts_/Pictures/2014/4/23/1398270516995/The-Hurricane-007.jpg"/>
+<meta property="article:publisher" content="https://www.facebook.com/theguardian"/>
+<meta property="og:type" content="article"/>
+<meta property="article:section" content="Film"/>
+<meta property="article:published_time" content="2014-04-24T09:26:18.000Z"/>
+<meta property="og:title" content="The Hurricane: the facts of Rubin Carter&#x27;s life story are beaten to a pulp"/>
+<meta property="fb:app_id" content="180444840287"/>
+<meta property="article:tag" content="Sport,Film,Denzel Washington"/>
+<meta property="og:site_name" content="the Guardian"/>
+<meta property="article:modified_time" content="2014-06-21T00:41:29.000+01:00"/>
+<meta name="twitter:site" content="@guardian"/>
+<meta name="twitter:app:name:iphone" content="The Guardian"/>
+<meta name="twitter:app:id:iphone" content="409128287"/>
+<meta name="twitter:app:name:googleplay" content="The Guardian"/>
+<meta name="twitter:app:id:googleplay" content="com.guardian"/>
+<meta name="twitter:app:url:googleplay" content="guardian://www.theguardian.com/film/2014/apr/24/the-hurricane-rubin-carter-denzel-washington"/>
+<meta name="twitter:card" content="summary_large_image"/>
+<meta name="google-site-verification" content="LR-FN6c2gIEUoo3k049w1nxyHykmac5ZE3SaUOiKc30"/>
+<!-- scripts and CSS were cut -->
+</head>
+<body id="top" class="has-localnav" itemscope itemtype="http://schema.org/WebPage">
+<div class="site-message js-site-message is-hidden" data-link-name="release message" role="dialog" aria-label="welcome" aria-describedby="site-message__message">
+<div class="gs-container">
+<div class="site-message__inner js-site-message-inner">
+<div class="site-message__media">
+<span class="inline-marque-36 inline-icon u-vertical-align-middle-icon">
+<svg width="36" height="36" viewBox="0 0 36 36"><path d="M21.3 8.8c0-4.9-1.5-5.7-3.3-5.7-1.8 0-3.2.7-3.2 5.7s1.5 5.5 3.2 5.5c1.8-.1 3.3-.6 3.3-5.5m-6.5 18.8c-2.3 0-2.9 1.7-2.9 2.9 0 1.8 1.6 3.4 6.3 3.4 5.3 0 6.8-1.5 6.8-3.4 0-1.7-1.3-2.9-3.4-2.9h-6.8zM10.5 2.4C4.3 5.2 0 11.4 0 18.7c0 4.9 2 9.4 5.2 12.6V31c0-3.2 3.1-4.4 5.9-5-2.6-.6-3.9-2.5-3.9-4.4 0-2.6 2.9-4.8 4.3-5.8l-.2-.1c-2.5-1.4-4.1-3.8-4.1-7 0-2.7 1.2-4.9 3.3-6.3M36 18.8C36 11.4 31.5 5 25.1 2.3c2.1 1.4 3.4 3.5 3.5 6.3l.1.6c0 5.4-4.4 8.2-10.7 8.2-1.6 0-2.7-.1-4.1-.5-.6.4-1.1 1.1-1.1 1.8 0 .9.8 1.6 1.8 1.6h8.8c5.5 0 8.2 2.2 8.2 7.1 0 1.6-.3 3.1-1 4.3 3.3-3.4 5.4-7.9 5.4-12.9"/></svg>
+</span>
+</div>
+<div class="site-message__copy js-site-message-copy u-cf">
+</div>
+<div class="site-message__close">
+<button class="site-message__close-btn js-site-message-close" data-link-name="hide release message">
+<span class="u-h">Close</span>
+<span class="inline-close-icon-white-small inline-icon ">
+<svg xmlns="http://www.w3.org/2000/svg" width="30" height="30"><path fill="#fff" d="M21 9.8l-.8-.8-5.2 4.8L9.8 9l-.8.8 4.8 5.2L9 20.2l.8.8 5.2-4.8 5.2 4.8.8-.8-4.8-5.2L21 9.8"/></svg>
+</span>
+</button>
+</div>
+</div>
+</div>
+</div>
+<a class="u-h skip" href="#maincontent" data-link-name="skip : main content">Skip to main content</a>
+<div class="sticky-nav-mt-test">
+<div class="top-banner-ad-container top-banner-ad-container--desktop top-banner-ad-container--above-nav">
+<div id="dfp-ad--top-above-nav" class="js-ad-slot ad-slot ad-slot--dfp ad-slot--top-above-nav ad-slot--top-banner-ad" data-link-name="ad slot top-above-nav" data-test-id="ad-slot-top-above-nav" data-name="top-above-nav" data-mobile="1,1|88,70|728,90" data-desktop="1,1|88,70|728,90|940,230|900,250|970,250"> </div>
+</div>
+<header id="header" class="l-header u-cf " role="banner" data-link-name="global navigation: header">
+<div class="js-navigation-header navigation-container navigation-container--collapsed">
+<div class="gs-container l-header__inner">
+<div class="l-header-pre u-cf">
+<div class="brand-bar">
+<div class="brand-bar__item brand-bar__item--profile popup-container
+                                    has-popup brand-bar__item--has-control js-profile-nav" data-component="identity-profile">
+<a href="https://profile.theguardian.com/signin" data-link-name="User profile" data-toggle="popup--profile" class="brand-bar__item--action popup__toggle" data-test-id="sign-in-link" aria-haspopup="true">
+<span class="inline-profile-36 inline-icon rounded-icon control__icon-wrapper">
+<svg width="18" height="18"><path fill="#fff" d="M9 7.3c1.6 0 3.4-1.8 3.4-3.9S11.1 0 9 0 5.6 1.3 5.6 3.4s2 3.9 3.4 3.9zm5.9 3.4l-.9-.8c-1.7-.6-3.1-.9-5-.9s-3.3.3-5 .9l-.9.9L1 17.2l.9.8h14.3l.9-.9-2.2-6.4z"/></svg>
+</span>
+<span class="js-profile-info control__info" data-test-id="sign-in-name">sign in</span>
+</a>
+<div class="js-profile-popup"></div>
+</div>
+<div class="brand-bar__item has-popup brand-bar__item--has-control
+                                popup-container brand-bar__item--subscribe" data-component="subscribe">
+<a href="http://subscribe.theguardian.com/?INTCMP=NGW_HEADER_UK_GU_SUBSCRIBE" class="brand-bar__item--action" data-link-name="common.editions.Uk$@51c63b2e : topNav : subscribe" class="brand-bar__item--action">
+<span class="inline-marque-36 inline-icon rounded-icon control__icon-wrapper">
+<svg width="36" height="36" viewBox="0 0 36 36"><path d="M21.3 8.8c0-4.9-1.5-5.7-3.3-5.7-1.8 0-3.2.7-3.2 5.7s1.5 5.5 3.2 5.5c1.8-.1 3.3-.6 3.3-5.5m-6.5 18.8c-2.3 0-2.9 1.7-2.9 2.9 0 1.8 1.6 3.4 6.3 3.4 5.3 0 6.8-1.5 6.8-3.4 0-1.7-1.3-2.9-3.4-2.9h-6.8zM10.5 2.4C4.3 5.2 0 11.4 0 18.7c0 4.9 2 9.4 5.2 12.6V31c0-3.2 3.1-4.4 5.9-5-2.6-.6-3.9-2.5-3.9-4.4 0-2.6 2.9-4.8 4.3-5.8l-.2-.1c-2.5-1.4-4.1-3.8-4.1-7 0-2.7 1.2-4.9 3.3-6.3M36 18.8C36 11.4 31.5 5 25.1 2.3c2.1 1.4 3.4 3.5 3.5 6.3l.1.6c0 5.4-4.4 8.2-10.7 8.2-1.6 0-2.7-.1-4.1-.5-.6.4-1.1 1.1-1.1 1.8 0 .9.8 1.6 1.8 1.6h8.8c5.5 0 8.2 2.2 8.2 7.1 0 1.6-.3 3.1-1 4.3 3.3-3.4 5.4-7.9 5.4-12.9"/></svg>
+</span>
+<span class="control__info">subscribe</span>
+</a>
+</div>
+<div class="brand-bar__item has-popup popup-container brand-bar__item--has-control brand-bar__item--search" data-component="search">
+<a href="https://www.google.co.uk/advanced_search?q=site:www.theguardian.com" data-is-ajax data-link-name="Search icon" class="brand-bar__item--action popup__toggle js-search-toggle" data-toggle="popup--search" aria-haspopup="true">
+<span class="inline-search-36 inline-icon rounded-icon control__icon-wrapper">
+<svg width="18" height="18" viewBox="0 0 18 18"><path d="M6.5 1.6c2.7 0 4.9 2.2 4.9 4.9s-2.2 4.9-4.9 4.9-4.9-2.2-4.9-4.9 2.2-4.9 4.9-4.9m0-1.6C2.9 0 0 2.9 0 6.5S2.9 13 6.5 13 13 10.1 13 6.5 10.1 0 6.5 0zm6.6 11.5l4.9 4.9-1.6 1.6-4.9-4.9v-.8l.8-.8h.8z"/></svg>
+</span>
+<span class="control__info">search</span>
+</a>
+</div>
+<div class="brand-bar__item--right brand-bar__item--right--uk-edition" data-component="guardian-services">
+<div class="brand-bar__item brand-bar__item--jobs">
+<a class="brand-bar__item--action" data-link-name="uk : topNav : jobs" href="http://jobs.theguardian.com/?INTCMP=NGW_TOPNAV_UK_GU_JOBS">jobs</a>
+</div>
+<div class="brand-bar__item brand-bar__item--soulmates">
+<a class="brand-bar__item--action" data-link-name="uk : topNav : soulmates" href="https://soulmates.theguardian.com/?INTCMP=NGW_TOPNAV_UK_GU_SOULMATES">dating</a>
+</div>
+<div class="brand-bar__item has-popup brand-bar__item--has-control brand-bar__item--more">
+<a href="#guardian-services-top-menu" class="brand-bar__item--action popup__toggle" data-toggle="top-bar__popup--more" data-link-name="uk : topNav : more" aria-haspopup="true" aria-controls="guardian-services-top-menu">
+<span class="rounded-icon control__icon-wrapper">
+<!--[if (gt IE 8)&(IEMobile)]><!-->
+<span class="inline-ellipsis-36 inline-icon ">
+<svg xmlns="http://www.w3.org/2000/svg" width="24" height="18"><circle cx="3" cy="10" r="3" fill="#fff"/><circle cx="12" cy="10" r="3" fill="#fff"/><circle cx="21" cy="10" r="3" fill="#fff"/></svg>
+</span>
+<!--<![endif]-->
+</span>
+<span class="control__info" data-test-id="sign-in-name">more</span>
+</a>
+<div class="popup is-off top-bar__popup--more" id="guardian-services-top-menu">
+<h3 class="popup__group-header">from the guardian:</h3>
+<ul class="popup__group">
+<li class="popup__item brand-bar__popup--jobs">
+<a class="brand-bar__item--action" data-link-name="uk : topNav : jobs" href="http://jobs.theguardian.com/?INTCMP=NGW_TOPNAV_UK_GU_JOBS">jobs</a>
+</li>
+<li class="popup__item brand-bar__popup--soulmates">
+<a class="brand-bar__item--action" data-link-name="uk : topNav : soulmates" href="https://soulmates.theguardian.com/?INTCMP=NGW_TOPNAV_UK_GU_SOULMATES">dating</a>
+</li>
+<li class="popup__item">
+<a class="brand-bar__item--action" data-link-name="uk : topNav : masterclasses" href="http://www.theguardian.com/guardian-masterclasses?INTCMP=NGW_TOPNAV_UK_GU_MASTERCLASSES">masterclasses</a>
+</li>
+</ul>
+<div class="brand-bar__popup--membership">
+<h3 class="popup__group-header">join us:</h3>
+<ul class="popup__group">
+<li class="popup__item">
+<a class="brand-bar__item--action" data-link-name="uk : topNav : membership" href="https://membership.theguardian.com/?INTCMP=NGW_TOPNAV_UK_GU_MEMBERSHIP">membership</a>
+</li>
+<li class="popup__item">
+<a class="brand-bar__item--action" data-link-name="uk : topNav : subscribe" href="http://subscribe.theguardian.com/?INTCMP=NGW_TOPNAV_UK_GU_SUBSCRIBE">subscribe</a>
+</li>
+</ul>
+</div>
+<div class="brand-bar__popup--edition">
+<h3 class="popup__group-header">change edition:</h3>
+<ul class="popup__group">
+<li class="popup__item">
+<a class="brand-bar__item--action brand-bar__item--inline-action" data-edition="UK" data-link-name="switch to UK edition" href="http://www.theguardian.com/preference/edition/uk" title="Switch to the UK edition">
+<span class="u-h">switch to the </span> UK <span class="u-h"> edition</span>
+</a>
+<a class="brand-bar__item--action brand-bar__item--inline-action" data-edition="US" data-link-name="switch to US edition" href="http://www.theguardian.com/preference/edition/us" title="Switch to the US edition">
+<span class="u-h">switch to the </span> US <span class="u-h"> edition</span>
+</a>
+<a class="brand-bar__item--action brand-bar__item--inline-action" data-edition="AU" data-link-name="switch to AU edition" href="http://www.theguardian.com/preference/edition/au" title="Switch to the AU edition">
+<span class="u-h">switch to the </span> AU <span class="u-h"> edition</span>
+</a>
+</li>
+</ul>
+</div>
+</div>
+</div>
+<div class="brand-bar__item has-popup brand-bar__item--edition" data-component="edition">
+<a href="#guardian-edition-menu" class="brand-bar__item--action brand-bar__item--action-beta popup__toggle" data-link-name="topNav : edition" data-toggle="top-bar__popup--edition" aria-haspopup="true" aria-controls="guardian-edition-menu">
+<span class="brand-bar__edition-name">International</span>
+<small class="brand-bar__beta">beta</small>
+</a>
+<ul class="popup popup__group is-off top-bar__popup--edition" id="guardian-edition-menu">
+<li class="popup__item">
+<a class="brand-bar__item--action" data-edition="UK" data-link-name="switch to UK edition" href="http://www.theguardian.com/preference/edition/uk" title="Switch to the UK edition">
+<span class="u-h">switch to the </span>
+<span class="brand-bar__edition-name">UK edition</span>
+</a>
+</li>
+<li class="popup__item">
+<a class="brand-bar__item--action" data-edition="US" data-link-name="switch to US edition" href="http://www.theguardian.com/preference/edition/us" title="Switch to the US edition">
+<span class="u-h">switch to the </span>
+<span class="brand-bar__edition-name">US edition</span>
+</a>
+</li>
+<li class="popup__item">
+<a class="brand-bar__item--action" data-edition="AU" data-link-name="switch to AU edition" href="http://www.theguardian.com/preference/edition/au" title="Switch to the AU edition">
+<span class="u-h">switch to the </span>
+<span class="brand-bar__edition-name">Australia edition</span>
+</a>
+</li>
+</ul>
+</div>
+</div>
+</div>
+</div>
+<div class="popup popup--search is-off"><div class="js-search-placeholder"></div></div>
+<div class="l-header-main">
+<a href="http://github.com/international" data-link-name="site logo" id="logo" class="logo-wrapper" data-component="logo">
+<span class="u-h">The Guardian</span>
+<!--[if (gt IE 8)&(IEMobile)]><!-->
+<span class="inline-guardian-logo-320 inline-logo ">
+<svg width="320" height="60" viewBox="0 0 320 60"><path fill="#fff" d="M284 45h16v-3l-3-1.5v-20c1.2-.9 2.8-1.1 4.3-1.1 2.8 0 3.7.9 3.7 4.1v17l-3 1.5v3h16v-3l-3-1.5v-19c0-5.7-2.1-8.3-7.1-8.3-4.1 0-8.1 1.5-10.8 4V13h-1l-12.4 2.2v2.7l3.3 1.6v21l-3 1.5v3zM245.3.4c-3 0-5.4 2.4-5.4 5.5 0 3 2.4 5.4 5.4 5.4 2.9 0 5.4-2.4 5.4-5.4-.1-3.1-2.5-5.5-5.4-5.5zM237 15.1v2.8l3 1.6v20.9l-3 1.5V45h16v-3.1l-3-1.5V13.1h-1l-12 2zM223 39c-.7.6-1.7 1.1-3.2 1.1-4 0-5.9-3.3-5.9-10.9 0-8.7 2.4-11.6 5.6-11.6 1.8 0 2.8.6 3.5 1.4v20zm0-24.4c-1.2-.9-3.3-1.4-5-1.4-7.4 0-14.5 4.4-14.5 16.8 0 11.9 7.1 15.7 11.8 15.7 3.8 0 6.4-1.7 7.6-3.4h.3v3.3h.9l11.9-1.4v-2.3l-3-1.8V.6h-1l-12.6 2v2.8l3.6 1.5v7.7zM181 18l3 1.5v20.9l-3 1.5V45h17v-3.1l-4-1.5V24.1c1.8-1.4 4.1-1.9 6.8-1.9.9 0 1.6.2 2.2.3v-9c-.3-.1-.7-.2-1.2-.2-3.3 0-6 2.2-7.8 6.2V13h-1l-12 2v3zm-19.3-.8c3.9 0 5.3 2 5.3 5.9v3.5l-6.1 1.1c-5.9 1.1-10.4 2.9-10.4 9.3 0 5.1 3.5 8.7 8.3 8.7 3.8 0 7.4-1.7 8.7-4.4h.3c.5 3.3 3.3 4.4 6.4 4.4 2.4 0 4.8-.6 5.7-1.6v-2l-2.9-1.5v-18c0-7-5.2-9.4-13.3-9.4-5.3 0-8.6 1.3-11.4 2.6v7.8h4.7l2-6c1.1-.4 2.3-.4 2.7-.4zm2.3 22.9c-1.9 0-4-1.1-4-4.6 0-2.4 2.4-4.7 4.8-5l2.2-.5v8.5s-1.9 1.6-3 1.6zm100.7-22.9c3.9 0 5.3 2 5.3 5.9v3.5l-6.1 1.1c-5.9 1.1-10.4 2.9-10.4 9.3 0 5.1 3.5 8.7 8.3 8.7 3.8 0 7.4-1.7 8.7-4.4h.3c.5 3.3 3.3 4.4 6.4 4.4 2.4 0 4.8-.6 5.7-1.6v-2l-2.9-1.5v-18c0-7-5.2-9.4-13.3-9.4-5.3 0-8.6 1.3-11.4 2.6v7.8h4.7l2-6c1.1-.4 2.3-.4 2.7-.4zm2.3 22.9c-1.9 0-4-1.1-4-4.6 0-2.4 2.4-4.7 4.8-5l2.2-.5v8.5s-1.9 1.6-3 1.6zm-138.7 5.6c.4 0 .9 0 1.3-.1 3.5-.3 6.7-2 8.4-4.2v4.1l12-1.5v-2l-3-2V13h-1l-12 2.3V18l4 1.7V38c-1.1.8-2.4 1.3-4.2 1.3-2.5 0-4.8-.8-4.8-4.3V13h-1l-12 2.5v2.6l4 1.6V36c0 5.4 2.2 9.7 8.3 9.7zM96 38c-1.2 0-2.5-.8-2.5-1.9 0-.8.6-1.7 1.4-2.3 1.6.5 3 .6 5 .6 7.8 0 13.2-3.7 13.2-10.4 0-3-1.3-4.6-3.2-6.4L115 19v-6l-8.2 1.6c-1.9-.7-4.5-1.6-7-1.6-7.8 0-13.2 4.1-13.2 10.8 0 4.1 2 7.1 5 8.8l.2.2c-1.7 1.2-5.3 4-5.3 7.2 0 2.4 1.5 4.8 4.8 5.5-3.4.8-7.3 2.5-7.3 6.5 0 4.1 5.9 8 15.5 8 11.8 0 16.5-5.7 16.5-13 
0-6.1-2.8-9-9.5-9H96zm7.5-14c0 5.7-1.3 6.5-3.5 6.5s-4-.8-4-6.5c0-5.8 1.8-7.5 4-7.5s3.5 2 3.5 7.5zM92 50.9c.1-1.5 1.1-3.4 3.7-3.6h8.6c2.5 0 3.7 2 3.7 3.6 0 3.2-2 4.4-8.3 4.4-5.5 0-7.8-2.2-7.7-4.4z"/><path fill="#AAD8F1" d="M83 30c0-13-5.1-16.9-13-16.9-9 0-15 6.2-15 16.4 0 10.5 5.5 16.2 15.8 16.2 5.6 0 9.7-2.7 11.2-4.7v-3c-2.1.7-3.9 1.2-7.7 1.2-5.6 0-9.3-3.2-9.3-9.2h18zM69.9 16.6c2.5 0 3.8 1.8 3.8 9.6l-8.4.7c.1-7.9 1.8-10.3 4.6-10.3zM37 45v-3l-3-1.5V21c1.2-.9 3.2-1.7 4.8-1.7 2.8 0 4.3 1.6 4.2 4.2v17L40 42v3h16v-3l-3-1.5v-19c0-5.7-3.3-8.3-7.7-8.3-4.1 0-8.6 1.3-11.3 3.8V0h-1L21 2v3l4 1.5v34L22 42v3h15zM4 36.4c0 5.7 2.8 9.3 8.9 9.3 3.1 0 6.2-.8 8.1-2.3v-3.8c-.8.3-1.9.5-2.9.5-2.9 0-4.1-1.6-4.1-4.6V19h7v-5h-7V6.5L4 8v6l-4 1v4h4v17.4z"/></svg>
+</span>
+<!--<![endif]-->
+<!--[if (lt IE 9)&(!IEMobile)]>
+                                <span class="inline-logo inline-guardian-logo-320"></span>
+                            <![endif]-->
+<span class="logo__tagline hide-on-mobile">Winner of the Pulitzer prize 2014</span>
+</a>
+</div>
+</div>
+<div class="navigation navigation--has-local-navigation navigation--has-signposting">
+<div class="gs-container">
+<div class="navigation__inner">
+<div class="navigation__scroll">
+<nav class="navigation__container navigation__container--first" data-component="nav" role="navigation" aria-label="Current section">
+<ul class="signposting">
+<li class="signposting__item signposting__item--home">
+<a class="signposting__action" href="http://github.com/international" data-link-name="nav : signposting : home">home</a>
+</li>
+<li class="signposting__item signposting__item--parent">
+<span class="signposting__separator" aria-hidden="true"><span class="signposting__separator__inner">›</span></span>
+<a class="signposting__action" href="http://www.theguardian.com/uk/culture" data-link-name="nav : signposting : culture">culture</a>
+</li>
+<li class="signposting__item signposting__item--current">
+<span class="signposting__separator"><span class="signposting__separator__inner">›</span></span>
+<a class="signposting__action" href="http://www.theguardian.com/film" data-link-name="nav : signposting : culture &gt; film">film</a>
+</li>
+</ul>
+<ul class="local-navigation">
+<li class="local-navigation__item">
+<a class="local-navigation__action" href="http://www.theguardian.com/tv-and-radio" data-link-name="nav : secondary : tv &amp; radio">tv & radio</a>
+</li>
+<li class="local-navigation__item">
+<a class="local-navigation__action" href="http://www.theguardian.com/music" data-link-name="nav : secondary : music">music</a>
+</li>
+<li class="local-navigation__item">
+<a class="local-navigation__action" href="http://www.theguardian.com/technology/games" data-link-name="nav : secondary : games">games</a>
+</li>
+<li class="local-navigation__item">
+<a class="local-navigation__action" href="http://www.theguardian.com/books" data-link-name="nav : secondary : books">books</a>
+</li>
+<li class="local-navigation__item">
+<a class="local-navigation__action" href="http://www.theguardian.com/artanddesign" data-link-name="nav : secondary : art &amp; design">art & design</a>
+</li>
+<li class="local-navigation__item">
+<a class="local-navigation__action" href="http://www.theguardian.com/stage" data-link-name="nav : secondary : stage">stage</a>
+</li>
+<li class="local-navigation__item">
+<a class="local-navigation__action" href="http://www.theguardian.com/music/classicalmusicandopera" data-link-name="nav : secondary : classical">classical</a>
+</li>
+</ul>
+</nav>
+<nav class="navigation__container navigation__container--second" data-component="nav" role="navigation" aria-label="Guardian sections">
+<ul class="top-navigation js-top-navigation">
+<li class="top-navigation__item top-navigation__item--home">
+<a href="http://github.com/international" class="top-navigation__action top-navigation__action--has-icon" data-link-name="nav : primary : home" title="Back to homepage">
+<span class="top-navigation__icon-wrapper">
+<span class="top-navigation__icon top-navigation__icon--home "></span>
+</span>
+<span class="u-h">home</span>
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/uk-news" data-link-name="nav : primary : UK">
+UK
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/politics/general-election-2015" data-link-name="nav : primary : election">
+election
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/world" data-link-name="nav : primary : world">
+world
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/uk/sport" data-link-name="nav : primary : sport">
+sport
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/football" data-link-name="nav : primary : football">
+football
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/uk/commentisfree" data-link-name="nav : primary : opinion">
+opinion
+</a>
+</li>
+<li class="top-navigation__item top-navigation__item--current">
+<a class="top-navigation__action" href="http://www.theguardian.com/uk/culture" data-link-name="nav : primary : culture">
+culture
+<span class="u-h">selected</span>
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/uk/business" data-link-name="nav : primary : business">
+business
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/lifeandstyle" data-link-name="nav : primary : lifestyle">
+lifestyle
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/fashion" data-link-name="nav : primary : fashion">
+fashion
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/uk/environment" data-link-name="nav : primary : environment">
+environment
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/uk/technology" data-link-name="nav : primary : tech">
+tech
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/travel" data-link-name="nav : primary : travel">
+travel
+</a>
+</li>
+</ul>
+</nav>
+</div>
+<a class="navigation-toggle js-navigation-toggle" href="#footer-nav" data-link-name="nav : allSections" data-target-nav="js-navigation-header">
+<i class="burger-icon"></i><span class="navigation-toggle-label navigation-toggle-label--open" aria-haspopup="true" aria-controls="all-sections-popup" aria-label="browse all sections"><span class="navigation-toggle-label__extra navigation-toggle-label__extra--browse">browse </span>all<span class="navigation-toggle-label__extra"> sections</span></span>
+<span class="navigation-toggle-label navigation-toggle-label--close" aria-label="close all sections">close</span>
+</a>
+</div>
+<div id="all-sections-popup" class="navigation__expandable js-mega-nav-placeholder" data-component="all-nav"></div>
+</div>
+</div>
+</div>
+</header>
+</div>
+<div class="top-banner-below-nav-mt-test"></div>
+<div class="top-banner-ad-container top-banner-ad-container--mobile">
+<div id="dfp-ad--top" class="js-ad-slot ad-slot ad-slot--dfp ad-slot--top ad-slot--top-banner-ad" data-link-name="ad slot top" data-test-id="ad-slot-top" data-name="top" data-label="false" data-mobile="1,1|300,50|320,50|88,70"> </div>
+</div>
+<div id="maincontent" tabindex="0"></div>
+<div class="js-breaking-news-placeholder breaking-news breaking-news--hidden breaking-news--fade-in" data-link-name="breaking news" data-component="breaking-news"></div>
+<div class="l-side-margins">
+<article id="article" data-test-id="article-root" class="content content--article tonal tonal--tone-news section-film
+        
+        " itemscope itemtype="http://schema.org/NewsArticle" role="main">
+<header class="content__head tonal__head tonal__head--tone-news
+    ">
+<div class="content__header tonal__header u-cf">
+<div class="gs-container">
+<div class="content__main-column">
+<div class="content__labels">
+<div class="content__section-label">
+<a class="content__section-label__link" data-link-name="article section" href="http://www.theguardian.com/film/sport">Sport</a>
+</div>
+<div class="content__series-label ">
+<a class="content__series-label__link" href="http://www.theguardian.com/film/series/reelhistory">Reel history</a>
+</div>
+</div>
+<h1 class="content__headline js-score" itemprop="headline">
+The Hurricane: the facts of Rubin Carter's life story are beaten to a pulp
+</h1>
+</div>
+</div>
+</div>
+<div class="tonal__standfirst u-cf">
+<div class="gs-container">
+<div class="content__main-column">
+<div class="content__standfirst" data-link-name="standfirst" data-component="standfirst">
+<meta itemprop="description" content="Denzel Washington&#x27;s compelling performance gives Norman Jewison&#x27;s biopic punch – despite its many inaccuracies, writes Alex von Tunzelmann"/>
+Denzel Washington's compelling performance gives Norman Jewison's biopic punch – despite its many inaccuracies
+</div>
+</div>
+</div>
+</div>
+</header>
+<div class="content__main tonal__main tonal__main--tone-news">
+<div class="gs-container">
+<div class="content__main-column content__main-column--article js-content-main-column ">
+<div class="js-football-tabs football-tabs content__mobile-full-width"></div>
+<figure itemprop="associatedMedia image" itemscope itemtype="http://schema.org/ImageObject" data-component="image" class="media-primary media-content">
+<a href="http://github.com/film/2014/apr/24/the-hurricane-rubin-carter-denzel-washington#img-1" class="article__img-container js-gallerythumbs" data-link-name="Launch Article Lightbox" data-is-ajax>
+<div class="">
+<img src="http://i.guim.co.uk/static/w-300/h--/q-95/sys-images/Arts/Arts_/Pictures/2014/4/23/1398270512680/The-Hurricane-004.jpg" srcset="http://github.com/service/http://i.guim.co.uk/static/w-620/h--/q-95/sys-images/Arts/Arts_/Pictures/2014/4/23/1398270516995/The-Hurricane-007.jpg%20620w , http://github.com/%20//i.guim.co.uk/static/w-700/h--/q-95/sys-images/Arts/Arts_/Pictures/2014/4/23/1398270518562/The-Hurricane-008.jpg%20700w , http://github.com/%20//i.guim.co.uk/static/w-645/h--/q-95/sys-images/Arts/Arts_/Pictures/2014/4/23/1398270518562/The-Hurricane-008.jpg%20645w , http://github.com/%20//i.guim.co.uk/static/w-465/h--/q-95/sys-images/Arts/Arts_/Pictures/2014/4/23/1398266760181/The-Hurricane-Boxing-Matc-002.jpg%20465w " sizes="(min-width: 980px) 620px, (min-width: 740px) 700px, (min-width: 660px) 620px, (min-width: 480px) 645px, 465px" class="maxed responsive-img" itemprop="contentUrl representativeOfPage" alt="The Hurricane"/>
+<span class="rounded-icon article__fullscreen"><i class="i i-expand-white"></i><i class="i i-expand-black"></i></span>
+</div>
+</a>
+<figcaption class="caption caption--main caption--img" itemprop="description">
+Packing punch … Denzel Washington in The Hurricane Photograph: Allstar/Cinetext Collection/Sportsphoto/Allstar/Cinetext Collection
+</figcaption>
+</figure>
+<div class="content__meta-container js-football-meta u-cf
+    
+    
+    
+    
+    
+    
+    ">
+<p class="byline" data-link-name="byline" data-component="meta-byline"><span itemscope="" itemtype="http://schema.org/Person" itemprop="author">
+<a rel="author" class="tone-colour" itemprop="url name" data-link-name="auto tag link" href="http://www.theguardian.com/profile/alexvontunzelmann">Alex von Tunzelmann</a></span></p>
+<p class="content__dateline" aria-hidden="true">
+<time itemprop="datePublished" datetime='2014-04-24T10:26:18+0100' data-timestamp="1398331578000" class="content__dateline-wpd js-wpd">
+Thursday 24 April 2014 <span class="content__dateline-time">10.26 BST</span>
+</time>
+<time itemprop="dateModified" datetime='2014-06-21T00:41:29+0100' data-timestamp="1403307689000" class="content__dateline-lm js-lm u-h">
+Last modified on Saturday 21 June 2014 <span class="content__dateline-time">00.41 BST</span>
+</time>
+</p>
+<div class="meta__extras">
+<div class="meta__social" data-component="share">
+<ul class="social social--top u-unstyled u-cf" data-component="social">
+<li class="social__item social__item--facebook" data-link-name="facebook">
+<a class="social__action social-icon-wrapper" data-link-name="social top" href="https://www.facebook.com/sharer/sharer.php?u=http%3A%2F%2Fgu.com%2Fp%2F3zjk4%2Fsfb&amp;ref=responsive" target="_blank" title="Facebook">
+<span class="u-h">Share on Facebook</span>
+<span class="rounded-icon social-icon social-icon--facebook">
+<i class="i-share-facebook--white i"></i>
+</span>
+</a>
+</li>
+<li class="social__item social__item--twitter" data-link-name="twitter">
+<a class="social__action social-icon-wrapper" data-link-name="social top" href="https://twitter.com/intent/tweet?text=The+Hurricane%3A+the+facts+of+Rubin+Carter%27s+life+story+are+beaten+to+a+pulp&amp;url=http%3A%2F%2Fgu.com%2Fp%2F3zjk4%2Fstw" target="_blank" title="Twitter">
+<span class="u-h">Share on Twitter</span>
+<span class="rounded-icon social-icon social-icon--twitter">
+<i class="i-share-twitter--white i"></i>
+</span>
+</a>
+</li>
+<li class="social__item social__item--email" data-link-name="email">
+<a class="social__action social-icon-wrapper" data-link-name="social top" href="/service/mailto:?subject=The%20Hurricane%3A%20the%20facts%20of%20Rubin%20Carter&#x27;s%20life%20story%20are%20beaten%20to%20a%20pulp&amp;body=http%3A%2F%2Fgu.com%2Fp%2F3zjk4%2Fsbl" target="_blank" title="Email">
+<span class="u-h">Share via Email</span>
+<span class="rounded-icon social-icon social-icon--email">
+<i class="i-share-email--white i"></i>
+</span>
+</a>
+</li>
+<li class="social__item social__item--linkedin" data-link-name="linkedin">
+<a class="social__action social-icon-wrapper" data-link-name="social top" href="http://www.linkedin.com/shareArticle?mini=true&amp;title=The+Hurricane%3A+the+facts+of+Rubin+Carter%27s+life+story+are+beaten+to+a+pulp&amp;url=http%3A%2F%2Fgu.com%2Fp%2F3zjk4" target="_blank" title="LinkedIn">
+<span class="u-h">Share on LinkedIn</span>
+<span class="rounded-icon social-icon social-icon--linkedin">
+<i class="i-share-linkedin--white i"></i>
+</span>
+</a>
+</li>
+<li class="social__item social__item--gplus" data-link-name="gplus">
+<a class="social__action social-icon-wrapper" data-link-name="social top" href="https://plus.google.com/share?url=http%3A%2F%2Fgu.com%2Fp%2F3zjk4%2Fsgp&amp;hl=en-GB&amp;wwc=1" target="_blank" title="Google plus">
+<span class="u-h">Share on Google+</span>
+<span class="rounded-icon social-icon social-icon--gplus">
+<i class="i-share-gplus--white i"></i>
+</span>
+</a>
+</li>
+<li class="social__item social__item--whatsapp" data-link-name="whatsapp">
+<a class="social__action social-icon-wrapper" data-link-name="social top" href="/service/whatsapp://send?text=%22The%20Hurricane%3A%20the%20facts%20of%20Rubin%20Carter&#x27;s%20life%20story%20are%20beaten%20to%20a%20pulp%22%20http%3A%2F%2Fgu.com%2Fp%2F3zjk4%2Fswa" target="_blank" title="WhatsApp">
+<span class="u-h">Share on WhatsApp</span>
+<span class="rounded-icon social-icon social-icon--whatsapp">
+<i class="i-share-whatsapp--white i"></i>
+</span>
+</a>
+</li>
+</ul>
+</div>
+<div class="meta__numbers modern-visible">
+<div class="u-h meta__number js-sharecount">
+</div>
+<div class="u-h meta__number" data-discussion-id="/p/3zjk4" data-commentcount-format="content" data-discussion-closed="false">
+</div>
+</div>
+<div class="meta__save-for-later" data-link-name="meta-save-for-later" data-component="meta-save-for-later">
+</div>
+</div>
+</div>
+<div class="content__article-body from-content-api js-article__body" itemprop="articleBody" data-test-id="article-review-body">
+<p><a href="http://www.theguardian.com/film/movie/81975/hurricane" title="" data-link-name="in body link" data-component="in-body-link" class=" u-underline">The Hurricane</a> (1999)<br/>Director: Norman Jewison<br/>Entertainment grade: B<br/>History grade: D–</p>
+<p><a href="http://www.theguardian.com/sport/2014/apr/20/rubin-hurricane-carter-boxer-dies-76" title="" data-link-name="in body link" data-component="in-body-link" class=" u-underline">Rubin &quot;Hurricane&quot; Carter</a>, who died this week, was a boxer in the United States. He was convicted of a 1966 triple homicide in two trials and became a cause celebre, inspiring Bob Dylan's song <a href="http://www.openculture.com/2014/04/bob-dylan-plays-first-live-performance-of-hurricane.html" title="" data-link-name="in body link" data-component="in-body-link" class=" u-underline">Hurricane</a>. The convictions were set aside by a federal court in 1985, on the grounds that they had been &quot;predicated upon an appeal to racism rather than reason&quot;.</p>
+<h2>Fictionalisation</h2>
+<body>
+<figure itemprop="associatedMedia image" itemscope="" itemtype="http://schema.org/ImageObject" class="element element-image img--landscape">
+<img srcset="http://github.com/service/http://i.guim.co.uk/static/w-620/h--/q-95/sys-images/Arts/Arts_/Pictures/2014/4/23/1398267192388/The-Hurricaine-prison-011.jpg%20620w , http://github.com/%20//i.guim.co.uk/static/w-605/h--/q-95/sys-images/Arts/Arts_/Pictures/2014/4/23/1398267192388/The-Hurricaine-prison-011.jpg%20605w , http://github.com/%20//i.guim.co.uk/static/w-445/h--/q-95/sys-images/Arts/Arts_/Pictures/2014/4/23/1398267192388/The-Hurricaine-prison-011.jpg%20445w " sizes="(min-width: 660px) 620px, (min-width: 480px) 605px, 445px" src="http://i.guim.co.uk/static/w-300/h--/q-95/sys-images/Arts/Arts_/Pictures/2014/4/23/1398267192388/The-Hurricaine-prison-011.jpg" alt="The Hurricaine prison" class="gu-image" itemprop="contentUrl"/>
+<figcaption itemprop="description" class="caption caption--img caption caption--img">
+High points … the film's most gripping scenes show Carter's time in prison. Allstar/UNIVERSAL PICTURES/Sportsphoto Ltd./Allstar
+</figcaption>
+</figure>
+</body>
+<p>A title card before the film admits that some characters have been composited or invented, and some incidents fictionalised. That's fair enough, of course – though viewers would do well to keep the disclaimer at the front of their minds throughout. The film's narrative skips back and forth, from Carter (Denzel Washington) protesting in prison in 1973, back to a boxing match in the 1960s (filmed in black and white, with a nod to <a href="http://www.theguardian.com/film/filmblog/2013/may/09/raging-bull-reel-history-martin-scorsese" title="" data-link-name="in body link" data-component="in-body-link" class=" u-underline">Raging Bull</a>), forward to the night of the triple homicide at the Lafayette Bar &amp; Grill in Paterson, New Jersey, in 1966.</p>
+<h2>Record</h2>
+<p>Having established the crime, the film delves into Carter's youth. It is true that he ran away from a juvenile detention centre and joined the army, but in The Hurricane he appears to emerge from it with full honours. In real life, he underwent four court martials for various behavioural and discipline offences and was eventually discharged as &quot;unfit for military service&quot;. He was afterwards convicted of three muggings. Perhaps the film-makers felt that this background made Carter an unsympathetic character – but, in real life, the fact that Carter didn't get on with army authority and had a criminal record was part of his story. Nothing in his background made it any more acceptable that he was wrongfully convicted of three murders.</p>
+<h2>Sport</h2>
+<p>As an alternative narrative, the film chooses to establish Carter's alienation as a black man through a middleweight title fight in 1964. On screen, Carter clearly wins over defender Joey Giardello – but the white judges award the title to the white Giardello anyway. It's one of those incidents that the flimsy opening disclaimer is presumably supposed to cover. In real life, Carter boxed well for the first five rounds, but Giardello took control as the match went on and was awarded a unanimous victory by the judges. <a href="http://nypost.com/2014/04/20/boxer-rubin-hurricane-carter-dies-at-76/" title="" data-link-name="in body link" data-component="in-body-link" class=" u-underline">Carter agreed</a> that Giardello deserved his victory. So upset was Giardello by this inaccurate portrayal that he launched a <a href="http://www.theguardian.com/film/2000/feb/23/news1" title="" data-link-name="in body link" data-component="in-body-link" class=" u-underline">lawsuit</a> against the makers of the film. Reportedly, they settled out of court for an undisclosed sum.</p>
+<h2>Romance</h2>
+<body>
+<figure itemprop="associatedMedia image" itemscope="" itemtype="http://schema.org/ImageObject" class="element element-image img--landscape">
+<img srcset="http://github.com/service/http://i.guim.co.uk/static/w-620/h--/q-95/sys-images/Arts/Arts_/Pictures/2014/4/23/1398269449711/THE-HURRICANE-006.jpg%20620w , http://github.com/%20//i.guim.co.uk/static/w-605/h--/q-95/sys-images/Arts/Arts_/Pictures/2014/4/23/1398269449711/THE-HURRICANE-006.jpg%20605w , http://github.com/%20//i.guim.co.uk/static/w-445/h--/q-95/sys-images/Arts/Arts_/Pictures/2014/4/23/1398269449711/THE-HURRICANE-006.jpg%20445w " sizes="(min-width: 660px) 620px, (min-width: 480px) 605px, 445px" src="http://i.guim.co.uk/static/w-300/h--/q-95/sys-images/Arts/Arts_/Pictures/2014/4/23/1398269449711/THE-HURRICANE-006.jpg" alt="THE HURRICANE" class="gu-image" itemprop="contentUrl"/>
+<figcaption itemprop="description" class="caption caption--img caption caption--img">
+Loyal in love … Debbi Morgan as Carter's wife, Mae Thelma. Allstar/UNIVERSAL PICTURES/Sportsphoto Ltd./Allstar
+</figcaption>
+</figure>
+</body>
+<p>The most gripping parts of The Hurricane show Carter's time in prison. He decides he must give up wanting things, in order that his jailers cannot take anything away from him. At the height of his self-denial, his loyal, adoring wife Mae Thelma visits him. &quot;I want you to divorce me,&quot; he says. &quot;I'm dead. Just bury me. Please.&quot; It's a beautifully acted and affecting scene – but the truth was not quite so noble. <a href="http://www.theguardian.com/world/2014/apr/21/rubin-hurricane-carter" title="" data-link-name="in body link" data-component="in-body-link" class=" u-underline">Thelma divorced Carter</a> on the grounds of his repeated infidelities with supporters.</p>
+<h2>Justice</h2>
+<body>
+<figure itemprop="associatedMedia image" itemscope="" itemtype="http://schema.org/ImageObject" class="element element-image img--landscape">
+<img srcset="http://github.com/service/http://i.guim.co.uk/static/w-620/h--/q-95/sys-images/Arts/Arts_/Pictures/2014/4/23/1398268795613/THE-HURRICANE-VICELLOUS-R-006.jpg%20620w , http://github.com/%20//i.guim.co.uk/static/w-605/h--/q-95/sys-images/Arts/Arts_/Pictures/2014/4/23/1398268795613/THE-HURRICANE-VICELLOUS-R-006.jpg%20605w , http://github.com/%20//i.guim.co.uk/static/w-445/h--/q-95/sys-images/Arts/Arts_/Pictures/2014/4/23/1398268795613/THE-HURRICANE-VICELLOUS-R-006.jpg%20445w " sizes="(min-width: 660px) 620px, (min-width: 480px) 605px, 445px" src="http://i.guim.co.uk/static/w-300/h--/q-95/sys-images/Arts/Arts_/Pictures/2014/4/23/1398268795613/THE-HURRICANE-VICELLOUS-R-006.jpg" alt="THE HURRICANE VICELLOUS REON SHANNON" class="gu-image" itemprop="contentUrl"/>
+<figcaption itemprop="description" class="caption caption--img caption caption--img">
+A new chapter … Vicellous Reon Shannon (right) plays Lesra Martin, who is enchanted by Carter's autobiography. Allstar/UNIVERSAL PICTURES/Sportsphoto Ltd./Allstar
+</figcaption>
+</figure>
+</body>
+<p> In Toronto in the 1980s, young Lesra Martin (Vicellous Reon Shannon) buys Carter's autobiography in a second-hand book sale and is enchanted. Martin, a black boy from Brooklyn, lives in a commune of Canadians who seem to be harmless, though even the film's best efforts can't prevent them from seeming a bit weird. The commune is run by three well-meaning white liberals, who set out to free Carter. The screenplay can't decide whether they're heroes or idiots, and makes a right old mess of the facts of the case while it tries to work that out. Fortunately, Washington's performance is so powerful, nuanced and intensely compelling that it carries the film to the finish line, making it watchable despite the growing heaps of inaccuracies. He lost the Oscar in 2000 to Kevin Spacey for American Beauty, but deservedly won a Golden Globe.</p>
+<h2>Verdict</h2>
+<p>The Hurricane goes 15 rounds with history and beats it to a pulp.</p>
+</div>
+<div class="submeta">
+<hr/>
+<div data-link-name="keywords" data-component="keywords">
+<h2 class="submeta__head">Topics</h2>
+<ul class="keyword-list inline-list">
+<li class="inline-list__item ">
+<a class="  button button--small button--tag button--secondary" href="http://www.theguardian.com/film/sport" data-link-name="keyword: film/sport" itemprop="keywords">
+Sport
+</a>
+</li>
+<li class="inline-list__item ">
+<a class="  button button--small button--tag button--secondary" href="http://www.theguardian.com/film/denzelwashington" data-link-name="keyword: film/denzelwashington" itemprop="keywords">
+Denzel Washington
+</a>
+</li>
+</ul>
+</div>
+<hr/>
+<div data-component="share">
+<ul class="social social--bottom u-unstyled u-cf" data-component="social">
+<li class="social__item social__item--facebook" data-link-name="facebook">
+<a class="social__action social-icon-wrapper" data-link-name="social bottom" href="https://www.facebook.com/sharer/sharer.php?u=http%3A%2F%2Fgu.com%2Fp%2F3zjk4%2Fsfb&amp;ref=responsive" target="_blank" title="Facebook">
+<span class="u-h">Share on Facebook</span>
+<span class="rounded-icon social-icon social-icon--facebook">
+<i class="i-share-facebook--white i"></i>
+</span>
+</a>
+</li>
+<li class="social__item social__item--twitter" data-link-name="twitter">
+<a class="social__action social-icon-wrapper" data-link-name="social bottom" href="https://twitter.com/intent/tweet?text=The+Hurricane%3A+the+facts+of+Rubin+Carter%27s+life+story+are+beaten+to+a+pulp&amp;url=http%3A%2F%2Fgu.com%2Fp%2F3zjk4%2Fstw" target="_blank" title="Twitter">
+<span class="u-h">Share on Twitter</span>
+<span class="rounded-icon social-icon social-icon--twitter">
+<i class="i-share-twitter--white i"></i>
+</span>
+</a>
+</li>
+<li class="social__item social__item--email" data-link-name="email">
+<a class="social__action social-icon-wrapper" data-link-name="social bottom" href="/service/mailto:?subject=The%20Hurricane%3A%20the%20facts%20of%20Rubin%20Carter&#x27;s%20life%20story%20are%20beaten%20to%20a%20pulp&amp;body=http%3A%2F%2Fgu.com%2Fp%2F3zjk4%2Fsbl" target="_blank" title="Email">
+<span class="u-h">Share via Email</span>
+<span class="rounded-icon social-icon social-icon--email">
+<i class="i-share-email--white i"></i>
+</span>
+</a>
+</li>
+<li class="social__item social__item--linkedin" data-link-name="linkedin">
+<a class="social__action social-icon-wrapper" data-link-name="social bottom" href="http://www.linkedin.com/shareArticle?mini=true&amp;title=The+Hurricane%3A+the+facts+of+Rubin+Carter%27s+life+story+are+beaten+to+a+pulp&amp;url=http%3A%2F%2Fgu.com%2Fp%2F3zjk4" target="_blank" title="LinkedIn">
+<span class="u-h">Share on LinkedIn</span>
+<span class="rounded-icon social-icon social-icon--linkedin">
+<i class="i-share-linkedin--white i"></i>
+</span>
+</a>
+</li>
+<li class="social__item social__item--gplus" data-link-name="gplus">
+<a class="social__action social-icon-wrapper" data-link-name="social bottom" href="https://plus.google.com/share?url=http%3A%2F%2Fgu.com%2Fp%2F3zjk4%2Fsgp&amp;hl=en-GB&amp;wwc=1" target="_blank" title="Google plus">
+<span class="u-h">Share on Google+</span>
+<span class="rounded-icon social-icon social-icon--gplus">
+<i class="i-share-gplus--white i"></i>
+</span>
+</a>
+</li>
+<li class="social__item social__item--whatsapp" data-link-name="whatsapp">
+<a class="social__action social-icon-wrapper" data-link-name="social bottom" href="/service/whatsapp://send?text=%22The%20Hurricane%3A%20the%20facts%20of%20Rubin%20Carter&#x27;s%20life%20story%20are%20beaten%20to%20a%20pulp%22%20http%3A%2F%2Fgu.com%2Fp%2F3zjk4%2Fswa" target="_blank" title="WhatsApp">
+<span class="u-h">Share on WhatsApp</span>
+<span class="rounded-icon social-icon social-icon--whatsapp">
+<i class="i-share-whatsapp--white i"></i>
+</span>
+</a>
+</li>
+</ul>
+</div>
+</div>
+<div class="after-article js-after-article"></div>
+</div>
+<div class="content__secondary-column js-secondary-column" aria-hidden="true">
+<div class="mpu-container js-mpu-ad-slot"></div>
+<div class="js-components-container"></div>
+</div>
+</div>
+</div>
+</article>
+<div class="content-footer ">
+<div id="comments" class="discussion discussion--not-staff discussion--loading js-comments
+        discussion--closed
+        u-cf" data-discussion-key="/p/3zjk4" data-discussion-closed="true" data-component="discussion">
+<div class="discussion__top-border gs-container">
+<div class="content__main-column">
+<div class="modern-hidden">
+<div class="discussion__heading">
+<div class="container__meta modern-hidden">
+<h2 class="container__meta__title">
+<a href="http://www.theguardian.com/discussion/p/3zjk4" data-link-name="View all comments">
+View all comments &gt;</a>
+</h2>
+</div>
+</div>
+</div>
+<div class="modern-visible">
+<div class="discussion__heading">
+<div class="container__meta">
+<h2 class="container__meta__title">comments <span class="discussion__comment-count js-discussion-comment-count"></span></h2>
+<p class="container__meta__item discussion__meta discussion__meta--open-signed-out"><a class="u-underline" href="https://profile.theguardian.com/signin">Sign in</a>
+or <a class="u-underline" href="https://profile.theguardian.com/register">create your Guardian account</a> to join the discussion.
+</p>
+<p class="container__meta__item discussion__meta discussion__meta--closed">This discussion is closed for comments.</p>
+<p class="container__meta__item discussion__meta discussion__meta--readonly">
+We’re doing some maintenance right now. You can still read comments, but please come back later to add your own.
+</p>
+<p class="container__meta__item d-discussion__error discussion__meta discussion__meta--banned">
+Commenting has been disabled for this account (<a href="http://github.com/community-faqs#321a">why?</a>)
+</p>
+</div>
+</div>
+<div class="discussion__comment-box js-discussion-comment-box--top"></div>
+<div class="discussion__top-comments js-discussion-top-comments"></div>
+<div class="discussion__toolbar js-discussion-toolbar u-cf">
+<div class="discussion__toolbar-dropdown js-comment-order-dropdown">
+<button class="u-button-reset popup__toggle" data-toggle="popup--comments-order" aria-haspopup="true" aria-controls="comments-order-popup">Order by <span class="js-comment-order"></span></button>
+<ul id="comments-order-popup" class="popup popup__group popup--comments-order is-off">
+<li class="popup__item">
+<button class="u-button-reset popup__action" data-order="newest" data-link-name="comments-newest">newest</button>
+</li>
+<li class="popup__item">
+<button class="u-button-reset popup__action" data-order="oldest" data-link-name="comments-oldest">oldest</button>
+</li>
+</ul>
+</div>
+<div class="discussion__toolbar-dropdown hide-until-tablet js-comment-pagesize-dropdown sign-in-required">
+<button class="u-button-reset popup__toggle" data-toggle="popup--comments-pagesize" aria-haspopup="true" aria-controls="comments-pagesize-popup">Show <span class="js-comment-pagesize">25</span></button>
+<ul id="comments-pagesize-popup" class="popup popup__group popup--comments-pagesize is-off">
+<li class="popup__item">
+<button class="u-button-reset popup__action" data-pagesize="25" data-link-name="comments-pagesize-25">25</button>
+</li>
+<li class="popup__item">
+<button class="u-button-reset popup__action" data-pagesize="50" data-link-name="comments-pagesize-50">50</button>
+</li>
+<li class="popup__item">
+<button class="u-button-reset popup__action" data-pagesize="100" data-link-name="comments-pagesize-100">100</button>
+</li>
+<li class="popup__item">
+<button class="u-button-reset popup__action" data-pagesize="All" data-link-name="comments-pagesize-All">All</button>
+</li>
+</ul>
+</div>
+<div class="discussion__toolbar-dropdown js-comment-threading-dropdown">
+<button class="u-button-reset popup__toggle" data-toggle="popup--comments-threading" aria-haspopup="true" aria-controls="comments-order-threading">Threads <span class="js-comment-threading"></span></button>
+<ul id="comments-order-threading" class="popup popup__group popup--comments-threading is-off">
+<li class="popup__item">
+<button class="u-button-reset popup__action" data-threading="collapsed" data-link-name="comments-threading-collapsed">collapsed</button>
+</li>
+<li class="popup__item">
+<button class="u-button-reset popup__action" data-threading="expanded" data-link-name="comments-threading-expanded">expanded</button>
+</li>
+<li class="popup__item">
+<button class="u-button-reset popup__action" data-threading="unthreaded" data-link-name="comments-threading-unthreaded">unthreaded</button>
+</li>
+</ul>
+</div>
+<div class="discussion__pagination discussion__pagination--top js-discussion-pagination"></div>
+</div>
+<div class="preload-msg discussion__loader">Loading comments… <a href=/discussion/p/3zjk4 class="accessible-link">Trouble loading?</a><div class="is-updating"></div></div>
+<div class="discussion__main-comments js-discussion-main-comments"></div>
+<div class="discussion__comment-box discussion__comment-box--bottom js-discussion-comment-box--bottom"></div>
+<button class="discussion__show-button button--show-more button button--large button--primary js-discussion-show-button" data-link-name="more-comments">
+<i class="i i-plus-white"></i>
+View more comments
+</button>
+<script type="text/template" id="tmpl-comment-box">
+                        
+<form class="component js-comment-box d-comment-box">
+
+    <div class="d-comment-box__meta">
+        <span class="d-comment-box__avatar-wrapper"></span>
+        <div class="d-comment-box__meta-text">
+            <span class="d-comment-box__author-label">Signed in as</span>
+            <span class="d-comment-box__author"></span>
+            <span class="i i-reply-grey"></span>
+            <span class="d-comment-box__reply-to-author"></span>
+            <span class="u-fauxlink d-comment-box__show-parent" role="button">Show comment</span>
+            <span class="u-fauxlink d-comment-box__hide-parent" role="button">Hide comment</span>
+        </div>
+    </div>
+    <div class="d-comment-box__parent-comment-wrapper">
+        <div class="d-comment-box__parent-comment-spout"></div>
+        <div class="d-comment-box__parent-comment">
+            <span class="d-comment-box__parent-comment-author"></span>
+            <div class="d-comment-box__parent-comment-body"></div>
+            <span class="u-fauxlink d-comment-box__hide-parent" role="button">Hide comment</span>
+        </div>
+    </div>
+
+    <div class="d-comment-box__content">
+        <div class="d-comment-box__messages"></div>
+        <div class="d-discussion__error d-comment-box__premod">
+            <i class="i i-status-alert"></i>
+            <span class="d-discussion__error-text">Your comments are currently being pre-moderated (<a href="http://github.com/community-faqs#311" target="_blank">why?</a>)</span>
+        </div>
+        <textarea name="body" class="textarea d-comment-box__body" placeholder="Join the discussion…"></textarea>
+        <button type="submit" class="u-button-reset button button--large button--primary submit-input d-comment-box__submit">Post your comment</button>
+        <span class="u-fauxlink d-comment-box__preview" role="button">Preview</span>
+        <span class="u-fauxlink d-comment-box__hide-preview" role="button">Hide preview</span>
+        <span class="u-fauxlink d-comment-box__cancel" role="button">Cancel</span>
+        <ul class="d-comment-box__formatting-controls">
+            <li class="d-comment-box__formatting-bold" title="Bold">B</li>
+            <li class="d-comment-box__formatting-italic" title="Italic">i</li>
+            <li class="d-comment-box__formatting-quote" title="Quote">&#8221;</li>
+            <li class="d-comment-box__formatting-link" title="Link">Link</li>
+        </ul>
+        <div class="d-comment-box__preview-wrapper">
+            <div class="d-comment-box__preview-spout"></div>
+            <div class="d-comment-box__preview-block">
+                <div class="d-comment-box__preview-body"></div>
+            </div>
+        </div>
+    </div>
+</form>
+                        </script>
+</div>
+</div>
+</div>
+</div>
+<div class="fc-container fc-container--commercial-high">
+<div id="dfp-ad--merchandising-high" class="js-ad-slot ad-slot ad-slot--dfp ad-slot--merchandising-high ad-slot--commercial-component-high" data-link-name="ad slot merchandising-high" data-test-id="ad-slot-merchandising-high" data-name="merchandising-high" data-label="false" data-refresh="false" data-mobile="1,1|88,87"> </div>
+</div>
+<aside class="related js-related hide-on-childrens-books-site" role="complementary" data-test-id="related-content">
+</aside>
+<aside class="onward js-onward facia-container facia-container--layout-content tone-news" role="complementary"></aside>
+<div class="js-repositioned-comments content__repositioned-comments"></div>
+<section class="fc-container fc-container--has-toggle" data-link-name="most-popular" data-component="most-popular">
+<div class="fc-container__inner">
+<div class="fc-container__header js-container__header">
+<h2 class="fc-container__header__title">
+<a href="http://www.theguardian.com/most-read/film" data-link-name="Most viewed film">popular</a>
+</h2>
+</div>
+<div class="fc-container__body fc-container--rolled-up-hide js-popular-trails">
+</div>
+</div>
+</section>
+<div class="fc-container fc-container--outbrain hide-on-childrens-books-site">
+<div class="fc-container__inner" data-component="outbrain" data-link-name="outbrain">
+<div class="OUTBRAIN" data-src="http://github.com/DROP_PERMALINK_HERE" data-ob-template="guardian"></div>
+</div>
+</div>
+<div class="fc-container fc-container--commercial">
+<div id="dfp-ad--merchandising" class="js-ad-slot ad-slot ad-slot--dfp ad-slot--merchandising ad-slot--commercial-component" data-link-name="ad slot merchandising" data-test-id="ad-slot-merchandising" data-name="merchandising" data-label="false" data-refresh="false" data-mobile="1,1|88,88"> </div>
+</div>
+</div>
+</div>
+<footer class="l-footer u-cf" data-link-name="footer" data-component="footer">
+<div class="l-footer__primary">
+<div id="footer-nav" class="gs-container">
+<div class="brand-bar u-cf">
+<a href="http://github.com/international" data-link-name="site logo" class="guardian-logo-footer hide-on-mobile">
+<span class="u-h">The Guardian</span>
+<i class="i i-guardian-logo-160"></i>
+</a>
+<a class="brand-bar__item brand-bar__item--action" data-link-name="back to top" href="#top">
+<span class="rounded-icon control__icon-wrapper">
+<span class="inline-arrow-up inline-icon ">
+<svg width="24" height="18" viewBox="0 0 24 18"><path d="M.4 15.3l10.5-8.4L12 6l1.1.9 10.5 8.4-.5.7L12 9.7.9 16l-.5-.7z"/></svg>
+</span>
+</span>
+<span class="control__info">back to top</span>
+</a>
+</div>
+<div class="l-footer__navigation-wrapper u-cf">
+<div class="js-navigation-footer navigation-container navigation-container--collapsed">
+<div class="gs-container navigation">
+<div class="navigation__inner" aria-hidden="true">
+<div class="navigation__scroll">
+<div class="navigation__container navigation__container--second" data-component="footer-nav">
+<ul class="top-navigation js-top-navigation">
+<li class="top-navigation__item top-navigation__item--home">
+<a href="http://github.com/international" class="top-navigation__action top-navigation__action--has-icon" data-link-name="nav : primary : home" title="Back to homepage">
+<span class="top-navigation__icon-wrapper">
+<span class="top-navigation__icon top-navigation__icon--home "></span>
+</span>
+<span class="u-h">home</span>
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/uk-news" data-link-name="nav : primary : UK">
+UK
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/politics/general-election-2015" data-link-name="nav : primary : election">
+election
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/world" data-link-name="nav : primary : world">
+world
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/uk/sport" data-link-name="nav : primary : sport">
+sport
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/football" data-link-name="nav : primary : football">
+football
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/uk/commentisfree" data-link-name="nav : primary : opinion">
+opinion
+</a>
+</li>
+<li class="top-navigation__item top-navigation__item--current">
+<a class="top-navigation__action" href="http://www.theguardian.com/uk/culture" data-link-name="nav : primary : culture">
+culture
+<span class="u-h">selected</span>
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/uk/business" data-link-name="nav : primary : business">
+business
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/lifeandstyle" data-link-name="nav : primary : lifestyle">
+lifestyle
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/fashion" data-link-name="nav : primary : fashion">
+fashion
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/uk/environment" data-link-name="nav : primary : environment">
+environment
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/uk/technology" data-link-name="nav : primary : tech">
+tech
+</a>
+</li>
+<li class="top-navigation__item">
+<a class="top-navigation__action" href="http://www.theguardian.com/travel" data-link-name="nav : primary : travel">
+travel
+</a>
+</li>
+</ul>
+</div>
+</div>
+<a class="navigation-toggle js-navigation-toggle" href="#footer-nav" data-link-name="nav : allSections" data-target-nav="js-navigation-footer">
+<i class="burger-icon"></i>
+<span class="navigation-toggle-label navigation-toggle-label--open" aria-haspopup="true" aria-controls="all-sections-popup" aria-label="view all sections">all<span class="navigation-toggle-label__extra"> sections</span></span>
+<span class="navigation-toggle-label navigation-toggle-label--close" aria-label="close all sections">close</span>
+</a>
+</div>
+<div class="js-mega-nav navigation__expandable" data-component="all-footer-nav" data-link-name="global navigation: footer : sections">
+<nav role="navigation" aria-label="All sections">
+<ul class="global-navigation js-global-navigation">
+<li class="global-navigation__section global-navigation__section--home">
+<a class="global-navigation__title" href="http://github.com/international" data-link-name="nav : globalTop : home">
+home
+</a>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/uk-news" data-link-name="nav : globalTop : UK">
+UK
+</a>
+<ul class="global-navigation__children">
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/politics" data-link-name="nav : globalSub : politics">
+politics
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/education" data-link-name="nav : globalSub : education">
+education
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/uk/media" data-link-name="nav : globalSub : media">
+media
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/society" data-link-name="nav : globalSub : society">
+society
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/law" data-link-name="nav : globalSub : law">
+law
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/uk/scotland" data-link-name="nav : globalSub : scotland">
+scotland
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/uk/wales" data-link-name="nav : globalSub : wales">
+wales
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/uk/northernireland" data-link-name="nav : globalSub : northern ireland">
+northern ireland
+</a>
+</li>
+</ul>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/politics/general-election-2015" data-link-name="nav : globalTop : election">
+election
+</a>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/world" data-link-name="nav : globalTop : world">
+world
+</a>
+<ul class="global-navigation__children">
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/world/europe-news" data-link-name="nav : globalSub : europe">
+europe
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/us-news" data-link-name="nav : globalSub : US">
+US
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/world/americas" data-link-name="nav : globalSub : americas">
+americas
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/world/asia" data-link-name="nav : globalSub : asia">
+asia
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/australia-news" data-link-name="nav : globalSub : australia">
+australia
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/world/africa" data-link-name="nav : globalSub : africa">
+africa
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/world/middleeast" data-link-name="nav : globalSub : middle east">
+middle east
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/cities" data-link-name="nav : globalSub : cities">
+cities
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/global-development" data-link-name="nav : globalSub : development">
+development
+</a>
+</li>
+</ul>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/uk/sport" data-link-name="nav : globalTop : sport">
+sport
+</a>
+<ul class="global-navigation__children">
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/football" data-link-name="nav : globalSub : football">
+football
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/sport/cricket" data-link-name="nav : globalSub : cricket">
+cricket
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/sport/rugby-union" data-link-name="nav : globalSub : rugby union">
+rugby union
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/sport/formulaone" data-link-name="nav : globalSub : F1">
+F1
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/sport/tennis" data-link-name="nav : globalSub : tennis">
+tennis
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/sport/golf" data-link-name="nav : globalSub : golf">
+golf
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/sport/cycling" data-link-name="nav : globalSub : cycling">
+cycling
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/sport/boxing" data-link-name="nav : globalSub : boxing">
+boxing
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/sport/horse-racing" data-link-name="nav : globalSub : racing">
+racing
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/sport/rugbyleague" data-link-name="nav : globalSub : rugby league">
+rugby league
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/sport/us-sport" data-link-name="nav : globalSub : US sports">
+US sports
+</a>
+</li>
+</ul>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/football" data-link-name="nav : globalTop : football">
+football
+</a>
+<ul class="global-navigation__children">
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/football/live" data-link-name="nav : globalSub : live scores">
+live scores
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/football/tables" data-link-name="nav : globalSub : tables">
+tables
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/football/competitions" data-link-name="nav : globalSub : competitions">
+competitions
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/football/results" data-link-name="nav : globalSub : results">
+results
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/football/fixtures" data-link-name="nav : globalSub : fixtures">
+fixtures
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/football/teams" data-link-name="nav : globalSub : clubs">
+clubs
+</a>
+</li>
+</ul>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/uk/commentisfree" data-link-name="nav : globalTop : opinion">
+opinion
+</a>
+<ul class="global-navigation__children">
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/index/contributors" data-link-name="nav : globalSub : columnists">
+columnists
+</a>
+</li>
+</ul>
+</li>
+<li class="global-navigation__section global-navigation__section--current">
+<a class="global-navigation__title" href="http://www.theguardian.com/uk/culture" data-link-name="nav : globalTop : culture">
+culture
+<span class="u-h">selected</span>
+</a>
+<ul class="global-navigation__children">
+<li class="global-navigation__child global-navigation__child--current">
+<a class="global-navigation__action" href="http://www.theguardian.com/film" data-link-name="nav : globalSub : film">
+film
+<span class="u-h">selected</span>
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/tv-and-radio" data-link-name="nav : globalSub : tv &amp; radio">
+tv & radio
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/music" data-link-name="nav : globalSub : music">
+music
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/technology/games" data-link-name="nav : globalSub : games">
+games
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/books" data-link-name="nav : globalSub : books">
+books
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/artanddesign" data-link-name="nav : globalSub : art &amp; design">
+art & design
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/stage" data-link-name="nav : globalSub : stage">
+stage
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/music/classicalmusicandopera" data-link-name="nav : globalSub : classical">
+classical
+</a>
+</li>
+</ul>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/uk/business" data-link-name="nav : globalTop : business">
+business
+</a>
+<ul class="global-navigation__children">
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/business/economics" data-link-name="nav : globalSub : economics">
+economics
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/business/banking" data-link-name="nav : globalSub : banking">
+banking
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/business/retail" data-link-name="nav : globalSub : retail">
+retail
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/business/stock-markets" data-link-name="nav : globalSub : markets">
+markets
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/business/eurozone" data-link-name="nav : globalSub : eurozone">
+eurozone
+</a>
+</li>
+</ul>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/lifeandstyle" data-link-name="nav : globalTop : lifestyle">
+lifestyle
+</a>
+<ul class="global-navigation__children">
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/lifeandstyle/food-and-drink" data-link-name="nav : globalSub : food">
+food
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/lifeandstyle/health-and-wellbeing" data-link-name="nav : globalSub : health &amp; fitness">
+health & fitness
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/lifeandstyle/love-and-sex" data-link-name="nav : globalSub : love &amp; sex">
+love & sex
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/lifeandstyle/family" data-link-name="nav : globalSub : family">
+family
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/lifeandstyle/women" data-link-name="nav : globalSub : women">
+women
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/lifeandstyle/home-and-garden" data-link-name="nav : globalSub : home &amp; garden">
+home & garden
+</a>
+</li>
+</ul>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/fashion" data-link-name="nav : globalTop : fashion">
+fashion
+</a>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/uk/environment" data-link-name="nav : globalTop : environment">
+environment
+</a>
+<ul class="global-navigation__children">
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/environment/climate-change" data-link-name="nav : globalSub : climate change">
+climate change
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/environment/wildlife" data-link-name="nav : globalSub : wildlife">
+wildlife
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/environment/energy" data-link-name="nav : globalSub : energy">
+energy
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/environment/pollution" data-link-name="nav : globalSub : pollution">
+pollution
+</a>
+</li>
+</ul>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/uk/technology" data-link-name="nav : globalTop : tech">
+tech
+</a>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/travel" data-link-name="nav : globalTop : travel">
+travel
+</a>
+<ul class="global-navigation__children">
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/travel/uk" data-link-name="nav : globalSub : UK">
+UK
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/travel/europe" data-link-name="nav : globalSub : europe">
+europe
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/travel/usa" data-link-name="nav : globalSub : US">
+US
+</a>
+</li>
+</ul>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/uk/money" data-link-name="nav : globalTop : money">
+money
+</a>
+<ul class="global-navigation__children">
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/money/property" data-link-name="nav : globalSub : property">
+property
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/money/savings" data-link-name="nav : globalSub : savings">
+savings
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/money/pensions" data-link-name="nav : globalSub : pensions">
+pensions
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/money/debt" data-link-name="nav : globalSub : borrowing">
+borrowing
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/money/work-and-careers" data-link-name="nav : globalSub : careers">
+careers
+</a>
+</li>
+</ul>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/science" data-link-name="nav : globalTop : science">
+science
+</a>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/guardian-professional" data-link-name="nav : globalTop : professional networks">
+professional networks
+</a>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/observer" data-link-name="nav : globalTop : the observer">
+the observer
+</a>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/theguardian" data-link-name="nav : globalTop : today&#x27;s paper">
+today's paper
+</a>
+<ul class="global-navigation__children">
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/theguardian/mainsection/editorialsandreply" data-link-name="nav : globalSub : editorials and letters">
+editorials and letters
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/tone/obituaries" data-link-name="nav : globalSub : obituaries">
+obituaries
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/theguardian/g2" data-link-name="nav : globalSub : g2">
+g2
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/theguardian/weekend" data-link-name="nav : globalSub : weekend">
+weekend
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/theguardian/theguide" data-link-name="nav : globalSub : the guide">
+the guide
+</a>
+</li>
+<li class="global-navigation__child">
+<a class="global-navigation__action" href="http://www.theguardian.com/theguardian/guardianreview" data-link-name="nav : globalSub : saturday review">
+saturday review
+</a>
+</li>
+</ul>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/membership" data-link-name="nav : globalTop : membership">
+membership
+</a>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/crosswords" data-link-name="nav : globalTop : crosswords">
+crosswords
+</a>
+</li>
+<li class="global-navigation__section">
+<a class="global-navigation__title" href="http://www.theguardian.com/video" data-link-name="nav : globalTop : video">
+video
+</a>
+</li>
+</ul>
+</nav>
+</div>
+</div>
+</div>
+</div>
+<ul class="breadcrumb signposting">
+<li class="signposting__item signposting__item--parent">
+<div itemscope itemtype="http://data-vocabulary.org/Breadcrumb">
+<a itemprop="url" href="http://www.theguardian.com/uk/culture" data-link-name="/culture" class="signposting__action"><span itemprop="title">Culture</span></a>
+</div>
+</li>
+<li class="signposting__item signposting__item--parent">
+<div itemscope itemtype="http://data-vocabulary.org/Breadcrumb">
+<span class="signposting__separator" aria-hidden="true"><span class="signposting__separator__inner">›</span></span>
+<a itemprop="url" href="http://www.theguardian.com/film" data-link-name="/film" class="signposting__action"><span itemprop="title">Film</span></a>
+</div>
+</li>
+<li class="signposting__item signposting__item--parent">
+<div itemscope itemtype="http://data-vocabulary.org/Breadcrumb">
+<span class="signposting__separator" aria-hidden="true"><span class="signposting__separator__inner">›</span></span>
+<a itemprop="url" href="http://www.theguardian.com/film/sport" data-link-name="/film/sport" class="signposting__action"><span itemprop="title">Sport</span></a>
+</div>
+</li>
+</ul>
+</div>
+</div>
+<div class="l-footer__secondary gs-container" role="contentinfo">
+<ul class="colophon u-cf">
+<li class="colophon__item"><a data-link-name="uk : footer : membership" href="https://membership.theguardian.com/?INTCMP=NGW_FOOTER_UK_GU_MEMBERSHIP">
+membership</a></li>
+<li class="colophon__item"><a data-link-name="uk : footer : jobs" href="http://jobs.theguardian.com/?INTCMP=NGW_FOOTER_UK_GU_JOBS">
+jobs</a></li>
+<li class="colophon__item"><a data-link-name="uk : footer : soulmates" href="https://soulmates.theguardian.com/?INTCMP=NGW_FOOTER_UK_GU_SOULMATES">
+dating</a></li>
+<li class="colophon__item"><a data-link-name="uk : footer : masterclasses" href="http://www.theguardian.com/guardian-masterclasses?INTCMP=NGW_FOOTER_UK_GU_MASTERCLASSES">
+masterclasses</a></li>
+<li class="colophon__item"><a data-link-name="uk : footer : subscribe" href="http://subscribe.theguardian.com/?INTCMP=NGW_FOOTER_UK_GU_SUBSCRIBE">
+subscribe</a></li>
+<li class="colophon__item"><a data-link-name="all topics" href="http://www.theguardian.com/index/subjects/a">all topics</a></li>
+<li class="colophon__item"><a data-link-name="all contributors" href="http://www.theguardian.com/index/contributors">all contributors</a></li>
+<li class="colophon__item"><a data-link-name="uk : footer : about us" href="http://www.theguardian.com/info">
+about us</a></li>
+<li class="colophon__item"><a data-link-name="uk : footer : contact us" href="http://www.theguardian.com/help/contact-us">
+contact us</a></li>
+<li class="colophon__item">
+<form class="js-tech-feedback" action="http://beacon.guim.co.uk/tech-feedback?uri=http://www.theguardian.com/film/2014/apr/24/the-hurricane-rubin-carter-denzel-washington" method="post">
+<input data-link-name="tech feedback" type="submit" value="report technical issue">
+</form>
+</li>
+<li class="colophon__item"><a data-link-name="complaints" href="http://www.theguardian.com/info/complaints-and-corrections">
+complaints &amp; corrections</a></li>
+<li class="colophon__item"><a data-link-name="terms" href="http://www.theguardian.com/help/terms-of-service">terms &amp; conditions</a></li>
+<li class="colophon__item"><a data-link-name="privacy" href="http://www.theguardian.com/info/privacy">privacy policy</a></li>
+<li class="colophon__item"><a data-link-name="cookie" href="http://www.theguardian.com/info/cookies">cookie policy</a></li>
+<li class="colophon__item"><a data-link-name="securedrop" href="https://securedrop.theguardian.com/">securedrop</a></li>
+</ul>
+<div class="l-footer__misc">
+<div class="really-serious-copyright">© 2015 Guardian News and Media Limited or its affiliated companies. All rights reserved.</div>
+</div>
+</div>
+</footer>
+<noscript id="omnitureNoScript">
+<div>
+<img id="omnitureNoScriptImage" alt="" src="http://hits.theguardian.com/b/ss/guardiangu-frontend,guardiangu-network/1/H.25.3/?c3=theguardian.com&c4=Sport%2CFilm%2CDenzel+Washington&v19=frontend&cdp=2&c11=film&c8=2081110&v7=GFE%3Afilm%3AArticle%3Athe-hurricane-rubin-carter-denzel-washington&event=&ns=guardian&c19=frontend&v23=&c56=No+Javascript&c67=nextgenServed&v8=2081110&c9=Article&ch=film&c30=content&c6=Alex+von+Tunzelmann&e27=&g=www.theguardian.com%2Ffilm%2F2014%2Fapr%2F24%2Fthe-hurricane-rubin-carter-denzel-washington&c13=Reel+history&pageName=GFE%3Afilm%3AArticle%3Athe-hurricane-rubin-carter-denzel-washington&c14=6562&c10=&c25=" width="1" height="1" class="u-h"/>
+</div>
+</noscript>
+<script>if(!guardian.isModernBrowser){var analyticsImage=document.createElement("img");analyticsImage.src="http://hits.theguardian.com/b/ss/guardiangu-frontend,guardiangu-network/1/H.25.3/?c3=theguardian.com&c4=Sport%2CFilm%2CDenzel+Washington&v19=frontend&cdp=2&c11=film&c8=2081110&v7=GFE%3Afilm%3AArticle%3Athe-hurricane-rubin-carter-denzel-washington&event=&ns=guardian&c19=frontend&v23=&c56=Partial+Javascript&c67=nextgenServed&v8=2081110&c9=Article&ch=film&c30=content&c6=Alex+von+Tunzelmann&e27=&g=www.theguardian.com%2Ffilm%2F2014%2Fapr%2F24%2Fthe-hurricane-rubin-carter-denzel-washington&c13=Reel+history&pageName=GFE%3Afilm%3AArticle%3Athe-hurricane-rubin-carter-denzel-washington&c14=6562&c10=&c25=";analyticsImage.width="1";analyticsImage.height="1";document.body.appendChild(analyticsImage);var img=new Image();img.src="http://beacon.guim.co.uk/count/pva.gif";var s=document.createElement('script'),sc=document.getElementsByTagName('script')[0];s.src='http://assets.guim.co.uk/javascripts/bootstraps/b8bebbfd475d40dfd6b7dcfb23e8f546/ophan.js';s.aysnc=true;sc.parentNode.insertBefore(s,sc);}</script>
+<noscript>
+<div style="display:inline;">
+<img height="1" width="1" style="border-style:none;" alt="" src="http://googleads.g.doubleclick.net/pagead/viewthroughconversion/971225648/?value=0&amp;guid=ON&amp;script=0"/>
+</div>
+</noscript>
+<img src="http://beacon.guim.co.uk/count/pv.gif" alt="" style="display : none ;" rel="nofollow"/>
+<script>(function(isVeryModern){function insertUserName(){function getCookieValue(a){var d=[],e=document.cookie.split(";");a=RegExp("^\\s*"+a+"=\\s*(.*?)\\s*$");for(var b=0;b<e.length;b++){var f=e[b].match(a);f&&d.push(f[1]);}
+if(d.length>0){return d[0];}
+return null;}
+function decodeBase64(str){return decodeURIComponent(encodeURIComponent(atob(str.replace(/-/g,'+').replace(/_/g,'/').replace(/,/g,'='))));}
+function getUserDisplayNameFromCookie(){var cookieData=getCookieValue('GU_U');var userData=cookieData?JSON.parse(decodeBase64(cookieData.split('.')[0])):null;if(userData){return userData[2];}
+return null;}
+var userDisplayName=getUserDisplayNameFromCookie();if(userDisplayName){document.getElementsByClassName('js-profile-info')[0].innerHTML=userDisplayName;document.getElementsByClassName('js-profile-nav')[0].classList.add('is-signed-in');if(window.guardian.config.switches.becomeAMember){var $register=document.getElementsByClassName('js-profile-register')[0];$register.parentElement.removeChild($register);}}}
+insertUserName();})(guardian.isModernBrowser&&'atob'in window&&'classList'in document.documentElement);</script>
+<div id="dfp-ad--pageskin-inread" class="js-ad-slot ad-slot ad-slot--dfp ad-slot--pageskin-inread ad-slot--page-skin" data-link-name="ad slot pageskin-inread" data-test-id="ad-slot-pageskin-inread" data-name="pageskin-inread" data-label="false" data-refresh="false" data-out-of-page="true" data-wide="1,1"> </div>
+</body>
+</html>
diff --git a/tests/samples/too-many-images.sample.html b/tests/samples/too-many-images.sample.html
new file mode 100644
index 00000000..db14745a
--- /dev/null
+++ b/tests/samples/too-many-images.sample.html
@@ -0,0 +1,2154 @@
+<!DOCTYPE html>
+<html class='v2' dir='ltr'>
+<head>
+<meta content='width=1100' name='viewport'/>
+<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'/>
+<script type="text/javascript">(function() { var b=window,f="chrome",g="jstiming",k="tick";(function(){function d(a){this.t={};this.tick=function(a,d,c){var e=void 0!=c?c:(new Date).getTime();this.t[a]=[e,d];if(void 0==c)try{b.console.timeStamp("CSI/"+a)}catch(h){}};this[k]("start",null,a)}var a;b.performance&&(a=b.performance.timing);var n=a?new d(a.responseStart):new d;b.jstiming={Timer:d,load:n};if(a){var c=a.navigationStart,h=a.responseStart;0<c&&h>=c&&(b[g].srt=h-c)}if(a){var e=b[g].load;0<c&&h>=c&&(e[k]("_wtsrt",void 0,c),e[k]("wtsrt_","_wtsrt",h),e[k]("tbsd_","wtsrt_"))}try{a=null,
+b[f]&&b[f].csi&&(a=Math.floor(b[f].csi().pageT),e&&0<c&&(e[k]("_tbnd",void 0,b[f].csi().startE),e[k]("tbnd_","_tbnd",c))),null==a&&b.gtbExternal&&(a=b.gtbExternal.pageT()),null==a&&b.external&&(a=b.external.pageT,e&&0<c&&(e[k]("_tbnd",void 0,b.external.startE),e[k]("tbnd_","_tbnd",c))),a&&(b[g].pt=a)}catch(p){}})();b.tickAboveFold=function(d){var a=0;if(d.offsetParent){do a+=d.offsetTop;while(d=d.offsetParent)}d=a;750>=d&&b[g].load[k]("aft")};var l=!1;function m(){l||(l=!0,b[g].load[k]("firstScrollTime"))}b.addEventListener?b.addEventListener("scroll",m,!1):b.attachEvent("onscroll",m);
+ })();</script>
+<meta content='blogger' name='generator'/>
+<link href='http://www.melposen.blogspot.no/favicon.ico' rel='icon' type='image/x-icon'/>
+<link href='http://melposen.blogspot.com/2012/02/saftsentrifuge.html' rel='canonical'/>
+<link rel="alternate" type="application/atom+xml" title="melposen.blogspot.no - Atom" href="http://melposen.blogspot.com/feeds/posts/default" />
+<link rel="alternate" type="application/rss+xml" title="melposen.blogspot.no - RSS" href="http://melposen.blogspot.com/feeds/posts/default?alt=rss" />
+<link rel="service.post" type="application/atom+xml" title="melposen.blogspot.no - Atom" href="http://www.blogger.com/feeds/113083922500631895/posts/default" />
+
+<link rel="alternate" type="application/atom+xml" title="melposen.blogspot.no - Atom" href="http://melposen.blogspot.com/feeds/4645670632029404989/comments/default" />
+<!--[if IE]><script type="text/javascript" src="https://www.blogger.com/static/v1/jsbin/850967532-ieretrofit.js"></script>
+<![endif]-->
+<link rel="image_src" href="http://4.bp.blogspot.com/-N9t4bbHB3gk/T0YHqMtaqsI/AAAAAAAAAeg/Ij0rS0lzf2E/s72-c/hr1861dvd.jpg" />
+<!--[if IE]> <script> (function() { var html5 = ("abbr,article,aside,audio,canvas,datalist,details," + "figure,footer,header,hgroup,mark,menu,meter,nav,output," + "progress,section,time,video").split(','); for (var i = 0; i < html5.length; i++) { document.createElement(html5[i]); } try { document.execCommand('BackgroundImageCache', false, true); } catch(e) {} })(); </script> <![endif]-->
+<title>melposen.blogspot.no: SAFTSENTRIFUGE</title>
+<script type="text/javascript">
+if (navigator.userAgent.indexOf('MSIE 6') == -1) {
+  WebFontConfig = {
+    google: { families: [ 'Coming Soon' ],
+    api: '//themes.googleusercontent.com/fonts/css?kit=myblyOycMnPMGjfPG-DzPz33tNpxd7DqNl3prDFz23I'
+    },
+    loading: function() {
+      if (window.jstiming) window.jstiming.load.tick('webfontLoading');
+    },
+    active: function() {
+      if (window.jstiming) window.jstiming.load.tick('webfontActive');
+    }
+  };
+  (function() {
+    var wf = document.createElement('script');
+    wf.src = '//ajax.googleapis.com/ajax/libs/webfont/1/webfont.js';
+    wf.type = 'text/javascript';
+    wf.async = 'true';
+    var s = document.getElementsByTagName('script')[0];
+    s.parentNode.insertBefore(wf, s);
+  })();
+} else {
+  document.documentElement.className = 'wf-inactive';
+}
+</script>
+<style type="text/css">
+.wf-inactive body, .wf-comingsoon-n4-loading body, .wf-comingsoon-n4-inactive body {
+  font-family: cursive;
+}
+.wf-inactive .header h1, .wf-comingsoon-n4-loading .header h1, .wf-comingsoon-n4-inactive .header h1 {
+  font-family: cursive;
+}
+.wf-inactive .header .description, .wf-comingsoon-n4-loading .header .description, .wf-comingsoon-n4-inactive .header .description {
+  font-family: cursive;
+}
+.wf-inactive .tabs-inner .widget li a, .wf-comingsoon-n4-loading .tabs-inner .widget li a, .wf-comingsoon-n4-inactive .tabs-inner .widget li a {
+  font-family: cursive;
+}
+.wf-inactive .main-inner .widget h2.date-header, .wf-comingsoon-n4-loading .main-inner .widget h2.date-header, .wf-comingsoon-n4-inactive .main-inner .widget h2.date-header, .wf-inactive .main-inner .widget h2.date-header span, .wf-comingsoon-n4-loading .main-inner .widget h2.date-header span, .wf-comingsoon-n4-inactive .main-inner .widget h2.date-header span {
+  font-family: cursive;
+}
+.wf-inactive h3.post-title, .wf-comingsoon-n4-loading h3.post-title, .wf-comingsoon-n4-inactive h3.post-title, .wf-inactive h4, .wf-comingsoon-n4-loading h4, .wf-comingsoon-n4-inactive h4, .wf-inactive h3.post-title a, .wf-comingsoon-n4-loading h3.post-title a, .wf-comingsoon-n4-inactive h3.post-title a {
+  font-family: cursive;
+}
+.wf-inactive h2, .wf-comingsoon-n4-loading h2, .wf-comingsoon-n4-inactive h2 {
+  font-family: cursive;
+}
+.wf-inactive .sidebar .widget, .wf-comingsoon-n4-loading .sidebar .widget, .wf-comingsoon-n4-inactive .sidebar .widget {
+  font-family: cursive;
+}
+</style>
+<link type='text/css' rel='stylesheet' href='https://www.blogger.com/static/v1/widgets/2356033831-widget_css_2_bundle.css' />
+<link type='text/css' rel='stylesheet' href='http://www.google.com/uds/css/gsearch.css' />
+<link type='text/css' rel='stylesheet' href='http://www.blogger.com/dyn-css/authorization.css?targetBlogID=113083922500631895&zx=fee3d931-f250-4fd5-9add-b1d2cf16b778' />
+<style id='page-skin-1' type='text/css'><!--
+/*
+-----------------------------------------------
+Blogger Template Style
+Name:     Awesome Inc.
+Designer: Tina Chen
+URL:      tinachen.org
+----------------------------------------------- */
+/* Variable definitions
+====================
+<Variable name="keycolor" description="Main Color" type="color" default="#ffffff"/>
+<Group description="Page" selector="body">
+<Variable name="body.font" description="Font" type="font"
+default="normal normal 13px Arial, Tahoma, Helvetica, FreeSans, sans-serif"/>
+<Variable name="body.background.color" description="Background Color" type="color" default="#000000"/>
+<Variable name="body.text.color" description="Text Color" type="color" default="#ffffff"/>
+</Group>
+<Group description="Links" selector=".main-inner">
+<Variable name="link.color" description="Link Color" type="color" default="#888888"/>
+<Variable name="link.visited.color" description="Visited Color" type="color" default="#444444"/>
+<Variable name="link.hover.color" description="Hover Color" type="color" default="#cccccc"/>
+</Group>
+<Group description="Blog Title" selector=".header h1">
+<Variable name="header.font" description="Title Font" type="font"
+default="normal bold 40px Arial, Tahoma, Helvetica, FreeSans, sans-serif"/>
+<Variable name="header.text.color" description="Title Color" type="color" default="#333333" />
+<Variable name="header.background.color" description="Header Background" type="color" default="transparent" />
+</Group>
+<Group description="Blog Description" selector=".header .description">
+<Variable name="description.font" description="Font" type="font"
+default="normal normal 14px Arial, Tahoma, Helvetica, FreeSans, sans-serif"/>
+<Variable name="description.text.color" description="Text Color" type="color"
+default="#333333" />
+</Group>
+<Group description="Tabs Text" selector=".tabs-inner .widget li a">
+<Variable name="tabs.font" description="Font" type="font"
+default="normal bold 14px Arial, Tahoma, Helvetica, FreeSans, sans-serif"/>
+<Variable name="tabs.text.color" description="Text Color" type="color" default="#333333"/>
+<Variable name="tabs.selected.text.color" description="Selected Color" type="color" default="#1fbb0c"/>
+</Group>
+<Group description="Tabs Background" selector=".tabs-outer .PageList">
+<Variable name="tabs.background.color" description="Background Color" type="color" default="#141414"/>
+<Variable name="tabs.selected.background.color" description="Selected Color" type="color" default="#444444"/>
+<Variable name="tabs.border.color" description="Border Color" type="color" default="#aab123"/>
+</Group>
+<Group description="Date Header" selector=".main-inner .widget h2.date-header, .main-inner .widget h2.date-header span">
+<Variable name="date.font" description="Font" type="font"
+default="normal normal 14px Arial, Tahoma, Helvetica, FreeSans, sans-serif"/>
+<Variable name="date.text.color" description="Text Color" type="color" default="#666666"/>
+<Variable name="date.border.color" description="Border Color" type="color" default="#aab123"/>
+</Group>
+<Group description="Post Title" selector="h3.post-title, h4, h3.post-title a">
+<Variable name="post.title.font" description="Font" type="font"
+default="normal bold 22px Arial, Tahoma, Helvetica, FreeSans, sans-serif"/>
+<Variable name="post.title.text.color" description="Text Color" type="color" default="#333333"/>
+</Group>
+<Group description="Post Background" selector=".post">
+<Variable name="post.background.color" description="Background Color" type="color" default="#fefdfa" />
+<Variable name="post.border.color" description="Border Color" type="color" default="#aab123" />
+<Variable name="post.border.bevel.color" description="Bevel Color" type="color" default="#aab123"/>
+</Group>
+<Group description="Gadget Title" selector="h2">
+<Variable name="widget.title.font" description="Font" type="font"
+default="normal bold 14px Arial, Tahoma, Helvetica, FreeSans, sans-serif"/>
+<Variable name="widget.title.text.color" description="Text Color" type="color" default="#333333"/>
+</Group>
+<Group description="Gadget Text" selector=".sidebar .widget">
+<Variable name="widget.font" description="Font" type="font"
+default="normal normal 14px Arial, Tahoma, Helvetica, FreeSans, sans-serif"/>
+<Variable name="widget.text.color" description="Text Color" type="color" default="#333333"/>
+<Variable name="widget.alternate.text.color" description="Alternate Color" type="color" default="#666666"/>
+</Group>
+<Group description="Gadget Links" selector=".sidebar .widget">
+<Variable name="widget.link.color" description="Link Color" type="color" default="#1fbb0c"/>
+<Variable name="widget.link.visited.color" description="Visited Color" type="color" default="#1fbb0c"/>
+<Variable name="widget.link.hover.color" description="Hover Color" type="color" default="#1fbb0c"/>
+</Group>
+<Group description="Gadget Background" selector=".sidebar .widget">
+<Variable name="widget.background.color" description="Background Color" type="color" default="#141414"/>
+<Variable name="widget.border.color" description="Border Color" type="color" default="#222222"/>
+<Variable name="widget.border.bevel.color" description="Bevel Color" type="color" default="#000000"/>
+</Group>
+<Group description="Sidebar Background" selector=".column-left-inner .column-right-inner">
+<Variable name="widget.outer.background.color" description="Background Color" type="color" default="transparent" />
+</Group>
+<Group description="Images" selector=".main-inner">
+<Variable name="image.background.color" description="Background Color" type="color" default="transparent"/>
+<Variable name="image.border.color" description="Border Color" type="color" default="transparent"/>
+</Group>
+<Group description="Feed" selector=".blog-feeds">
+<Variable name="feed.text.color" description="Text Color" type="color" default="#333333"/>
+</Group>
+<Group description="Feed Links" selector=".blog-feeds">
+<Variable name="feed.link.color" description="Link Color" type="color" default="#1fbb0c"/>
+<Variable name="feed.link.visited.color" description="Visited Color" type="color" default="#1fbb0c"/>
+<Variable name="feed.link.hover.color" description="Hover Color" type="color" default="#1fbb0c"/>
+</Group>
+<Group description="Pager" selector=".blog-pager">
+<Variable name="pager.background.color" description="Background Color" type="color" default="#fefdfa" />
+</Group>
+<Group description="Footer" selector=".footer-outer">
+<Variable name="footer.background.color" description="Background Color" type="color" default="#fefdfa" />
+<Variable name="footer.text.color" description="Text Color" type="color" default="#333333" />
+</Group>
+<Variable name="title.shadow.spread" description="Title Shadow" type="length" default="-1px"/>
+<Variable name="body.background" description="Body Background" type="background"
+color="#fcfbf5"
+default="$(color) none repeat scroll top left"/>
+<Variable name="body.background.gradient.cap" description="Body Gradient Cap" type="url"
+default="none"/>
+<Variable name="body.background.size" description="Body Background Size" type="string" default="auto"/>
+<Variable name="tabs.background.gradient" description="Tabs Background Gradient" type="url"
+default="none"/>
+<Variable name="header.background.gradient" description="Header Background Gradient" type="url" default="none" />
+<Variable name="header.padding.top" description="Header Top Padding" type="length" default="22px" />
+<Variable name="header.margin.top" description="Header Top Margin" type="length" default="0" />
+<Variable name="header.margin.bottom" description="Header Bottom Margin" type="length" default="0" />
+<Variable name="widget.padding.top" description="Widget Padding Top" type="length" default="8px" />
+<Variable name="widget.padding.side" description="Widget Padding Side" type="length" default="15px" />
+<Variable name="widget.outer.margin.top" description="Widget Top Margin" type="length" default="0" />
+<Variable name="widget.outer.background.gradient" description="Gradient" type="url" default="none" />
+<Variable name="widget.border.radius" description="Gadget Border Radius" type="length" default="0" />
+<Variable name="outer.shadow.spread" description="Outer Shadow Size" type="length" default="0" />
+<Variable name="date.header.border.radius.top" description="Date Header Border Radius Top" type="length" default="0" />
+<Variable name="date.header.position" description="Date Header Position" type="length" default="15px" />
+<Variable name="date.space" description="Date Space" type="length" default="30px" />
+<Variable name="date.position" description="Date Float" type="string" default="static" />
+<Variable name="date.padding.bottom" description="Date Padding Bottom" type="length" default="0" />
+<Variable name="date.border.size" description="Date Border Size" type="length" default="0" />
+<Variable name="date.background" description="Date Background" type="background" color="transparent"
+default="$(color) none no-repeat scroll top left" />
+<Variable name="date.first.border.radius.top" description="Date First top radius" type="length" default="5px" />
+<Variable name="date.last.space.bottom" description="Date Last Space Bottom" type="length"
+default="20px" />
+<Variable name="date.last.border.radius.bottom" description="Date Last bottom radius" type="length" default="5px" />
+<Variable name="post.first.padding.top" description="First Post Padding Top" type="length" default="0" />
+<Variable name="image.shadow.spread" description="Image Shadow Size" type="length" default="0"/>
+<Variable name="image.border.radius" description="Image Border Radius" type="length" default="0"/>
+<Variable name="separator.outdent" description="Separator Outdent" type="length" default="15px" />
+<Variable name="title.separator.border.size" description="Widget Title Border Size" type="length" default="1px" />
+<Variable name="list.separator.border.size" description="List Separator Border Size" type="length" default="1px" />
+<Variable name="shadow.spread" description="Shadow Size" type="length" default="0"/>
+<Variable name="startSide" description="Side where text starts in blog language" type="automatic" default="left"/>
+<Variable name="endSide" description="Side where text ends in blog language" type="automatic" default="right"/>
+<Variable name="date.side" description="Side where date header is placed" type="string" default="right"/>
+<Variable name="pager.border.radius.top" description="Pager Border Top Radius" type="length" default="5px" />
+<Variable name="pager.space.top" description="Pager Top Space" type="length" default="1em" />
+<Variable name="footer.background.gradient" description="Background Gradient" type="url" default="none" />
+<Variable name="mobile.background.size" description="Mobile Background Size" type="string"
+default="auto"/>
+<Variable name="mobile.background.overlay" description="Mobile Background Overlay" type="string"
+default="transparent none repeat scroll top left"/>
+<Variable name="mobile.button.color" description="Mobile Button Color" type="color" default="#ffffff" />
+*/
+/* Content
+----------------------------------------------- */
+body {
+font: normal normal 18px Coming Soon;
+color: #333333;
+background: #fcfbf5 url(http://themes.googleusercontent.com/image?id=0BwVBOzw_-hbMNjViMzQ0ZDEtMWU1NS00ZTBkLWFjY2EtZjM5YmU4OTA2MjBm) repeat-x fixed top center /* Credit: Jason Morrow (http://jasonmorrow.etsy.com) */;
+}
+html body .content-outer {
+min-width: 0;
+max-width: 100%;
+width: 100%;
+}
+a:link {
+text-decoration: none;
+color: #1fbb0c;
+}
+a:visited {
+text-decoration: none;
+color: #1fbb0c;
+}
+a:hover {
+text-decoration: underline;
+color: #1fbb0c;
+}
+.body-fauxcolumn-outer .cap-top {
+position: absolute;
+z-index: 1;
+height: 276px;
+width: 100%;
+background: transparent none repeat-x scroll top left;
+_background-image: none;
+}
+/* Columns
+----------------------------------------------- */
+.content-inner {
+padding: 0;
+}
+.header-inner .section {
+margin: 0 16px;
+}
+.tabs-inner .section {
+margin: 0 16px;
+}
+.main-inner {
+padding-top: 30px;
+}
+.main-inner .column-center-inner,
+.main-inner .column-left-inner,
+.main-inner .column-right-inner {
+padding: 0 5px;
+}
+*+html body .main-inner .column-center-inner {
+margin-top: -30px;
+}
+#layout .main-inner .column-center-inner {
+margin-top: 0;
+}
+/* Header
+----------------------------------------------- */
+.header-outer {
+margin: 0 0 0 0;
+background: transparent none repeat scroll 0 0;
+}
+.Header h1 {
+font: normal bold 30px Coming Soon;
+color: #333333;
+text-shadow: 0 0 -1px #000000;
+}
+.Header h1 a {
+color: #333333;
+}
+.Header .description {
+font: normal bold 30px Coming Soon;
+color: #ca88b3;
+}
+.header-inner .Header .titlewrapper,
+.header-inner .Header .descriptionwrapper {
+padding-left: 0;
+padding-right: 0;
+margin-bottom: 0;
+}
+.header-inner .Header .titlewrapper {
+padding-top: 22px;
+}
+/* Tabs
+----------------------------------------------- */
+.tabs-outer {
+overflow: hidden;
+position: relative;
+background: #b3ca88 url(http://www.blogblog.com/1kt/awesomeinc/tabs_gradient_light.png) repeat scroll 0 0;
+}
+#layout .tabs-outer {
+overflow: visible;
+}
+.tabs-cap-top, .tabs-cap-bottom {
+position: absolute;
+width: 100%;
+border-top: 1px solid #908d6a;
+}
+.tabs-cap-bottom {
+bottom: 0;
+}
+.tabs-inner .widget li a {
+display: inline-block;
+margin: 0;
+padding: .6em 1.5em;
+font: normal normal 13px Coming Soon;
+color: #1fbb0c;
+border-top: 1px solid #908d6a;
+border-bottom: 1px solid #908d6a;
+border-left: 1px solid #908d6a;
+height: 16px;
+line-height: 16px;
+}
+.tabs-inner .widget li:last-child a {
+border-right: 1px solid #908d6a;
+}
+.tabs-inner .widget li.selected a, .tabs-inner .widget li a:hover {
+background: #63704b url(http://www.blogblog.com/1kt/awesomeinc/tabs_gradient_light.png) repeat-x scroll 0 -100px;
+color: #ffffff;
+}
+/* Headings
+----------------------------------------------- */
+h2 {
+font: normal bold 14px Coming Soon;
+color: #ca88b3;
+}
+/* Widgets
+----------------------------------------------- */
+.main-inner .section {
+margin: 0 27px;
+padding: 0;
+}
+.main-inner .column-left-outer,
+.main-inner .column-right-outer {
+margin-top: 0;
+}
+#layout .main-inner .column-left-outer,
+#layout .main-inner .column-right-outer {
+margin-top: 0;
+}
+.main-inner .column-left-inner,
+.main-inner .column-right-inner {
+background: transparent none repeat 0 0;
+-moz-box-shadow: 0 0 0 rgba(0, 0, 0, .2);
+-webkit-box-shadow: 0 0 0 rgba(0, 0, 0, .2);
+-goog-ms-box-shadow: 0 0 0 rgba(0, 0, 0, .2);
+box-shadow: 0 0 0 rgba(0, 0, 0, .2);
+-moz-border-radius: 5px;
+-webkit-border-radius: 5px;
+-goog-ms-border-radius: 5px;
+border-radius: 5px;
+}
+#layout .main-inner .column-left-inner,
+#layout .main-inner .column-right-inner {
+margin-top: 0;
+}
+.sidebar .widget {
+font: normal normal 14px Coming Soon;
+color: #333333;
+}
+.sidebar .widget a:link {
+color: #ca88b3;
+}
+.sidebar .widget a:visited {
+color: #ca88b3;
+}
+.sidebar .widget a:hover {
+color: #ca88b3;
+}
+.sidebar .widget h2 {
+text-shadow: 0 0 -1px #000000;
+}
+.main-inner .widget {
+background-color: #fefdfa;
+border: 1px solid #aab123;
+padding: 0 15px 15px;
+margin: 20px -16px;
+-moz-box-shadow: 0 0 20px rgba(0, 0, 0, .2);
+-webkit-box-shadow: 0 0 20px rgba(0, 0, 0, .2);
+-goog-ms-box-shadow: 0 0 20px rgba(0, 0, 0, .2);
+box-shadow: 0 0 20px rgba(0, 0, 0, .2);
+-moz-border-radius: 5px;
+-webkit-border-radius: 5px;
+-goog-ms-border-radius: 5px;
+border-radius: 5px;
+}
+.main-inner .widget h2 {
+margin: 0 -0;
+padding: .6em 0 .5em;
+border-bottom: 1px solid transparent;
+}
+.footer-inner .widget h2 {
+padding: 0 0 .4em;
+border-bottom: 1px solid transparent;
+}
+.main-inner .widget h2 + div, .footer-inner .widget h2 + div {
+border-top: 1px solid #aab123;
+padding-top: 8px;
+}
+.main-inner .widget .widget-content {
+margin: 0 -0;
+padding: 7px 0 0;
+}
+.main-inner .widget ul, .main-inner .widget #ArchiveList ul.flat {
+margin: -8px -15px 0;
+padding: 0;
+list-style: none;
+}
+.main-inner .widget #ArchiveList {
+margin: -8px 0 0;
+}
+.main-inner .widget ul li, .main-inner .widget #ArchiveList ul.flat li {
+padding: .5em 15px;
+text-indent: 0;
+color: #666666;
+border-top: 0 solid #aab123;
+border-bottom: 1px solid transparent;
+}
+.main-inner .widget #ArchiveList ul li {
+padding-top: .25em;
+padding-bottom: .25em;
+}
+.main-inner .widget ul li:first-child, .main-inner .widget #ArchiveList ul.flat li:first-child {
+border-top: none;
+}
+.main-inner .widget ul li:last-child, .main-inner .widget #ArchiveList ul.flat li:last-child {
+border-bottom: none;
+}
+.post-body {
+position: relative;
+}
+.main-inner .widget .post-body ul {
+padding: 0 2.5em;
+margin: .5em 0;
+list-style: disc;
+}
+.main-inner .widget .post-body ul li {
+padding: 0.25em 0;
+margin-bottom: .25em;
+color: #333333;
+border: none;
+}
+.footer-inner .widget ul {
+padding: 0;
+list-style: none;
+}
+.widget .zippy {
+color: #666666;
+}
+/* Posts
+----------------------------------------------- */
+body .main-inner .Blog {
+padding: 0;
+margin-bottom: 1em;
+background-color: transparent;
+border: none;
+-moz-box-shadow: 0 0 0 rgba(0, 0, 0, 0);
+-webkit-box-shadow: 0 0 0 rgba(0, 0, 0, 0);
+-goog-ms-box-shadow: 0 0 0 rgba(0, 0, 0, 0);
+box-shadow: 0 0 0 rgba(0, 0, 0, 0);
+}
+.main-inner .section:last-child .Blog:last-child {
+padding: 0;
+margin-bottom: 1em;
+}
+.main-inner .widget h2.date-header {
+margin: 0 -15px 1px;
+padding: 0 0 0 0;
+font: normal normal 13px Coming Soon;
+color: #ca88b3;
+background: transparent none no-repeat scroll top left;
+border-top: 0 solid #1fbb0c;
+border-bottom: 1px solid transparent;
+-moz-border-radius-topleft: 0;
+-moz-border-radius-topright: 0;
+-webkit-border-top-left-radius: 0;
+-webkit-border-top-right-radius: 0;
+border-top-left-radius: 0;
+border-top-right-radius: 0;
+position: static;
+bottom: 100%;
+right: 15px;
+text-shadow: 0 0 -1px #000000;
+}
+.main-inner .widget h2.date-header span {
+font: normal normal 13px Coming Soon;
+display: block;
+padding: .5em 15px;
+border-left: 0 solid #1fbb0c;
+border-right: 0 solid #1fbb0c;
+}
+.date-outer {
+position: relative;
+margin: 30px 0 20px;
+padding: 0 15px;
+background-color: #fefdfa;
+border: 1px solid #ca88b3;
+-moz-box-shadow: 0 0 20px rgba(0, 0, 0, .2);
+-webkit-box-shadow: 0 0 20px rgba(0, 0, 0, .2);
+-goog-ms-box-shadow: 0 0 20px rgba(0, 0, 0, .2);
+box-shadow: 0 0 20px rgba(0, 0, 0, .2);
+-moz-border-radius: 5px;
+-webkit-border-radius: 5px;
+-goog-ms-border-radius: 5px;
+border-radius: 5px;
+}
+.date-outer:first-child {
+margin-top: 0;
+}
+.date-outer:last-child {
+margin-bottom: 20px;
+-moz-border-radius-bottomleft: 5px;
+-moz-border-radius-bottomright: 5px;
+-webkit-border-bottom-left-radius: 5px;
+-webkit-border-bottom-right-radius: 5px;
+-goog-ms-border-bottom-left-radius: 5px;
+-goog-ms-border-bottom-right-radius: 5px;
+border-bottom-left-radius: 5px;
+border-bottom-right-radius: 5px;
+}
+.date-posts {
+margin: 0 -0;
+padding: 0 0;
+clear: both;
+}
+.post-outer, .inline-ad {
+border-top: 1px solid #ca88b3;
+margin: 0 -0;
+padding: 15px 0;
+}
+.post-outer {
+padding-bottom: 10px;
+}
+.post-outer:first-child {
+padding-top: 0;
+border-top: none;
+}
+.post-outer:last-child, .inline-ad:last-child {
+border-bottom: none;
+}
+.post-body {
+position: relative;
+}
+.post-body img {
+padding: 8px;
+background: #ffffff;
+border: 1px solid #1fbb0c;
+-moz-box-shadow: 0 0 20px rgba(0, 0, 0, .2);
+-webkit-box-shadow: 0 0 20px rgba(0, 0, 0, .2);
+box-shadow: 0 0 20px rgba(0, 0, 0, .2);
+-moz-border-radius: 5px;
+-webkit-border-radius: 5px;
+border-radius: 5px;
+}
+h3.post-title, h4 {
+font: normal normal 30px Coming Soon;
+color: #ca88b3;
+}
+h3.post-title a {
+font: normal normal 30px Coming Soon;
+color: #ca88b3;
+}
+h3.post-title a:hover {
+color: #1fbb0c;
+text-decoration: underline;
+}
+.post-header {
+margin: 0 0 1em;
+}
+.post-body {
+line-height: 1.4;
+}
+.post-outer h2 {
+color: #333333;
+}
+.post-footer {
+margin: 1.5em 0 0;
+}
+#blog-pager {
+padding: 15px;
+font-size: 120%;
+background-color: #fefdfa;
+border: 1px solid #aab123;
+-moz-box-shadow: 0 0 20px rgba(0, 0, 0, .2);
+-webkit-box-shadow: 0 0 20px rgba(0, 0, 0, .2);
+-goog-ms-box-shadow: 0 0 20px rgba(0, 0, 0, .2);
+box-shadow: 0 0 20px rgba(0, 0, 0, .2);
+-moz-border-radius: 5px;
+-webkit-border-radius: 5px;
+-goog-ms-border-radius: 5px;
+border-radius: 5px;
+-moz-border-radius-topleft: 5px;
+-moz-border-radius-topright: 5px;
+-webkit-border-top-left-radius: 5px;
+-webkit-border-top-right-radius: 5px;
+-goog-ms-border-top-left-radius: 5px;
+-goog-ms-border-top-right-radius: 5px;
+border-top-left-radius: 5px;
+border-top-right-radius-topright: 5px;
+margin-top: 1em;
+}
+.blog-feeds, .post-feeds {
+margin: 1em 0;
+text-align: center;
+color: #333333;
+}
+.blog-feeds a, .post-feeds a {
+color: #1fbb0c;
+}
+.blog-feeds a:visited, .post-feeds a:visited {
+color: #1fbb0c;
+}
+.blog-feeds a:hover, .post-feeds a:hover {
+color: #1fbb0c;
+}
+.post-outer .comments {
+margin-top: 2em;
+}
+/* Comments
+----------------------------------------------- */
+.comments .comments-content .icon.blog-author {
+background-repeat: no-repeat;
+background-image: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABIAAAASCAYAAABWzo5XAAAAAXNSR0IArs4c6QAAAAZiS0dEAP8A/wD/oL2nkwAAAAlwSFlzAAALEgAACxIB0t1+/AAAAAd0SU1FB9sLFwMeCjjhcOMAAAD+SURBVDjLtZSvTgNBEIe/WRRnm3U8RC1neQdsm1zSBIU9VVF1FkUguQQsD9ITmD7ECZIJSE4OZo9stoVjC/zc7ky+zH9hXwVwDpTAWWLrgS3QAe8AZgaAJI5zYAmc8r0G4AHYHQKVwII8PZrZFsBFkeRCABYiMh9BRUhnSkPTNCtVXYXURi1FpBDgArj8QU1eVXUzfnjv7yP7kwu1mYrkWlU33vs1QNu2qU8pwN0UpKoqokjWwCztrMuBhEhmh8bD5UDqur75asbcX0BGUB9/HAMB+r32hznJgXy2v0sGLBcyAJ1EK3LFcbo1s91JeLwAbwGYu7TP/3ZGfnXYPgAVNngtqatUNgAAAABJRU5ErkJggg==);
+}
+.comments .comments-content .loadmore a {
+border-top: 1px solid #908d6a;
+border-bottom: 1px solid #908d6a;
+}
+.comments .continue {
+border-top: 2px solid #908d6a;
+}
+/* Footer
+----------------------------------------------- */
+.footer-outer {
+margin: -20px 0 -1px;
+padding: 20px 0 0;
+color: #333333;
+overflow: hidden;
+}
+.footer-fauxborder-left {
+border-top: 1px solid #aab123;
+background: #fefdfa none repeat scroll 0 0;
+-moz-box-shadow: 0 0 20px rgba(0, 0, 0, .2);
+-webkit-box-shadow: 0 0 20px rgba(0, 0, 0, .2);
+-goog-ms-box-shadow: 0 0 20px rgba(0, 0, 0, .2);
+box-shadow: 0 0 20px rgba(0, 0, 0, .2);
+margin: 0 -20px;
+}
+/* Mobile
+----------------------------------------------- */
+body.mobile {
+background-size: auto;
+}
+.mobile .body-fauxcolumn-outer {
+background: transparent none repeat scroll top left;
+}
+*+html body.mobile .main-inner .column-center-inner {
+margin-top: 0;
+}
+.mobile .main-inner .widget {
+padding: 0 0 15px;
+}
+.mobile .main-inner .widget h2 + div,
+.mobile .footer-inner .widget h2 + div {
+border-top: none;
+padding-top: 0;
+}
+.mobile .footer-inner .widget h2 {
+padding: 0.5em 0;
+border-bottom: none;
+}
+.mobile .main-inner .widget .widget-content {
+margin: 0;
+padding: 7px 0 0;
+}
+.mobile .main-inner .widget ul,
+.mobile .main-inner .widget #ArchiveList ul.flat {
+margin: 0 -15px 0;
+}
+.mobile .main-inner .widget h2.date-header {
+right: 0;
+}
+.mobile .date-header span {
+padding: 0.4em 0;
+}
+.mobile .date-outer:first-child {
+margin-bottom: 0;
+border: 1px solid #ca88b3;
+-moz-border-radius-topleft: 5px;
+-moz-border-radius-topright: 5px;
+-webkit-border-top-left-radius: 5px;
+-webkit-border-top-right-radius: 5px;
+-goog-ms-border-top-left-radius: 5px;
+-goog-ms-border-top-right-radius: 5px;
+border-top-left-radius: 5px;
+border-top-right-radius: 5px;
+}
+.mobile .date-outer {
+border-color: #ca88b3;
+border-width: 0 1px 1px;
+}
+.mobile .date-outer:last-child {
+margin-bottom: 0;
+}
+.mobile .main-inner {
+padding: 0;
+}
+.mobile .header-inner .section {
+margin: 0;
+}
+.mobile .post-outer, .mobile .inline-ad {
+padding: 5px 0;
+}
+.mobile .tabs-inner .section {
+margin: 0 10px;
+}
+.mobile .main-inner .widget h2 {
+margin: 0;
+padding: 0;
+}
+.mobile .main-inner .widget h2.date-header span {
+padding: 0;
+}
+.mobile .main-inner .widget .widget-content {
+margin: 0;
+padding: 7px 0 0;
+}
+.mobile #blog-pager {
+border: 1px solid transparent;
+background: #fefdfa none repeat scroll 0 0;
+}
+.mobile .main-inner .column-left-inner,
+.mobile .main-inner .column-right-inner {
+background: transparent none repeat 0 0;
+-moz-box-shadow: none;
+-webkit-box-shadow: none;
+-goog-ms-box-shadow: none;
+box-shadow: none;
+}
+.mobile .date-posts {
+margin: 0;
+padding: 0;
+}
+.mobile .footer-fauxborder-left {
+margin: 0;
+border-top: inherit;
+}
+.mobile .main-inner .section:last-child .Blog:last-child {
+margin-bottom: 0;
+}
+.mobile-index-contents {
+color: #333333;
+}
+.mobile .mobile-link-button {
+background: #1fbb0c url(http://www.blogblog.com/1kt/awesomeinc/tabs_gradient_light.png) repeat scroll 0 0;
+}
+.mobile-link-button a:link, .mobile-link-button a:visited {
+color: #ffffff;
+}
+.mobile .tabs-inner .PageList .widget-content {
+background: transparent;
+border-top: 1px solid;
+border-color: #908d6a;
+color: #1fbb0c;
+}
+.mobile .tabs-inner .PageList .widget-content .pagelist-arrow {
+border-left: 1px solid #908d6a;
+}
+
+--></style>
+<style id='template-skin-1' type='text/css'><!--
+body {
+min-width: 1130px;
+}
+.content-outer, .content-fauxcolumn-outer, .region-inner {
+min-width: 1130px;
+max-width: 1130px;
+_width: 1130px;
+}
+.main-inner .columns {
+padding-left: 0px;
+padding-right: 370px;
+}
+.main-inner .fauxcolumn-center-outer {
+left: 0px;
+right: 370px;
+/* IE6 does not respect left and right together */
+_width: expression(this.parentNode.offsetWidth -
+parseInt("0px") -
+parseInt("370px") + 'px');
+}
+.main-inner .fauxcolumn-left-outer {
+width: 0px;
+}
+.main-inner .fauxcolumn-right-outer {
+width: 370px;
+}
+.main-inner .column-left-outer {
+width: 0px;
+right: 100%;
+margin-left: -0px;
+}
+.main-inner .column-right-outer {
+width: 370px;
+margin-right: -370px;
+}
+#layout {
+min-width: 0;
+}
+#layout .content-outer {
+min-width: 0;
+width: 800px;
+}
+#layout .region-inner {
+min-width: 0;
+width: auto;
+}
+--></style>
+<script type="text/javascript">var a="indexOf",b="&m=1",e="(^|&)m=",f="?",g="?m=1";function h(){var c=window.location.href,d=c.split(f);switch(d.length){case 1:return c+g;case 2:return 0<=d[1].search(e)?null:c+b;default:return null}}var k=navigator.userAgent;if(-1!=k[a]("Mobile")&&-1!=k[a]("WebKit")&&-1==k[a]("iPad")||-1!=k[a]("Opera Mini")||-1!=k[a]("IEMobile")){var l=h();l&&window.location.replace(l)};
+</script><script type="text/javascript">
+if (window.jstiming) window.jstiming.load.tick('headEnd');
+</script></head>
+<body class='loading'>
+<div class='navbar section' id='navbar'><div class='widget Navbar' id='Navbar1'><script type="text/javascript">
+    function setAttributeOnload(object, attribute, val) {
+      if(window.addEventListener) {
+        window.addEventListener('load',
+          function(){ object[attribute] = val; }, false);
+      } else {
+        window.attachEvent('onload', function(){ object[attribute] = val; });
+      }
+    }
+  </script>
+<div id="navbar-iframe-container"></div>
+<script type="text/javascript" src="https://apis.google.com/js/plusone.js"></script>
+<script type="text/javascript">
+        gapi.load("gapi.iframes:gapi.iframes.style.bubble", function() {
+          if (gapi.iframes && gapi.iframes.getContext) {
+            gapi.iframes.getContext().openChild({
+                url: '//www.blogger.com/navbar.g?targetBlogID\075113083922500631895\46blogName\75melposen.blogspot.no\46publishMode\75PUBLISH_MODE_BLOGSPOT\46navbarType\75LIGHT\46layoutType\75LAYOUTS\46searchRoot\75http://melposen.blogspot.com/search\46blogLocale\75no\46v\0752\46homepageUrl\75http://melposen.blogspot.com/\46targetPostID\0754645670632029404989\46blogPostOrPageUrl\75http://melposen.blogspot.com/2012/02/saftsentrifuge.html\46vt\75-6326341907690819742',
+                where: document.getElementById("navbar-iframe-container"),
+                id: "navbar-iframe"
+            });
+          }
+        });
+      </script><script type="text/javascript">
+(function() {
+var script = document.createElement('script');
+script.type = 'text/javascript';
+script.src = '//pagead2.googlesyndication.com/pagead/js/google_top_exp.js';
+var head = document.getElementsByTagName('head')[0];
+if (head) {
+head.appendChild(script);
+}})();
+</script>
+</div></div>
+<div class='body-fauxcolumns'>
+<div class='fauxcolumn-outer body-fauxcolumn-outer'>
+<div class='cap-top'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+<div class='fauxborder-left'>
+<div class='fauxborder-right'></div>
+<div class='fauxcolumn-inner'>
+</div>
+</div>
+<div class='cap-bottom'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+</div>
+</div>
+<div class='content'>
+<div class='content-fauxcolumns'>
+<div class='fauxcolumn-outer content-fauxcolumn-outer'>
+<div class='cap-top'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+<div class='fauxborder-left'>
+<div class='fauxborder-right'></div>
+<div class='fauxcolumn-inner'>
+</div>
+</div>
+<div class='cap-bottom'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+</div>
+</div>
+<div class='content-outer'>
+<div class='content-cap-top cap-top'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+<div class='fauxborder-left content-fauxborder-left'>
+<div class='fauxborder-right content-fauxborder-right'></div>
+<div class='content-inner'>
+<header>
+<div class='header-outer'>
+<div class='header-cap-top cap-top'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+<div class='fauxborder-left header-fauxborder-left'>
+<div class='fauxborder-right header-fauxborder-right'></div>
+<div class='region-inner header-inner'>
+<div class='header section' id='header'><div class='widget Header' id='Header1'>
+<div id='header-inner'>
+<a href='http://www.melposen.blogspot.no/' style='display: block'>
+<img alt='melposen.blogspot.no' height='649px; ' id='Header1_headerimg' src='http://2.bp.blogspot.com/-i8wCpCukDwY/UT4_vwhy6-I/AAAAAAAAA90/wodEtmrnOpU/s1600/melposen2.jpg' style='display: block' width='1124px; '/>
+</a>
+<div class='descriptionwrapper'>
+<p class='description'><span>
+</span></p>
+</div>
+</div>
+</div></div>
+</div>
+</div>
+<div class='header-cap-bottom cap-bottom'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+</div>
+</header>
+<div class='tabs-outer'>
+<div class='tabs-cap-top cap-top'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+<div class='fauxborder-left tabs-fauxborder-left'>
+<div class='fauxborder-right tabs-fauxborder-right'></div>
+<div class='region-inner tabs-inner'>
+<div class='tabs section' id='crosscol'></div>
+<div class='tabs section' id='crosscol-overflow'></div>
+</div>
+</div>
+<div class='tabs-cap-bottom cap-bottom'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+</div>
+<div class='main-outer'>
+<div class='main-cap-top cap-top'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+<div class='fauxborder-left main-fauxborder-left'>
+<div class='fauxborder-right main-fauxborder-right'></div>
+<div class='region-inner main-inner'>
+<div class='columns fauxcolumns'>
+<div class='fauxcolumn-outer fauxcolumn-center-outer'>
+<div class='cap-top'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+<div class='fauxborder-left'>
+<div class='fauxborder-right'></div>
+<div class='fauxcolumn-inner'>
+</div>
+</div>
+<div class='cap-bottom'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+</div>
+<div class='fauxcolumn-outer fauxcolumn-left-outer'>
+<div class='cap-top'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+<div class='fauxborder-left'>
+<div class='fauxborder-right'></div>
+<div class='fauxcolumn-inner'>
+</div>
+</div>
+<div class='cap-bottom'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+</div>
+<div class='fauxcolumn-outer fauxcolumn-right-outer'>
+<div class='cap-top'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+<div class='fauxborder-left'>
+<div class='fauxborder-right'></div>
+<div class='fauxcolumn-inner'>
+</div>
+</div>
+<div class='cap-bottom'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+</div>
+<!-- corrects IE6 width calculation -->
+<div class='columns-inner'>
+<div class='column-center-outer'>
+<div class='column-center-inner'>
+<div class='main section' id='main'><div class='widget Blog' id='Blog1'>
+<div class='blog-posts hfeed'>
+<!-- google_ad_section_start(name=default) -->
+
+          <div class="date-outer">
+
+<h2 class='date-header'><span>torsdag 23. februar 2012</span></h2>
+
+          <div class="date-posts">
+
+<div class='post-outer'>
+<div class='post hentry' itemprop='blogPost' itemscope='itemscope' itemtype='http://schema.org/BlogPosting'>
+<meta content='http://4.bp.blogspot.com/-N9t4bbHB3gk/T0YHqMtaqsI/AAAAAAAAAeg/Ij0rS0lzf2E/s320/hr1861dvd.jpg' itemprop='image_url'/>
+<meta content='113083922500631895' itemprop='blogId'/>
+<meta content='4645670632029404989' itemprop='postId'/>
+<a name='4645670632029404989'></a>
+<h3 class='post-title entry-title' itemprop='name'>
+SAFTSENTRIFUGE
+</h3>
+<div class='post-header'>
+<div class='post-header-line-1'></div>
+</div>
+<div class='post-body entry-content' id='post-body-4645670632029404989' itemprop='description articleBody'>
+Jeg har kjøpt meg en saftsentrifuge, eller jucie maker om du vil. Den er helt fantastisk genial. Frukten (ikke appelsiner og kiwi) bare skylles og puttes rett i maskinen, man trenger ikke skrelle eller fjerne steiner og kjernehus.&nbsp; 5 om dagen har aldri vært så enkelt som nå. Alt bare moses rett ned i maskinen og så får jeg ferskpresset knallgod jucie. Jeg tror vi har hatt mer frukt de siste 2-3 dagene enn de siste 2-3 månedene! Restene som blir til overs ligner på en slags pure, og kan f.eks brukes i kaker eller sauser. Det har jeg ikke testet ut enda, og det blir egentlig ganske lite svinn i forhold til hvor mange frukter man putter oppi. <br />
+<br />
+<div class="separator" style="clear: both; text-align: center;">
+<a href="http://4.bp.blogspot.com/-N9t4bbHB3gk/T0YHqMtaqsI/AAAAAAAAAeg/Ij0rS0lzf2E/s1600/hr1861dvd.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="320" src="http://4.bp.blogspot.com/-N9t4bbHB3gk/T0YHqMtaqsI/AAAAAAAAAeg/Ij0rS0lzf2E/s320/hr1861dvd.jpg" width="320" /></a></div>
+<br />
+<div class="separator" style="clear: both; text-align: center;">
+<a href="http://4.bp.blogspot.com/-yZLECoiTimE/T0YGhFxjexI/AAAAAAAAAeQ/CZnfKZrA1bY/s1600/bilde%281%29.JPG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="298" src="http://4.bp.blogspot.com/-yZLECoiTimE/T0YGhFxjexI/AAAAAAAAAeQ/CZnfKZrA1bY/s400/bilde%281%29.JPG" width="400" /></a><a href="http://1.bp.blogspot.com/-Zleaols1Z8A/T0YHKB7llwI/AAAAAAAAAeY/SMlj9ySuL50/s1600/bilde.JPG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="320" src="http://1.bp.blogspot.com/-Zleaols1Z8A/T0YHKB7llwI/AAAAAAAAAeY/SMlj9ySuL50/s320/bilde.JPG" width="236" /></a></div>
+<div style='clear: both;'></div>
+</div>
+<div class='post-footer'>
+<div class='post-footer-line post-footer-line-1'><span class='post-author vcard'>
+Lagt inn av
+<span class='fn' itemprop='author' itemscope='itemscope' itemtype='http://schema.org/Person'>
+<meta content='http://www.blogger.com/profile/03297498913659149933' itemprop='url'/>
+<a class='g-profile' href='http://www.blogger.com/profile/03297498913659149933' rel='author' title='author profile'>
+<span itemprop='name'>Renate</span>
+</a>
+</span>
+</span>
+<span class='post-timestamp'>
+kl.
+<meta content='http://melposen.blogspot.com/2012/02/saftsentrifuge.html' itemprop='url'/>
+<a class='timestamp-link' href='http://www.melposen.blogspot.no/2012/02/saftsentrifuge.html' rel='bookmark' title='permanent link'><abbr class='published' itemprop='datePublished' title='2012-02-23T01:33:00-08:00'>01:33</abbr></a>
+</span>
+<span class='post-comment-link'>
+</span>
+<span class='post-icons'>
+<span class='item-control blog-admin pid-1103627731'>
+<a href='http://www.blogger.com/post-edit.g?blogID=113083922500631895&postID=4645670632029404989&from=pencil' title='Rediger innlegg'>
+<img alt='' class='icon-action' height='18' src='http://img2.blogblog.com/img/icon18_edit_allbkg.gif' width='18'/>
+</a>
+</span>
+</span>
+<div class='post-share-buttons goog-inline-block'>
+<a class='goog-inline-block share-button sb-email' href='http://www.blogger.com/share-post.g?blogID=113083922500631895&postID=4645670632029404989&target=email' target='_blank' title='Send dette via e-post'><span class='share-button-link-text'>Send dette via e-post</span></a><a class='goog-inline-block share-button sb-blog' href='http://www.blogger.com/share-post.g?blogID=113083922500631895&postID=4645670632029404989&target=blog' onclick='window.open(this.href, "_blank", "height=270,width=475"); return false;' target='_blank' title='Blogg dette!'><span class='share-button-link-text'>Blogg dette!</span></a><a class='goog-inline-block share-button sb-twitter' href='http://www.blogger.com/share-post.g?blogID=113083922500631895&postID=4645670632029404989&target=twitter' target='_blank' title='Del på Twitter'><span class='share-button-link-text'>Del på Twitter</span></a><a class='goog-inline-block share-button sb-facebook' href='http://www.blogger.com/share-post.g?blogID=113083922500631895&postID=4645670632029404989&target=facebook' onclick='window.open(this.href, "_blank", "height=430,width=640"); return false;' target='_blank' title='Del på Facebook'><span class='share-button-link-text'>Del på Facebook</span></a><div class='goog-inline-block dummy-container'><g:plusone source='blogger:blog:plusone' href='http://melposen.blogspot.com/2012/02/saftsentrifuge.html' size='medium' width='300' annotation='inline'/></div>
+</div>
+</div>
+<div class='post-footer-line post-footer-line-2'><span class='post-labels'>
+Etiketter:
+<a href='http://www.melposen.blogspot.no/search/label/Jucie' rel='tag'>Jucie</a>,
+<a href='http://www.melposen.blogspot.no/search/label/Saft' rel='tag'>Saft</a>,
+<a href='http://www.melposen.blogspot.no/search/label/Smoothie' rel='tag'>Smoothie</a>
+</span>
+</div>
+<div class='post-footer-line post-footer-line-3'><span class='post-location'>
+</span>
+</div>
+</div>
+</div>
+<div class='comments' id='comments'>
+<a name='comments'></a>
+<h4>Ingen kommentarer:</h4>
+<div id='Blog1_comments-block-wrapper'>
+<dl class='avatar-comment-indent' id='comments-block'>
+</dl>
+</div>
+<p class='comment-footer'>
+<div class='comment-form'>
+<a name='comment-form'></a>
+<h4 id='comment-post-message'>Legg inn en kommentar</h4>
+<p>
+</p>
+<a href='http://www.blogger.com/comment-iframe.g?blogID=113083922500631895&postID=4645670632029404989' id='comment-editor-src'></a>
+<iframe allowtransparency='true' class='blogger-iframe-colorize blogger-comment-from-post' frameborder='0' height='410' id='comment-editor' name='comment-editor' src='' width='100%'></iframe>
+<script type="text/javascript" src="https://www.blogger.com/static/v1/jsbin/3121195295-comment_from_post_iframe.js"></script>
+<script type='text/javascript'>
+      BLOG_CMT_createIframe('//www.blogger.com/rpc_relay.html', '00084580039004564474');
+    </script>
+</div>
+</p>
+<div id='backlinks-container'>
+<div id='Blog1_backlinks-container'>
+</div>
+</div>
+</div>
+</div>
+
+        </div></div>
+
+<!-- google_ad_section_end -->
+</div>
+<div class='blog-pager' id='blog-pager'>
+<span id='blog-pager-newer-link'>
+<a class='blog-pager-newer-link' href='http://www.melposen.blogspot.no/2012/03/after-eight-sjokoladekake-med.html' id='Blog1_blog-pager-newer-link' title='Nyere innlegg'>Nyere innlegg</a>
+</span>
+<span id='blog-pager-older-link'>
+<a class='blog-pager-older-link' href='http://www.melposen.blogspot.no/2012/02/hvordan-lage-en-saftig-kalkun-for.html' id='Blog1_blog-pager-older-link' title='Eldre innlegg'>Eldre innlegg</a>
+</span>
+<a class='home-link' href='http://www.melposen.blogspot.no/'>Start</a>
+</div>
+<div class='clear'></div>
+<div class='post-feeds'>
+<div class='feed-links'>
+Abonner på:
+<a class='feed-link' href='http://melposen.blogspot.com/feeds/4645670632029404989/comments/default' target='_blank' type='application/atom+xml'>Legg inn kommentarer (Atom)</a>
+</div>
+</div>
+<script type="text/javascript">window.___gcfg = {'lang': 'no'};</script>
+</div></div>
+</div>
+</div>
+<div class='column-left-outer'>
+<div class='column-left-inner'>
+<aside>
+</aside>
+</div>
+</div>
+<div class='column-right-outer'>
+<div class='column-right-inner'>
+<aside>
+<div class='sidebar section' id='sidebar-right-1'><div class='widget Profile' id='Profile1'>
+<h2>Om meg</h2>
+<div class='widget-content'>
+<a href='http://www.blogger.com/profile/03297498913659149933'><img alt='Mitt foto' class='profile-img' height='60' src='http://2.bp.blogspot.com/-h--fALO9bmw/TrEhF5izc8I/AAAAAAAAAW4/yAhIgIauGTA/s220/belocka-i-oreh-v-zzubah.jpg' width='80'/></a>
+<dl class='profile-datablock'>
+<dt class='profile-data'>
+<a class='profile-name-link g-profile' href='http://www.blogger.com/profile/03297498913659149933' rel='author' style='background-image: url(http://www.blogger.com/img/logo-16.png);'>
+Renate
+</a>
+</dt>
+<dd class='profile-textblock'>Hei! Jeg heter Renate og er 22 år, og kommer fra sørlandsbyen Farsund.
+Takk for at du stakk innom, legg gjerne igjen en kommentar!
+Kontakt: renate1907@hotmail.com</dd>
+</dl>
+<a class='profile-link' href='http://www.blogger.com/profile/03297498913659149933' rel='author'>Vis hele profilen min</a>
+<div class='clear'></div>
+<span class='widget-item-control'>
+<span class='item-control blog-admin'>
+<a class='quickedit' href='http://www.blogger.com/rearrange?blogID=113083922500631895&widgetType=Profile&widgetId=Profile1&action=editWidget&sectionId=sidebar-right-1' onclick='return _WidgetManager._PopupConfig(document.getElementById("Profile1"));' target='configProfile1' title='Rediger'>
+<img alt='' height='18' src='http://img1.blogblog.com/img/icon18_wrench_allbkg.png' width='18'/>
+</a>
+</span>
+</span>
+<div class='clear'></div>
+</div>
+</div><div class='widget Image' id='Image5'>
+<div class='widget-content'>
+<a href='http://melposen.blogspot.no/2013/01/rocky-road-brownies.html'>
+<img alt='' height='201' id='Image5_img' src='http://3.bp.blogspot.com/-CALgjDMsy_M/URTVNdS7Y2I/AAAAAAAAA6k/KzBZAVc4S1A/s300/DSC_0472.JPG' width='300'/>
+</a>
+<br/>
+<span class='caption'>Rocky road brownies</span>
+</div>
+<div class='clear'></div>
+<span class='widget-item-control'>
+<span class='item-control blog-admin'>
+<a class='quickedit' href='http://www.blogger.com/rearrange?blogID=113083922500631895&widgetType=Image&widgetId=Image5&action=editWidget&sectionId=sidebar-right-1' onclick='return _WidgetManager._PopupConfig(document.getElementById("Image5"));' target='configImage5' title='Rediger'>
+<img alt='' height='18' src='http://img1.blogblog.com/img/icon18_wrench_allbkg.png' width='18'/>
+</a>
+</span>
+</span>
+<div class='clear'></div>
+</div><div class='widget Image' id='Image7'>
+<div class='widget-content'>
+<a href='http://melposen.blogspot.no/2011/11/bryllupskake-del-2.html'>
+<img alt='' height='201' id='Image7_img' src='http://2.bp.blogspot.com/-X4KOM0FhiqU/URTVgeY6gUI/AAAAAAAAA68/W7WrTuOaGD0/s300/DSC_1391.JPG' width='300'/>
+</a>
+<br/>
+<span class='caption'>Ostekake med jordbær og druer</span>
+</div>
+<div class='clear'></div>
+<span class='widget-item-control'>
+<span class='item-control blog-admin'>
+<a class='quickedit' href='http://www.blogger.com/rearrange?blogID=113083922500631895&widgetType=Image&widgetId=Image7&action=editWidget&sectionId=sidebar-right-1' onclick='return _WidgetManager._PopupConfig(document.getElementById("Image7"));' target='configImage7' title='Rediger'>
+<img alt='' height='18' src='http://img1.blogblog.com/img/icon18_wrench_allbkg.png' width='18'/>
+</a>
+</span>
+</span>
+<div class='clear'></div>
+</div><div class='widget Image' id='Image6'>
+<div class='widget-content'>
+<a href='http://melposen.blogspot.no/2011/10/freia-kake.html'>
+<img alt='' height='215' id='Image6_img' src='http://3.bp.blogspot.com/-g7lgbe4JzWc/URTVXcrry8I/AAAAAAAAA6w/nXoDdRnxmPs/s300/kake3.JPG' width='300'/>
+</a>
+<br/>
+<span class='caption'>Dronningsens drøm sjokoladekake</span>
+</div>
+<div class='clear'></div>
+<span class='widget-item-control'>
+<span class='item-control blog-admin'>
+<a class='quickedit' href='http://www.blogger.com/rearrange?blogID=113083922500631895&widgetType=Image&widgetId=Image6&action=editWidget&sectionId=sidebar-right-1' onclick='return _WidgetManager._PopupConfig(document.getElementById("Image6"));' target='configImage6' title='Rediger'>
+<img alt='' height='18' src='http://img1.blogblog.com/img/icon18_wrench_allbkg.png' width='18'/>
+</a>
+</span>
+</span>
+<div class='clear'></div>
+</div><div class='widget CustomSearch' id='CustomSearch1'>
+<h2 class='title'>Søk i denne bloggen</h2>
+<div class='widget-content'>
+<div id='CustomSearch1_form'>
+<span class='cse-status'>Laster inn...</span>
+</div>
+</div>
+<style type='text/css'>
+      #uds-searchControl .gs-result .gs-title,
+      #uds-searchControl .gs-result .gs-title *,
+      #uds-searchControl .gsc-results .gsc-trailing-more-results,
+      #uds-searchControl .gsc-results .gsc-trailing-more-results * {
+        color:#1fbb0c;
+      }
+
+      #uds-searchControl .gs-result .gs-title a:visited,
+      #uds-searchControl .gs-result .gs-title a:visited * {
+        color:#1fbb0c;
+      }
+
+      #uds-searchControl .gs-relativePublishedDate,
+      #uds-searchControl .gs-publishedDate {
+        color: #ca88b3;
+      }
+
+      #uds-searchControl .gs-result a.gs-visibleUrl,
+      #uds-searchControl .gs-result .gs-visibleUrl {
+        color: #1fbb0c;
+      }
+
+      #uds-searchControl .gsc-results {
+        border-color: #908d6a;
+        background-color: #fefdfa;
+      }
+
+      #uds-searchControl .gsc-tabhActive {
+        border-color: #908d6a;
+        border-top-color: #908d6a;
+        background-color: #fefdfa;
+        color: #333333;
+      }
+
+      #uds-searchControl .gsc-tabhInactive {
+        border-color: #908d6a;
+        background-color: transparent;
+        color: #1fbb0c;
+      }
+
+      #uds-searchClearResults {
+        border-color: #908d6a;
+      }
+
+      #uds-searchClearResults:hover {
+        border-color: #908d6a;
+      }
+
+      #uds-searchControl .gsc-cursor-page {
+        color: #1fbb0c;
+      }
+
+      #uds-searchControl .gsc-cursor-current-page {
+        color: #333333;
+      }
+    </style>
+<div class='clear'></div>
+<span class='widget-item-control'>
+<span class='item-control blog-admin'>
+<a class='quickedit' href='http://www.blogger.com/rearrange?blogID=113083922500631895&widgetType=CustomSearch&widgetId=CustomSearch1&action=editWidget&sectionId=sidebar-right-1' onclick='return _WidgetManager._PopupConfig(document.getElementById("CustomSearch1"));' target='configCustomSearch1' title='Rediger'>
+<img alt='' height='18' src='http://img1.blogblog.com/img/icon18_wrench_allbkg.png' width='18'/>
+</a>
+</span>
+</span>
+<div class='clear'></div>
+</div><div class='widget Label' id='Label1'>
+<h2>Etiketter</h2>
+<div class='widget-content cloud-label-widget-content'>
+<span class='label-size label-size-1'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Boller'>Boller</a>
+<span class='label-count' dir='ltr'>(1)</span>
+</span>
+<span class='label-size label-size-1'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Brownies'>Brownies</a>
+<span class='label-count' dir='ltr'>(1)</span>
+</span>
+<span class='label-size label-size-3'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Bryllupskaker'>Bryllupskaker</a>
+<span class='label-count' dir='ltr'>(5)</span>
+</span>
+<span class='label-size label-size-1'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Br%C3%B8d'>Brød</a>
+<span class='label-count' dir='ltr'>(1)</span>
+</span>
+<span class='label-size label-size-3'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Bursdagskaker'>Bursdagskaker</a>
+<span class='label-count' dir='ltr'>(4)</span>
+</span>
+<span class='label-size label-size-4'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Dessert'>Dessert</a>
+<span class='label-count' dir='ltr'>(8)</span>
+</span>
+<span class='label-size label-size-1'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Dressing'>Dressing</a>
+<span class='label-count' dir='ltr'>(1)</span>
+</span>
+<span class='label-size label-size-2'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/D%C3%A5p'>Dåp</a>
+<span class='label-count' dir='ltr'>(2)</span>
+</span>
+<span class='label-size label-size-3'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Festmiddag'>Festmiddag</a>
+<span class='label-count' dir='ltr'>(4)</span>
+</span>
+<span class='label-size label-size-1'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Fisk'>Fisk</a>
+<span class='label-count' dir='ltr'>(1)</span>
+</span>
+<span class='label-size label-size-2'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Formkaker'>Formkaker</a>
+<span class='label-count' dir='ltr'>(2)</span>
+</span>
+<span class='label-size label-size-3'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Fromasjkaker'>Fromasjkaker</a>
+<span class='label-count' dir='ltr'>(3)</span>
+</span>
+<span class='label-size label-size-4'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Frukt%20og%20B%C3%A6r'>Frukt og Bær</a>
+<span class='label-count' dir='ltr'>(9)</span>
+</span>
+<span class='label-size label-size-3'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Gj%C3%A6rbakst'>Gjærbakst</a>
+<span class='label-count' dir='ltr'>(3)</span>
+</span>
+<span class='label-size label-size-2'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Gr%C3%B8nnsaker'>Grønnsaker</a>
+<span class='label-count' dir='ltr'>(2)</span>
+</span>
+<span class='label-size label-size-1'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Icing'>Icing</a>
+<span class='label-count' dir='ltr'>(1)</span>
+</span>
+<span class='label-size label-size-3'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Iskake'>Iskake</a>
+<span class='label-count' dir='ltr'>(3)</span>
+</span>
+<span class='label-size label-size-1'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Iskrem'>Iskrem</a>
+<span class='label-count' dir='ltr'>(1)</span>
+</span>
+<span class='label-size label-size-1'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Jucie'>Jucie</a>
+<span class='label-count' dir='ltr'>(1)</span>
+</span>
+<span class='label-size label-size-3'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Julebakst'>Julebakst</a>
+<span class='label-count' dir='ltr'>(5)</span>
+</span>
+<span class='label-size label-size-1'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Julemat'>Julemat</a>
+<span class='label-count' dir='ltr'>(1)</span>
+</span>
+<span class='label-size label-size-4'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Kaker%20med%20frukt%2Fb%C3%A6r'>Kaker med frukt/bær</a>
+<span class='label-count' dir='ltr'>(7)</span>
+</span>
+<span class='label-size label-size-2'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Konfekt'>Konfekt</a>
+<span class='label-count' dir='ltr'>(2)</span>
+</span>
+<span class='label-size label-size-1'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Konfirmasjon'>Konfirmasjon</a>
+<span class='label-count' dir='ltr'>(1)</span>
+</span>
+<span class='label-size label-size-3'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Kosemat'>Kosemat</a>
+<span class='label-count' dir='ltr'>(4)</span>
+</span>
+<span class='label-size label-size-1'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Lapper'>Lapper</a>
+<span class='label-count' dir='ltr'>(1)</span>
+</span>
+<span class='label-size label-size-2'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/LCHF'>LCHF</a>
+<span class='label-count' dir='ltr'>(2)</span>
+</span>
+<span class='label-size label-size-2'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Makroner'>Makroner</a>
+<span class='label-count' dir='ltr'>(2)</span>
+</span>
+<span class='label-size label-size-2'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Marengs'>Marengs</a>
+<span class='label-count' dir='ltr'>(2)</span>
+</span>
+<span class='label-size label-size-3'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Marsipankaker'>Marsipankaker</a>
+<span class='label-count' dir='ltr'>(5)</span>
+</span>
+<span class='label-size label-size-5'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Middagstips'>Middagstips</a>
+<span class='label-count' dir='ltr'>(16)</span>
+</span>
+<span class='label-size label-size-3'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Muffins'>Muffins</a>
+<span class='label-count' dir='ltr'>(4)</span>
+</span>
+<span class='label-size label-size-2'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Oreo%20oppskrifter'>Oreo oppskrifter</a>
+<span class='label-count' dir='ltr'>(2)</span>
+</span>
+<span class='label-size label-size-3'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Ostekaker'>Ostekaker</a>
+<span class='label-count' dir='ltr'>(3)</span>
+</span>
+<span class='label-size label-size-1'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Pepperkakehus'>Pepperkakehus</a>
+<span class='label-count' dir='ltr'>(1)</span>
+</span>
+<span class='label-size label-size-1'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Pizza'>Pizza</a>
+<span class='label-count' dir='ltr'>(1)</span>
+</span>
+<span class='label-size label-size-1'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Rullekaker'>Rullekaker</a>
+<span class='label-count' dir='ltr'>(1)</span>
+</span>
+<span class='label-size label-size-2'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Saft'>Saft</a>
+<span class='label-count' dir='ltr'>(2)</span>
+</span>
+<span class='label-size label-size-1'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Salater'>Salater</a>
+<span class='label-count' dir='ltr'>(1)</span>
+</span>
+<span class='label-size label-size-5'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Sjokoladekaker'>Sjokoladekaker</a>
+<span class='label-count' dir='ltr'>(13)</span>
+</span>
+<span class='label-size label-size-1'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Smoothie'>Smoothie</a>
+<span class='label-count' dir='ltr'>(1)</span>
+</span>
+<span class='label-size label-size-3'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Sm%C3%A5kaker'>Småkaker</a>
+<span class='label-count' dir='ltr'>(3)</span>
+</span>
+<span class='label-size label-size-2'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Sorbet'>Sorbet</a>
+<span class='label-count' dir='ltr'>(2)</span>
+</span>
+<span class='label-size label-size-3'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Supper'>Supper</a>
+<span class='label-count' dir='ltr'>(3)</span>
+</span>
+<span class='label-size label-size-1'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Trinn%20for%20trinn'>Trinn for trinn</a>
+<span class='label-count' dir='ltr'>(1)</span>
+</span>
+<span class='label-size label-size-1'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Vaffler'>Vaffler</a>
+<span class='label-count' dir='ltr'>(1)</span>
+</span>
+<span class='label-size label-size-1'>
+<a dir='ltr' href='http://www.melposen.blogspot.no/search/label/Vafler'>Vafler</a>
+<span class='label-count' dir='ltr'>(1)</span>
+</span>
+<div class='clear'></div>
+<span class='widget-item-control'>
+<span class='item-control blog-admin'>
+<a class='quickedit' href='http://www.blogger.com/rearrange?blogID=113083922500631895&widgetType=Label&widgetId=Label1&action=editWidget&sectionId=sidebar-right-1' onclick='return _WidgetManager._PopupConfig(document.getElementById("Label1"));' target='configLabel1' title='Rediger'>
+<img alt='' height='18' src='http://img1.blogblog.com/img/icon18_wrench_allbkg.png' width='18'/>
+</a>
+</span>
+</span>
+<div class='clear'></div>
+</div>
+</div><div class='widget Image' id='Image4'>
+<div class='widget-content'>
+<a href='http://melposen.blogspot.no/2012/08/jeg-har-prvd-perfeksonjere-disse.html'>
+<img alt='' height='167' id='Image4_img' src='http://3.bp.blogspot.com/-NG1s_vgWQts/URTUc6GR91I/AAAAAAAAA6Y/dJYSYBWL61Y/s250/DSC_2334.JPG' width='250'/>
+</a>
+<br/>
+<span class='caption'>Mandelmakroner</span>
+</div>
+<div class='clear'></div>
+<span class='widget-item-control'>
+<span class='item-control blog-admin'>
+<a class='quickedit' href='http://www.blogger.com/rearrange?blogID=113083922500631895&widgetType=Image&widgetId=Image4&action=editWidget&sectionId=sidebar-right-1' onclick='return _WidgetManager._PopupConfig(document.getElementById("Image4"));' target='configImage4' title='Rediger'>
+<img alt='' height='18' src='http://img1.blogblog.com/img/icon18_wrench_allbkg.png' width='18'/>
+</a>
+</span>
+</span>
+<div class='clear'></div>
+</div><div class='widget Image' id='Image3'>
+<div class='widget-content'>
+<a href='http://melposen.blogspot.no/2012/05/17-mai-kake-nigellas-guinness-kake.html'>
+<img alt='' height='167' id='Image3_img' src='http://3.bp.blogspot.com/-dB0V8bwFEIo/URTUUqkW0AI/AAAAAAAAA6M/-jy72QhNavY/s250/DSC_1818.JPG' width='250'/>
+</a>
+<br/>
+<span class='caption'>Guiness sjokoladekake</span>
+</div>
+<div class='clear'></div>
+<span class='widget-item-control'>
+<span class='item-control blog-admin'>
+<a class='quickedit' href='http://www.blogger.com/rearrange?blogID=113083922500631895&widgetType=Image&widgetId=Image3&action=editWidget&sectionId=sidebar-right-1' onclick='return _WidgetManager._PopupConfig(document.getElementById("Image3"));' target='configImage3' title='Rediger'>
+<img alt='' height='18' src='http://img1.blogblog.com/img/icon18_wrench_allbkg.png' width='18'/>
+</a>
+</span>
+</span>
+<div class='clear'></div>
+</div><div class='widget Image' id='Image1'>
+<div class='widget-content'>
+<a href='http://melposen.blogspot.no/2012/08/bryllupshjerte.html'>
+<img alt='' height='167' id='Image1_img' src='http://1.bp.blogspot.com/-_3CY4n85iGg/URTUIIxiNZI/AAAAAAAAA6A/fw0hRjA_yxw/s250/DSC_0142.JPG' width='250'/>
+</a>
+<br/>
+<span class='caption'>Bløtkake med MMF</span>
+</div>
+<div class='clear'></div>
+<span class='widget-item-control'>
+<span class='item-control blog-admin'>
+<a class='quickedit' href='http://www.blogger.com/rearrange?blogID=113083922500631895&widgetType=Image&widgetId=Image1&action=editWidget&sectionId=sidebar-right-1' onclick='return _WidgetManager._PopupConfig(document.getElementById("Image1"));' target='configImage1' title='Rediger'>
+<img alt='' height='18' src='http://img1.blogblog.com/img/icon18_wrench_allbkg.png' width='18'/>
+</a>
+</span>
+</span>
+<div class='clear'></div>
+</div><div class='widget Image' id='Image2'>
+<div class='widget-content'>
+<a href='http://matbloggsentralen.com/'>
+<img alt='' height='161' id='Image2_img' src='http://3.bp.blogspot.com/-H0-Dsofw9BE/Tso6RMESkCI/AAAAAAAAAYQ/wJlgYWzUgqw/s200/116392785119183.728.1209353033.png' width='120'/>
+</a>
+<br/>
+</div>
+<div class='clear'></div>
+<span class='widget-item-control'>
+<span class='item-control blog-admin'>
+<a class='quickedit' href='http://www.blogger.com/rearrange?blogID=113083922500631895&widgetType=Image&widgetId=Image2&action=editWidget&sectionId=sidebar-right-1' onclick='return _WidgetManager._PopupConfig(document.getElementById("Image2"));' target='configImage2' title='Rediger'>
+<img alt='' height='18' src='http://img1.blogblog.com/img/icon18_wrench_allbkg.png' width='18'/>
+</a>
+</span>
+</span>
+<div class='clear'></div>
+</div><div class='widget BlogList' id='BlogList1'>
+<h2 class='title'>Min bloggliste</h2>
+<div class='widget-content'>
+<div class='blog-list-container' id='BlogList1_container'>
+<ul id='BlogList1_blogs'>
+</ul>
+<div class='clear'></div>
+<span class='widget-item-control'>
+<span class='item-control blog-admin'>
+<a class='quickedit' href='http://www.blogger.com/rearrange?blogID=113083922500631895&widgetType=BlogList&widgetId=BlogList1&action=editWidget&sectionId=sidebar-right-1' onclick='return _WidgetManager._PopupConfig(document.getElementById("BlogList1"));' target='configBlogList1' title='Rediger'>
+<img alt='' height='18' src='http://img1.blogblog.com/img/icon18_wrench_allbkg.png' width='18'/>
+</a>
+</span>
+</span>
+<div class='clear'></div>
+</div>
+</div>
+</div><div class='widget Followers' id='Followers1'>
+<h2 class='title'>Følgere</h2>
+<div class='widget-content'>
+<div id='Followers1-wrapper'>
+<div style='margin-right:2px;'>
+<script type="text/javascript">
+        if (!window.google || !google.friendconnect) {
+          document.write('<script type="text/javascript"' +
+              'src="http://www.google.com/friendconnect/script/friendconnect.js">' +
+              '</scr' + 'ipt>');
+        }
+      </script>
+<script type="text/javascript">
+      if (!window.registeredBloggerCallbacks) {
+        window.registeredBloggerCallbacks = true;
+
+
+
+
+        gadgets.rpc.register('requestReload', function() {
+          document.location.reload();
+        });
+
+
+        gadgets.rpc.register('requestSignOut', function(siteId) {
+
+          google.friendconnect.container.openSocialSiteId = siteId;
+          google.friendconnect.requestSignOut();
+        });
+      }
+    </script>
+<script type="text/javascript">
+
+    function registerGetBlogUrls() {
+      gadgets.rpc.register('getBlogUrls', function() {
+        var holder = {};
+
+
+
+
+              holder.currentPost = "http://www.blogger.com/feeds/113083922500631895/posts/default/4645670632029404989";
+
+
+
+              holder.currentComments = "http://www.blogger.com/feeds/113083922500631895/4645670632029404989/comments/default";
+
+            holder.currentPostUrl = "";
+            holder.currentPostId = 4645670632029404989
+
+
+
+            holder.postFeed = "http://www.blogger.com/feeds/113083922500631895/posts/default";
+
+
+
+            holder.commentFeed = "http://www.blogger.com/feeds/113083922500631895/comments/default";
+
+          holder.currentBlogUrl = "http://melposen.blogspot.com/";
+          holder.currentBlogId = "113083922500631895";
+
+        return holder;
+      });
+    }
+  </script>
+<script type="text/javascript">
+  if (!window.registeredCommonBloggerCallbacks) {
+    window.registeredCommonBloggerCallbacks = true;
+
+    gadgets.rpc.register('resize_iframe', function(height) {
+      var el = document.getElementById(this['f']);
+      if (el) {
+        el.style.height = height + 'px';
+      }
+    });
+
+
+    gadgets.rpc.register('set_pref', function() {});
+
+    registerGetBlogUrls();
+  }
+  </script>
+<div id="div-1a7iitxk5upxu" style="width: 100%; "></div>
+<script type="text/javascript">
+    var skin = {};
+    skin['FACE_SIZE'] = '32';
+    skin['HEIGHT'] = "260";
+    skin['TITLE'] = "F\xf8lgere";
+    skin['BORDER_COLOR'] = "transparent";
+    skin['ENDCAP_BG_COLOR'] = "transparent";
+    skin['ENDCAP_TEXT_COLOR'] = "#333333";
+    skin['ENDCAP_LINK_COLOR'] = "#1fbb0c";
+    skin['ALTERNATE_BG_COLOR'] = "transparent";
+
+    skin['CONTENT_BG_COLOR'] = "transparent";
+    skin['CONTENT_LINK_COLOR'] = "#1fbb0c";
+    skin['CONTENT_TEXT_COLOR'] = "#333333";
+    skin['CONTENT_SECONDARY_LINK_COLOR'] = "#1fbb0c";
+    skin['CONTENT_SECONDARY_TEXT_COLOR'] = "#666666";
+    skin['CONTENT_HEADLINE_COLOR'] = "#ca88b3";
+    skin['FONT_FACE'] = "normal normal 18px Coming Soon";
+    google.friendconnect.container.setParentUrl("/");
+    google.friendconnect.container["renderMembersGadget"](
+    {id: "div-1a7iitxk5upxu",
+     height: 260,
+
+
+
+     site: "00084580039004564474",
+
+     locale: 'no' },
+     skin);
+  </script>
+</div>
+</div>
+<div class='clear'></div>
+<span class='widget-item-control'>
+<span class='item-control blog-admin'>
+<a class='quickedit' href='http://www.blogger.com/rearrange?blogID=113083922500631895&widgetType=Followers&widgetId=Followers1&action=editWidget&sectionId=sidebar-right-1' onclick='return _WidgetManager._PopupConfig(document.getElementById("Followers1"));' target='configFollowers1' title='Rediger'>
+<img alt='' height='18' src='http://img1.blogblog.com/img/icon18_wrench_allbkg.png' width='18'/>
+</a>
+</span>
+</span>
+<div class='clear'></div>
+</div>
+</div><div class='widget BlogArchive' id='BlogArchive1'>
+<h2>Bloggarkiv</h2>
+<div class='widget-content'>
+<div class='ltr' id='ArchiveList'>
+<div id='BlogArchive1_ArchiveList'>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/search?updated-min=2013-01-01T00:00:00-08:00&amp;updated-max=2014-01-01T00:00:00-08:00&amp;max-results=26'>2013</a>
+<span class='post-count' dir='ltr'>(26)</span>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2013_12_01_archive.html'>desember</a>
+<span class='post-count' dir='ltr'>(2)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2013_08_01_archive.html'>august</a>
+<span class='post-count' dir='ltr'>(5)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2013_07_01_archive.html'>juli</a>
+<span class='post-count' dir='ltr'>(2)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2013_06_01_archive.html'>juni</a>
+<span class='post-count' dir='ltr'>(1)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2013_05_01_archive.html'>mai</a>
+<span class='post-count' dir='ltr'>(3)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2013_04_01_archive.html'>april</a>
+<span class='post-count' dir='ltr'>(4)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2013_03_01_archive.html'>mars</a>
+<span class='post-count' dir='ltr'>(3)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2013_02_01_archive.html'>februar</a>
+<span class='post-count' dir='ltr'>(2)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2013_01_01_archive.html'>januar</a>
+<span class='post-count' dir='ltr'>(4)</span>
+</li>
+</ul>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate expanded'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/search?updated-min=2012-01-01T00:00:00-08:00&amp;updated-max=2013-01-01T00:00:00-08:00&amp;max-results=44'>2012</a>
+<span class='post-count' dir='ltr'>(44)</span>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2012_12_01_archive.html'>desember</a>
+<span class='post-count' dir='ltr'>(4)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2012_11_01_archive.html'>november</a>
+<span class='post-count' dir='ltr'>(3)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2012_10_01_archive.html'>oktober</a>
+<span class='post-count' dir='ltr'>(5)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2012_09_01_archive.html'>september</a>
+<span class='post-count' dir='ltr'>(2)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2012_08_01_archive.html'>august</a>
+<span class='post-count' dir='ltr'>(3)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2012_07_01_archive.html'>juli</a>
+<span class='post-count' dir='ltr'>(3)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2012_06_01_archive.html'>juni</a>
+<span class='post-count' dir='ltr'>(1)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2012_05_01_archive.html'>mai</a>
+<span class='post-count' dir='ltr'>(8)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2012_04_01_archive.html'>april</a>
+<span class='post-count' dir='ltr'>(1)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2012_03_01_archive.html'>mars</a>
+<span class='post-count' dir='ltr'>(4)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate expanded'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2012_02_01_archive.html'>februar</a>
+<span class='post-count' dir='ltr'>(4)</span>
+<ul class='posts'>
+<li><a href='http://www.melposen.blogspot.no/2012/02/saftsentrifuge.html'>SAFTSENTRIFUGE</a></li>
+<li><a href='http://www.melposen.blogspot.no/2012/02/hvordan-lage-en-saftig-kalkun-for.html'>Hvordan lage en saftig kalkun, for amatører</a></li>
+<li><a href='http://www.melposen.blogspot.no/2012/02/charlotte-russe.html'>Charlotte Russe</a></li>
+<li><a href='http://www.melposen.blogspot.no/2012/02/eple-blabr-kake.html'>Eple / blåbær kake</a></li>
+</ul>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2012_01_01_archive.html'>januar</a>
+<span class='post-count' dir='ltr'>(6)</span>
+</li>
+</ul>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/search?updated-min=2011-01-01T00:00:00-08:00&amp;updated-max=2012-01-01T00:00:00-08:00&amp;max-results=33'>2011</a>
+<span class='post-count' dir='ltr'>(33)</span>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2011_12_01_archive.html'>desember</a>
+<span class='post-count' dir='ltr'>(4)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2011_11_01_archive.html'>november</a>
+<span class='post-count' dir='ltr'>(9)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2011_10_01_archive.html'>oktober</a>
+<span class='post-count' dir='ltr'>(6)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2011_09_01_archive.html'>september</a>
+<span class='post-count' dir='ltr'>(4)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2011_04_01_archive.html'>april</a>
+<span class='post-count' dir='ltr'>(2)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2011_03_01_archive.html'>mars</a>
+<span class='post-count' dir='ltr'>(1)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2011_02_01_archive.html'>februar</a>
+<span class='post-count' dir='ltr'>(6)</span>
+</li>
+</ul>
+<ul class='hierarchy'>
+<li class='archivedate collapsed'>
+<a class='toggle'>
+<span class='zippy'></span>
+</a>
+<a class='post-count-link' href='http://www.melposen.blogspot.no/2011_01_01_archive.html'>januar</a>
+<span class='post-count' dir='ltr'>(1)</span>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+<div class='clear'></div>
+<span class='widget-item-control'>
+<span class='item-control blog-admin'>
+<a class='quickedit' href='http://www.blogger.com/rearrange?blogID=113083922500631895&widgetType=BlogArchive&widgetId=BlogArchive1&action=editWidget&sectionId=sidebar-right-1' onclick='return _WidgetManager._PopupConfig(document.getElementById("BlogArchive1"));' target='configBlogArchive1' title='Rediger'>
+<img alt='' height='18' src='http://img1.blogblog.com/img/icon18_wrench_allbkg.png' width='18'/>
+</a>
+</span>
+</span>
+<div class='clear'></div>
+</div>
+</div><div class='widget PageList' id='PageList1'>
+<h2>Sider</h2>
+<div class='widget-content'>
+<ul>
+<li><a href='http://www.melposen.blogspot.no/'>Start</a></li>
+</ul>
+<div class='clear'></div>
+<span class='widget-item-control'>
+<span class='item-control blog-admin'>
+<a class='quickedit' href='http://www.blogger.com/rearrange?blogID=113083922500631895&widgetType=PageList&widgetId=PageList1&action=editWidget&sectionId=sidebar-right-1' onclick='return _WidgetManager._PopupConfig(document.getElementById("PageList1"));' target='configPageList1' title='Rediger'>
+<img alt='' height='18' src='http://img1.blogblog.com/img/icon18_wrench_allbkg.png' width='18'/>
+</a>
+</span>
+</span>
+<div class='clear'></div>
+</div>
+</div></div>
+</aside>
+</div>
+</div>
+</div>
+<div style='clear: both'></div>
+<!-- columns -->
+</div>
+<!-- main -->
+</div>
+</div>
+<div class='main-cap-bottom cap-bottom'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+</div>
+<footer>
+<div class='footer-outer'>
+<div class='footer-cap-top cap-top'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+<div class='fauxborder-left footer-fauxborder-left'>
+<div class='fauxborder-right footer-fauxborder-right'></div>
+<div class='region-inner footer-inner'>
+<div class='foot section' id='footer-1'></div>
+<table border='0' cellpadding='0' cellspacing='0' class='section-columns columns-2'>
+<tbody>
+<tr>
+<td class='first columns-cell'>
+<div class='foot section' id='footer-2-1'></div>
+</td>
+<td class='columns-cell'>
+<div class='foot section' id='footer-2-2'></div>
+</td>
+</tr>
+</tbody>
+</table>
+<!-- outside of the include in order to lock Attribution widget -->
+<div class='foot section' id='footer-3'><div class='widget Attribution' id='Attribution1'>
+<div class='widget-content' style='text-align: center;'>
+Awesome Inc.-mal. Malbilder av <a href='http://jasonmorrow.etsy.com/' target='_blank'>Jason Morrow</a>. Drevet av <a href='http://www.blogger.com/' target='_blank'>Blogger</a>.
+</div>
+<div class='clear'></div>
+<span class='widget-item-control'>
+<span class='item-control blog-admin'>
+<a class='quickedit' href='http://www.blogger.com/rearrange?blogID=113083922500631895&widgetType=Attribution&widgetId=Attribution1&action=editWidget&sectionId=footer-3' onclick='return _WidgetManager._PopupConfig(document.getElementById("Attribution1"));' target='configAttribution1' title='Rediger'>
+<img alt='' height='18' src='http://img1.blogblog.com/img/icon18_wrench_allbkg.png' width='18'/>
+</a>
+</span>
+</span>
+<div class='clear'></div>
+</div></div>
+</div>
+</div>
+<div class='footer-cap-bottom cap-bottom'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+</div>
+</footer>
+<!-- content -->
+</div>
+</div>
+<div class='content-cap-bottom cap-bottom'>
+<div class='cap-left'></div>
+<div class='cap-right'></div>
+</div>
+</div>
+</div>
+<script type='text/javascript'>
+    window.setTimeout(function() {
+        document.body.className = document.body.className.replace('loading', '');
+      }, 10);
+  </script>
+<script type="text/javascript">
+if (window.jstiming) window.jstiming.load.tick('widgetJsBefore');
+</script><script type="text/javascript" src="https://www.blogger.com/static/v1/widgets/645744161-widgets.js"></script>
+<script type="text/javascript" src="https://apis.google.com/js/plusone.js"></script>
+<script type="text/javascript" src="http://www.google.com/jsapi"></script>
+<script type='text/javascript'>
+if (typeof(BLOG_attachCsiOnload) != 'undefined' && BLOG_attachCsiOnload != null) { window['blogger_templates_experiment_id'] = "templatesV2";window['blogger_blog_id'] = '113083922500631895';BLOG_attachCsiOnload('item_'); }_WidgetManager._Init('//www.blogger.com/rearrange?blogID\x3d113083922500631895','//www.melposen.blogspot.no/2012/02/saftsentrifuge.html','113083922500631895');
+_WidgetManager._SetDataContext([{'name': 'blog', 'data': {'blogId': '113083922500631895', 'bloggerUrl': 'http://www.blogger.com/', 'title': 'melposen.blogspot.no', 'pageType': 'item', 'url': 'http://www.melposen.blogspot.no/2012/02/saftsentrifuge.html', 'canonicalUrl': 'http://melposen.blogspot.com/2012/02/saftsentrifuge.html', 'canonicalHomepageUrl': 'http://melposen.blogspot.com/', 'homepageUrl': 'http://www.melposen.blogspot.no/', 'blogspotFaviconUrl': 'http://www.melposen.blogspot.no/favicon.ico', 'enabledCommentProfileImages': true, 'adultContent': false, 'disableAdSenseWidget': false, 'analyticsAccountNumber': '', 'searchLabel': '', 'searchQuery': '', 'pageName': 'SAFTSENTRIFUGE', 'pageTitle': 'melposen.blogspot.no: SAFTSENTRIFUGE', 'encoding': 'UTF-8', 'locale': 'no', 'localeUnderscoreDelimited': 'no', 'isPrivate': false, 'isMobile': false, 'isMobileRequest': false, 'mobileClass': '', 'isPrivateBlog': false, 'languageDirection': 'ltr', 'feedLinks': '\74link rel\75\42alternate\42 type\75\42application/atom+xml\42 title\75\42melposen.blogspot.no - Atom\42 href\75\42http://melposen.blogspot.com/feeds/posts/default\42 /\76\n\74link rel\75\42alternate\42 type\75\42application/rss+xml\42 title\75\42melposen.blogspot.no - RSS\42 href\75\42http://melposen.blogspot.com/feeds/posts/default?alt\75rss\42 /\76\n\74link rel\75\42service.post\42 type\75\42application/atom+xml\42 title\75\42melposen.blogspot.no - Atom\42 href\75\42http://www.blogger.com/feeds/113083922500631895/posts/default\42 /\76\n\n\74link rel\75\42alternate\42 type\75\42application/atom+xml\42 title\75\42melposen.blogspot.no - Atom\42 href\75\42http://melposen.blogspot.com/feeds/4645670632029404989/comments/default\42 /\76\n', 'meTag': '', 'openIdOpTag': '', 'postImageThumbnailUrl': 'http://4.bp.blogspot.com/-N9t4bbHB3gk/T0YHqMtaqsI/AAAAAAAAAeg/Ij0rS0lzf2E/s72-c/hr1861dvd.jpg', 'imageSrcTag': '\74link rel\75\42image_src\42 
href\75\42http://4.bp.blogspot.com/-N9t4bbHB3gk/T0YHqMtaqsI/AAAAAAAAAeg/Ij0rS0lzf2E/s72-c/hr1861dvd.jpg\42 /\76\n', 'latencyHeadScript': '\74script type\75\42text/javascript\42\76(function() { var b\75window,f\75\42chrome\42,g\75\42jstiming\42,k\75\42tick\42;(function(){function d(a){this.t\75{};this.tick\75function(a,d,c){var e\75void 0!\75c?c:(new Date).getTime();this.t[a]\75[e,d];if(void 0\75\75c)try{b.console.timeStamp(\42CSI/\42+a)}catch(h){}};this[k](\42start\42,null,a)}var a;b.performance\46\46(a\75b.performance.timing);var n\75a?new d(a.responseStart):new d;b.jstiming\75{Timer:d,load:n};if(a){var c\75a.navigationStart,h\75a.responseStart;0\74c\46\46h\76\75c\46\46(b[g].srt\75h-c)}if(a){var e\75b[g].load;0\74c\46\46h\76\75c\46\46(e[k](\42_wtsrt\42,void 0,c),e[k](\42wtsrt_\42,\42_wtsrt\42,h),e[k](\42tbsd_\42,\42wtsrt_\42))}try{a\75null,\nb[f]\46\46b[f].csi\46\46(a\75Math.floor(b[f].csi().pageT),e\46\0460\74c\46\46(e[k](\42_tbnd\42,void 0,b[f].csi().startE),e[k](\42tbnd_\42,\42_tbnd\42,c))),null\75\75a\46\46b.gtbExternal\46\46(a\75b.gtbExternal.pageT()),null\75\75a\46\46b.external\46\46(a\75b.external.pageT,e\46\0460\74c\46\46(e[k](\42_tbnd\42,void 0,b.external.startE),e[k](\42tbnd_\42,\42_tbnd\42,c))),a\46\46(b[g].pt\75a)}catch(p){}})();b.tickAboveFold\75function(d){var a\0750;if(d.offsetParent){do a+\75d.offsetTop;while(d\75d.offsetParent)}d\75a;750\76\75d\46\46b[g].load[k](\42aft\42)};var l\75!1;function m(){l||(l\75!0,b[g].load[k](\42firstScrollTime\42))}b.addEventListener?b.addEventListener(\42scroll\42,m,!1):b.attachEvent(\42onscroll\42,m);\n })();\74/script\076', 'mobileHeadScript': '', 'ieCssRetrofitLinks': '\74!--[if IE]\76\74script type\75\42text/javascript\42 src\75\42https://www.blogger.com/static/v1/jsbin/850967532-ieretrofit.js\42\76\74/script\76\n\74![endif]--\076', 'view': '', 'dynamicViewsCommentsSrc': '//www.blogblog.com/dynamicviews/4224c15c4e7c9321/js/comments.js', 'dynamicViewsScriptSrc': '//www.blogblog.com/dynamicviews/d6f37bb30c327165', 
'plusOneApiSrc': 'https://apis.google.com/js/plusone.js', 'testHtml5CssSrc': 'https://www.blogger.com/static/v1/widgets/2412321170-css_bundle_html5.css', 'sf': 'n', 'tf': ''}}, {'name': 'skin', 'data': {'vars': {'date_border_size': '0', 'description_font': 'normal bold 30px Coming Soon', 'body_background_gradient_cap': 'none', 'tabs_selected_background_color': '#63704b', 'footer_background_gradient': 'none', 'date_background': 'transparent none no-repeat scroll top left', 'widget_border_radius': '5px', 'post_title_text_color': '#ca88b3', 'widget_font': 'normal normal 14px Coming Soon', 'widget_link_hover_color': '#ca88b3', 'link_visited_color': '#1fbb0c', 'mobile_background_size': 'auto', 'date_space': '30px', 'post_title_font': 'normal normal 30px Coming Soon', 'tabs_text_color': '#1fbb0c', 'title_separator_border_size': '1px', 'header_background_gradient': 'none', 'widget_outer_background_gradient': 'none', 'widget_link_color': '#ca88b3', 'mobile_button_color': '#ffffff', 'widget_title_font': 'normal bold 14px Coming Soon', 'widget_outer_background_color': 'transparent', 'date_last_border_radius_bottom': '5px', 'widget_outer_margin_top': '0', 'shadow_spread': '20px', 'title_shadow_spread': '-1px', 'outer_shadow_spread': '0', 'keycolor': '#b3ca88', 'image_shadow_spread': '20px', 'image_background_color': '#ffffff', 'header_font': 'normal bold 30px Coming Soon', 'header_background_color': 'transparent', 'date_header_border_radius_top': '0', 'widget_border_bevel_color': 'transparent', 'tabs_border_color': '#908d6a', 'widget_background_color': '#fefdfa', 'date_position': 'static', 'post_first_padding_top': '0', 'tabs_background_gradient': 'url(http://www.blogblog.com/1kt/awesomeinc/tabs_gradient_light.png)', 'mobile_background_overlay': 'transparent none repeat scroll top left', 'date_border_color': '#1fbb0c', 'tabs_background_color': '#b3ca88', 'widget_link_visited_color': '#ca88b3', 'feed_text_color': '#333333', 'header_margin_top': '0', 
'link_hover_color': '#1fbb0c', 'date_last_space_bottom': '20px', 'header_margin_bottom': '0', 'date_padding_bottom': '0', 'list_separator_border_size': '0', 'description_text_color': '#ca88b3', 'body_text_color': '#333333', 'header_padding_top': '22px', 'post_border_color': '#ca88b3', 'footer_text_color': '#333333', 'post_background_color': '#fefdfa', 'pager_space_top': '1em', 'endSide': 'right', 'startSide': 'left', 'body_background_size': 'auto', 'tabs_font': 'normal normal 13px Coming Soon', 'body_font': 'normal normal 18px Coming Soon', 'date_first_border_radius_top': '5px', 'date_header_position': '15px', 'date_text_color': '#ca88b3', 'widget_title_text_color': '#ca88b3', 'image_border_color': '#1fbb0c', 'separator_outdent': '0', 'widget_text_color': '#333333', 'link_color': '#1fbb0c', 'widget_border_color': '#aab123', 'pager_border_radius_top': '5px', 'feed_link_color': '#1fbb0c', 'header_text_color': '#333333', 'footer_background_color': '#fefdfa', 'widget_padding_top': '8px', 'feed_link_visited_color': '#1fbb0c', 'feed_link_hover_color': '#1fbb0c', 'date_font': 'normal normal 13px Coming Soon', 'tabs_selected_text_color': '#ffffff', 'date_side': 'right', 'post_border_bevel_color': '#ca88b3', 'widget_padding_side': '15px', 'body_background_color': '#fcfbf5', 'image_border_radius': '5px', 'pager_background_color': '#fefdfa', 'widget_alternate_text_color': '#666666'}, 'override': ''}}, {'name': 'view', 'data': {'classic': {'name': 'classic', 'url': '/?view\75classic'}, 'flipcard': {'name': 'flipcard', 'url': '/?view\75flipcard'}, 'magazine': {'name': 'magazine', 'url': '/?view\75magazine'}, 'mosaic': {'name': 'mosaic', 'url': '/?view\75mosaic'}, 'sidebar': {'name': 'sidebar', 'url': '/?view\75sidebar'}, 'snapshot': {'name': 'snapshot', 'url': '/?view\75snapshot'}, 'timeslide': {'name': 'timeslide', 'url': '/?view\75timeslide'}}}]);
+_WidgetManager._RegisterWidget('_ProfileView', new _WidgetInfo('Profile1', 'sidebar-right-1', null, document.getElementById('Profile1'), {}, 'displayModeFull'));
+_WidgetManager._RegisterWidget('_ImageView', new _WidgetInfo('Image5', 'sidebar-right-1', null, document.getElementById('Image5'), {'resize': false}, 'displayModeFull'));
+_WidgetManager._RegisterWidget('_ImageView', new _WidgetInfo('Image7', 'sidebar-right-1', null, document.getElementById('Image7'), {'resize': false}, 'displayModeFull'));
+_WidgetManager._RegisterWidget('_ImageView', new _WidgetInfo('Image6', 'sidebar-right-1', null, document.getElementById('Image6'), {'resize': false}, 'displayModeFull'));
+_WidgetManager._RegisterWidget('_CustomSearchView', new _WidgetInfo('CustomSearch1', 'sidebar-right-1', null, document.getElementById('CustomSearch1'), {'title': 'Søk i denne bloggen', 'includeBlog': true, 'includePostLinks': true, 'includeWeb': false, 'linkLists': [{'id': 'BlogList1', 'title': 'Min bloggliste'}], 'blogUrl': 'http://melposen.blogspot.com/', 'loadingMsg': 'Laster inn...', 'thisBlogMsg': 'Denne bloggen', 'linkedFromHereMsg': 'Koblet her', 'theWebMsg': 'Internett', 'backgroundColor': '#fefdfa', 'textColor': '#333333', 'dateColor': '#ca88b3', 'linkColor': '#1fbb0c', 'urlColor': '#1fbb0c', 'visitedLinkColor': '#1fbb0c', 'borderColor': '#908d6a', 'activeBorderColor': '#908d6a'}, 'displayModeFull'));
+_WidgetManager._RegisterWidget('_LabelView', new _WidgetInfo('Label1', 'sidebar-right-1', null, document.getElementById('Label1'), {}, 'displayModeFull'));
+_WidgetManager._RegisterWidget('_ImageView', new _WidgetInfo('Image4', 'sidebar-right-1', null, document.getElementById('Image4'), {'resize': false}, 'displayModeFull'));
+_WidgetManager._RegisterWidget('_ImageView', new _WidgetInfo('Image3', 'sidebar-right-1', null, document.getElementById('Image3'), {'resize': false}, 'displayModeFull'));
+_WidgetManager._RegisterWidget('_ImageView', new _WidgetInfo('Image1', 'sidebar-right-1', null, document.getElementById('Image1'), {'resize': false}, 'displayModeFull'));
+_WidgetManager._RegisterWidget('_ImageView', new _WidgetInfo('Image2', 'sidebar-right-1', null, document.getElementById('Image2'), {'resize': false}, 'displayModeFull'));
+_WidgetManager._RegisterWidget('_BlogListView', new _WidgetInfo('BlogList1', 'sidebar-right-1', null, document.getElementById('BlogList1'), {'numItemsToShow': 0, 'totalItems': 0}, 'displayModeFull'));
+_WidgetManager._RegisterWidget('_FollowersView', new _WidgetInfo('Followers1', 'sidebar-right-1', null, document.getElementById('Followers1'), {}, 'displayModeFull'));
+_WidgetManager._RegisterWidget('_BlogArchiveView', new _WidgetInfo('BlogArchive1', 'sidebar-right-1', null, document.getElementById('BlogArchive1'), {'languageDirection': 'ltr'}, 'displayModeFull'));
+_WidgetManager._RegisterWidget('_PageListView', new _WidgetInfo('PageList1', 'sidebar-right-1', null, document.getElementById('PageList1'), {'title': 'Sider', 'links': [{'href': 'http://www.melposen.blogspot.no/', 'title': 'Start', 'isCurrentPage': false}], 'mobile': false}, 'displayModeFull'));
+_WidgetManager._RegisterWidget('_AttributionView', new _WidgetInfo('Attribution1', 'footer-3', null, document.getElementById('Attribution1'), {'attribution': 'Awesome Inc.-mal. Malbilder av \74a href\75\47http://jasonmorrow.etsy.com\47 target\75\47_blank\47\76Jason Morrow\74/a\76. Drevet av \74a href\75\47http://www.blogger.com\47 target\75\47_blank\47\76Blogger\74/a\76.'}, 'displayModeFull'));
+_WidgetManager._RegisterWidget('_HeaderView', new _WidgetInfo('Header1', 'header', null, document.getElementById('Header1'), {}, 'displayModeFull'));
+_WidgetManager._RegisterWidget('_NavbarView', new _WidgetInfo('Navbar1', 'navbar', null, document.getElementById('Navbar1'), {}, 'displayModeFull'));
+_WidgetManager._RegisterWidget('_BlogView', new _WidgetInfo('Blog1', 'main', null, document.getElementById('Blog1'), {'cmtInteractionsEnabled': false, 'lightboxEnabled': true, 'lightboxModuleUrl': 'https://www.blogger.com/static/v1/jsbin/2363484432-lbx__no.js', 'lightboxCssUrl': 'https://www.blogger.com/static/v1/v-css/4138445517-lightbox_bundle.css'}, 'displayModeFull'));
+</script>
+</body>
+</html>
\ No newline at end of file
diff --git a/tests/samples/utf-8-kanji.sample.html b/tests/samples/utf-8-kanji.sample.html
new file mode 100644
index 00000000..fa1b6527
--- /dev/null
+++ b/tests/samples/utf-8-kanji.sample.html
@@ -0,0 +1,60 @@
+<!DOCTYPE html>
+<html lang="ja">
+  <body>
+    <div>
+      <article>
+        <div>
+          草枕
+          夏目漱石
+
+
+          一
+
+          　山路を登りながら、こう考えた。
+          　智に働けば角が立つ。情に棹させば流される。意地を通せば窮屈だ。とかくに人の世は住みにくい。
+          　住みにくさが高じると、安い所へ引き越したくなる。どこへ越しても住みにくいと悟った時、詩が生れて、画が出来る。
+          　人の世を作ったものは神でもなければ鬼でもない。やはり向う三軒両隣りにちらちらするただの人である。ただの人が作った人の世が住みにくいからとて、越す国はあるまい。あれば人でなしの国へ行くばかりだ。人でなしの国は人の世よりもなお住みにくかろう。
+          　越す事のならぬ世が住みにくければ、住みにくい所をどれほどか、寛容て、束の間の命を、束の間でも住みよくせねばならぬ。ここに詩人という天職が出来て、ここに画家という使命が降る。あらゆる芸術の士は人の世を長閑にし、人の心を豊かにするが故に尊とい。
+          　住みにくき世から、住みにくき煩いを引き抜いて、ありがたい世界をまのあたりに写すのが詩である、画である。あるは音楽と彫刻である。こまかに云えば写さないでもよい。ただまのあたりに見れば、そこに詩も生き、歌も湧く。着想を紙に落さぬとも※(「王＋膠のつくり」、第3水準1-88-22)鏘の音は胸裏に起る。丹青は画架に向って塗抹せんでも五彩の絢爛は自から心眼に映る。ただおのが住む世を、かく観じ得て、霊台方寸のカメラに澆季溷濁の俗界を清くうららかに収め得れば足る。この故に無声の詩人には一句なく、無色の画家には尺※(「糸＋賺のつくり」、第3水準1-90-17)なきも、かく人世を観じ得るの点において、かく煩悩を解脱するの点において、かく清浄界に出入し得るの点において、またこの不同不二の乾坤を建立し得るの点において、我利私慾の覊絆を掃蕩するの点において、――千金の子よりも、万乗の君よりも、あらゆる俗界の寵児よりも幸福である。
+          　世に住むこと二十年にして、住むに甲斐ある世と知った。二十五年にして明暗は表裏のごとく、日のあたる所にはきっと影がさすと悟った。三十の今日はこう思うている。――喜びの深きとき憂いよいよ深く、楽みの大いなるほど苦しみも大きい。これを切り放そうとすると身が持てぬ。片づけようとすれば世が立たぬ。金は大事だ、大事なものが殖えれば寝る間も心配だろう。恋はうれしい、嬉しい恋が積もれば、恋をせぬ昔がかえって恋しかろ。閣僚の肩は数百万人の足を支えている。背中には重い天下がおぶさっている。うまい物も食わねば惜しい。少し食えば飽き足らぬ。存分食えばあとが不愉快だ。……
+          　余の考がここまで漂流して来た時に、余の右足は突然坐りのわるい角石の端を踏み損くなった。平衡を保つために、すわやと前に飛び出した左足が、仕損じの埋め合せをすると共に、余の腰は具合よく方三尺ほどな岩の上に卸りた。肩にかけた絵の具箱が腋の下から躍り出しただけで、幸いと何の事もなかった。
+          　立ち上がる時に向うを見ると、路から左の方にバケツを伏せたような峰が聳えている。杉か檜か分からないが根元から頂きまでことごとく蒼黒い中に、山桜が薄赤くだんだらに棚引いて、続ぎ目が確と見えぬくらい靄が濃い。少し手前に禿山が一つ、群をぬきんでて眉に逼る。禿げた側面は巨人の斧で削り去ったか、鋭どき平面をやけに谷の底に埋めている。天辺に一本見えるのは赤松だろう。枝の間の空さえ判然している。行く手は二丁ほどで切れているが、高い所から赤い毛布が動いて来るのを見ると、登ればあすこへ出るのだろう。路はすこぶる難義だ。
+          　土をならすだけならさほど手間も入るまいが、土の中には大きな石がある。土は平らにしても石は平らにならぬ。石は切り砕いても、岩は始末がつかぬ。掘崩した土の上に悠然と峙って、吾らのために道を譲る景色はない。向うで聞かぬ上は乗り越すか、廻らなければならん。巌のない所でさえ歩るきよくはない。左右が高くって、中心が窪んで、まるで一間幅を三角に穿って、その頂点が真中を貫いていると評してもよい。路を行くと云わんより川底を渉ると云う方が適当だ。固より急ぐ旅でないから、ぶらぶらと七曲りへかかる。
+          　たちまち足の下で雲雀の声がし出した。谷を見下したが、どこで鳴いてるか影も形も見えぬ。ただ声だけが明らかに聞える。せっせと忙しく、絶間なく鳴いている。方幾里の空気が一面に蚤に刺されていたたまれないような気がする。あの鳥の鳴く音には瞬時の余裕もない。のどかな春の日を鳴き尽くし、鳴きあかし、また鳴き暮らさなければ気が済まんと見える。その上どこまでも登って行く、いつまでも登って行く。雲雀はきっと雲の中で死ぬに相違ない。登り詰めた揚句は、流れて雲に入って、漂うているうちに形は消えてなくなって、ただ声だけが空の裡に残るのかも知れない。
+          　巌角を鋭どく廻って、按摩なら真逆様に落つるところを、際どく右へ切れて、横に見下すと、菜の花が一面に見える。雲雀はあすこへ落ちるのかと思った。いいや、あの黄金の原から飛び上がってくるのかと思った。次には落ちる雲雀と、上る雲雀が十文字にすれ違うのかと思った。最後に、落ちる時も、上る時も、また十文字に擦れ違うときにも元気よく鳴きつづけるだろうと思った。
+          　春は眠くなる。猫は鼠を捕る事を忘れ、人間は借金のある事を忘れる。時には自分の魂の居所さえ忘れて正体なくなる。ただ菜の花を遠く望んだときに眼が醒める。雲雀の声を聞いたときに魂のありかが判然する。雲雀の鳴くのは口で鳴くのではない、魂全体が鳴くのだ。魂の活動が声にあらわれたもののうちで、あれほど元気のあるものはない。ああ愉快だ。こう思って、こう愉快になるのが詩である。
+          　たちまちシェレーの雲雀の詩を思い出して、口のうちで覚えたところだけ暗誦して見たが、覚えているところは二三句しかなかった。その二三句のなかにこんなのがある。
+          　　We look before and after
+          　　　　And pine for what is not:
+          　　Our sincerest laughter
+          　　　　With some pain is fraught;
+          Our sweetest songs are those that tell of saddest thought.
+          「前をみては、後えを見ては、物欲しと、あこがるるかなわれ。腹からの、笑といえど、苦しみの、そこにあるべし。うつくしき、極みの歌に、悲しさの、極みの想、籠るとぞ知れ」
+          　なるほどいくら詩人が幸福でも、あの雲雀のように思い切って、一心不乱に、前後を忘却して、わが喜びを歌う訳には行くまい。西洋の詩は無論の事、支那の詩にも、よく万斛の愁などと云う字がある。詩人だから万斛で素人なら一合で済むかも知れぬ。して見ると詩人は常の人よりも苦労性で、凡骨の倍以上に神経が鋭敏なのかも知れん。超俗の喜びもあろうが、無量の悲も多かろう。そんならば詩人になるのも考え物だ。
+          　しばらくは路が平で、右は雑木山、左は菜の花の見つづけである。足の下に時々蒲公英を踏みつける。鋸のような葉が遠慮なく四方へのして真中に黄色な珠を擁護している。菜の花に気をとられて、踏みつけたあとで、気の毒な事をしたと、振り向いて見ると、黄色な珠は依然として鋸のなかに鎮座している。呑気なものだ。また考えをつづける。
+          　詩人に憂はつきものかも知れないが、あの雲雀を聞く心持になれば微塵の苦もない。菜の花を見ても、ただうれしくて胸が躍るばかりだ。蒲公英もその通り、桜も――桜はいつか見えなくなった。こう山の中へ来て自然の景物に接すれば、見るものも聞くものも面白い。面白いだけで別段の苦しみも起らぬ。起るとすれば足が草臥れて、旨いものが食べられぬくらいの事だろう。
+          　しかし苦しみのないのはなぜだろう。ただこの景色を一幅の画として観、一巻の詩として読むからである。画であり詩である以上は地面を貰って、開拓する気にもならねば、鉄道をかけて一儲けする了見も起らぬ。ただこの景色が――腹の足しにもならぬ、月給の補いにもならぬこの景色が景色としてのみ、余が心を楽ませつつあるから苦労も心配も伴わぬのだろう。自然の力はここにおいて尊とい。吾人の性情を瞬刻に陶冶して醇乎として醇なる詩境に入らしむるのは自然である。
+          　恋はうつくしかろ、孝もうつくしかろ、忠君愛国も結構だろう。しかし自身がその局に当れば利害の旋風に捲き込まれて、うつくしき事にも、結構な事にも、目は眩んでしまう。したがってどこに詩があるか自身には解しかねる。
+          　これがわかるためには、わかるだけの余裕のある第三者の地位に立たねばならぬ。三者の地位に立てばこそ芝居は観て面白い。小説も見て面白い。芝居を見て面白い人も、小説を読んで面白い人も、自己の利害は棚へ上げている。見たり読んだりする間だけは詩人である。
+          　それすら、普通の芝居や小説では人情を免かれぬ。苦しんだり、怒ったり、騒いだり、泣いたりする。見るものもいつかその中に同化して苦しんだり、怒ったり、騒いだり、泣いたりする。取柄は利慾が交らぬと云う点に存するかも知れぬが、交らぬだけにその他の情緒は常よりは余計に活動するだろう。それが嫌だ。
+          　苦しんだり、怒ったり、騒いだり、泣いたりは人の世につきものだ。余も三十年の間それを仕通して、飽々した。飽き飽きした上に芝居や小説で同じ刺激を繰り返しては大変だ。余が欲する詩はそんな世間的の人情を鼓舞するようなものではない。俗念を放棄して、しばらくでも塵界を離れた心持ちになれる詩である。いくら傑作でも人情を離れた芝居はない、理非を絶した小説は少かろう。どこまでも世間を出る事が出来ぬのが彼らの特色である。ことに西洋の詩になると、人事が根本になるからいわゆる詩歌の純粋なるものもこの境を解脱する事を知らぬ。どこまでも同情だとか、愛だとか、正義だとか、自由だとか、浮世の勧工場にあるものだけで用を弁じている。いくら詩的になっても地面の上を馳けてあるいて、銭の勘定を忘れるひまがない。シェレーが雲雀を聞いて嘆息したのも無理はない。
+          　うれしい事に東洋の詩歌はそこを解脱したのがある。採菊東籬下、悠然見南山。ただそれぎりの裏に暑苦しい世の中をまるで忘れた光景が出てくる。垣の向うに隣りの娘が覗いてる訳でもなければ、南山に親友が奉職している次第でもない。超然と出世間的に利害損得の汗を流し去った心持ちになれる。独坐幽篁裏、弾琴復長嘯、深林人不知、明月来相照。ただ二十字のうちに優に別乾坤を建立している。この乾坤の功徳は「不如帰」や「金色夜叉」の功徳ではない。汽船、汽車、権利、義務、道徳、礼義で疲れ果てた後に、すべてを忘却してぐっすり寝込むような功徳である。
+          　二十世紀に睡眠が必要ならば、二十世紀にこの出世間的の詩味は大切である。惜しい事に今の詩を作る人も、詩を読む人もみんな、西洋人にかぶれているから、わざわざ呑気な扁舟を泛べてこの桃源に溯るものはないようだ。余は固より詩人を職業にしておらんから、王維や淵明の境界を今の世に布教して広げようと云う心掛も何もない。ただ自分にはこう云う感興が演芸会よりも舞踏会よりも薬になるように思われる。ファウストよりも、ハムレットよりもありがたく考えられる。こうやって、ただ一人絵の具箱と三脚几を担いで春の山路をのそのそあるくのも全くこれがためである。淵明、王維の詩境を直接に自然から吸収して、すこしの間でも非人情の天地に逍遥したいからの願。一つの酔興だ。
+          　もちろん人間の一分子だから、いくら好きでも、非人情はそう長く続く訳には行かぬ。淵明だって年が年中南山を見詰めていたのでもあるまいし、王維も好んで竹藪の中に蚊帳を釣らずに寝た男でもなかろう。やはり余った菊は花屋へ売りこかして、生えた筍は八百屋へ払い下げたものと思う。こう云う余もその通り。いくら雲雀と菜の花が気に入ったって、山のなかへ野宿するほど非人情が募ってはおらん。こんな所でも人間に逢う。じんじん端折りの頬冠りや、赤い腰巻の姉さんや、時には人間より顔の長い馬にまで逢う。百万本の檜に取り囲まれて、海面を抜く何百尺かの空気を呑んだり吐いたりしても、人の臭いはなかなか取れない。それどころか、山を越えて落ちつく先の、今宵の宿は那古井の温泉場だ。
+          　ただ、物は見様でどうでもなる。レオナルド・ダ・ヴィンチが弟子に告げた言に、あの鐘の音を聞け、鐘は一つだが、音はどうとも聞かれるとある。一人の男、一人の女も見様次第でいかようとも見立てがつく。どうせ非人情をしに出掛けた旅だから、そのつもりで人間を見たら、浮世小路の何軒目に狭苦しく暮した時とは違うだろう。よし全く人情を離れる事が出来んでも、せめて御能拝見の時くらいは淡い心持ちにはなれそうなものだ。能にも人情はある。七騎落でも、墨田川でも泣かぬとは保証が出来ん。しかしあれは情三分芸七分で見せるわざだ。我らが能から享けるありがた味は下界の人情をよくそのままに写す手際から出てくるのではない。そのままの上へ芸術という着物を何枚も着せて、世の中にあるまじき悠長な振舞をするからである。
+          　しばらくこの旅中に起る出来事と、旅中に出逢う人間を能の仕組と能役者の所作に見立てたらどうだろう。まるで人情を棄てる訳には行くまいが、根が詩的に出来た旅だから、非人情のやりついでに、なるべく節倹してそこまでは漕ぎつけたいものだ。南山や幽篁とは性の違ったものに相違ないし、また雲雀や菜の花といっしょにする事も出来まいが、なるべくこれに近づけて、近づけ得る限りは同じ観察点から人間を視てみたい。芭蕉と云う男は枕元へ馬が尿するのをさえ雅な事と見立てて発句にした。余もこれから逢う人物を――百姓も、町人も、村役場の書記も、爺さんも婆さんも――ことごとく大自然の点景として描き出されたものと仮定して取こなして見よう。もっとも画中の人物と違って、彼らはおのがじし勝手な真似をするだろう。しかし普通の小説家のようにその勝手な真似の根本を探ぐって、心理作用に立ち入ったり、人事葛藤の詮議立てをしては俗になる。動いても構わない。画中の人間が動くと見れば差し支ない。画中の人物はどう動いても平面以外に出られるものではない。平面以外に飛び出して、立方的に働くと思えばこそ、こっちと衝突したり、利害の交渉が起ったりして面倒になる。面倒になればなるほど美的に見ている訳に行かなくなる。これから逢う人間には超然と遠き上から見物する気で、人情の電気がむやみに双方で起らないようにする。そうすれば相手がいくら働いても、こちらの懐には容易に飛び込めない訳だから、つまりは画の前へ立って、画中の人物が画面の中をあちらこちらと騒ぎ廻るのを見るのと同じ訳になる。間三尺も隔てていれば落ちついて見られる。あぶな気なしに見られる。言を換えて云えば、利害に気を奪われないから、全力を挙げて彼らの動作を芸術の方面から観察する事が出来る。余念もなく美か美でないかと鑒識する事が出来る。
+          　ここまで決心をした時、空があやしくなって来た。煮え切れない雲が、頭の上へ靠垂れ懸っていたと思ったが、いつのまにか、崩れ出して、四方はただ雲の海かと怪しまれる中から、しとしとと春の雨が降り出した。菜の花は疾くに通り過して、今は山と山の間を行くのだが、雨の糸が濃かでほとんど霧を欺くくらいだから、隔たりはどれほどかわからぬ。時々風が来て、高い雲を吹き払うとき、薄黒い山の背が右手に見える事がある。何でも谷一つ隔てて向うが脈の走っている所らしい。左はすぐ山の裾と見える。深く罩める雨の奥から松らしいものが、ちょくちょく顔を出す。出すかと思うと、隠れる。雨が動くのか、木が動くのか、夢が動くのか、何となく不思議な心持ちだ。
+          　路は存外広くなって、かつ平だから、あるくに骨は折れんが、雨具の用意がないので急ぐ。帽子から雨垂れがぽたりぽたりと落つる頃、五六間先きから、鈴の音がして、黒い中から、馬子がふうとあらわれた。
+          「ここらに休む所はないかね」
+          「もう十五丁行くと茶屋がありますよ。だいぶ濡れたね」
+          　まだ十五丁かと、振り向いているうちに、馬子の姿は影画のように雨につつまれて、またふうと消えた。
+          　糠のように見えた粒は次第に太く長くなって、今は一筋ごとに風に捲かれる様までが目に入る。羽織はとくに濡れ尽して肌着に浸み込んだ水が、身体の温度で生暖く感ぜられる。気持がわるいから、帽を傾けて、すたすた歩行く。
+          　茫々たる薄墨色の世界を、幾条の銀箭が斜めに走るなかを、ひたぶるに濡れて行くわれを、われならぬ人の姿と思えば、詩にもなる、句にも咏まれる。有体なる己れを忘れ尽して純客観に眼をつくる時、始めてわれは画中の人物として、自然の景物と美しき調和を保つ。ただ降る雨の心苦しくて、踏む足の疲れたるを気に掛ける瞬間に、われはすでに詩中の人にもあらず、画裡の人にもあらず。依然として市井の一豎子に過ぎぬ。雲煙飛動の趣も眼に入らぬ。落花啼鳥の情けも心に浮ばぬ。蕭々として独り春山を行く吾の、いかに美しきかはなおさらに解せぬ。初めは帽を傾けて歩行た。後にはただ足の甲のみを見詰めてあるいた。終りには肩をすぼめて、恐る恐る歩行た。雨は満目の樹梢を揺かして四方より孤客に逼る。非人情がちと強過ぎたようだ。
+        </div>
+      </article>
+    </div>
+
+    <div>
+      <a href="https://www.aozora.gr.jp/cards/000148/card776.html">青空文庫 - 図書カード：No.776</a>
+    </div>
+</html>
diff --git a/tests/test_article_only.py b/tests/test_article_only.py
new file mode 100644
index 00000000..fe322121
--- /dev/null
+++ b/tests/test_article_only.py
@@ -0,0 +1,233 @@
+import os
+import time
+import unittest
+
+from readability import Document
+from functools import wraps
+
+
+class TimeoutException(Exception):
+    """Exception raised when a function exceeds its time limit."""
+    pass
+
+
+def timeout(seconds):
+    """Decorator: raise TimeoutException if the wrapped call took longer than `seconds` (measured after it returns; a hung call is not interrupted)."""
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            start_time = time.perf_counter()
+            result = func(*args, **kwargs)  # runs to completion; the limit is only checked afterwards
+            end_time = time.perf_counter()
+            elapsed_time = end_time - start_time
+            if elapsed_time > seconds:  # over budget: the result is dropped and the call fails
+                raise TimeoutException(
+                    f"Function '{func.__name__}' exceeded time limit of {seconds} seconds "
+                    f"with an execution time of {elapsed_time:.4f} seconds"
+                )
+            return result
+        return wrapper
+    return decorator
+
+
+SAMPLES = os.path.join(os.path.dirname(__file__), "samples")
+
+
+def load_sample(filename):
+    """Return the text of the sample file `filename` found under SAMPLES, decoded as UTF-8."""
+    # Pin the encoding: open() otherwise uses the locale default, which on a non-UTF-8 locale cannot decode the CJK samples.
+    with open(os.path.join(SAMPLES, filename), encoding="utf-8") as f:
+        return f.read()
+
+
+class TestArticleOnly(unittest.TestCase):
+    """The option to not get back a full html doc should work
+
+    Given a full html document, the call can request just divs of processed
+    content. In this way the developer can then wrap the article however they
+    want in their own view or application.
+
+    """
+
+    def test_si_sample(self):
+        """Using the si sample, load article with only opening body element"""
+        sample = load_sample("si-game.sample.html")
+        doc = Document(
+            sample,
+            url="http://sportsillustrated.cnn.com/baseball/mlb/gameflash/2012/04/16/40630_preview.html",
+        )
+        res = doc.summary()
+        self.assertEqual("<html><body><div><div class", res[0:27])
+
+    def test_si_sample_html_partial(self):
+        """Using the si sample, make sure we can get the article alone."""
+        sample = load_sample("si-game.sample.html")
+        doc = Document(
+            sample,
+            url="http://sportsillustrated.cnn.com/baseball/mlb/gameflash/2012/04/16/40630_preview.html",
+        )
+        res = doc.summary(html_partial=True)
+        self.assertEqual('<div><div class="', res[0:17])
+
+    def test_too_many_images_sample_html_partial(self):
+        """Using the too-many-images sample, make sure we still get the article."""
+        sample = load_sample("too-many-images.sample.html")
+        doc = Document(sample)
+        res = doc.summary(html_partial=True)
+        self.assertEqual('<div><div class="post-body', res[0:26])
+
+    def test_wrong_link_issue_49(self):
+        """We shouldn't break on bad HTML."""
+        sample = load_sample("the-hurricane-rubin-carter-denzel-washington.html")
+        doc = Document(sample)
+        res = doc.summary(html_partial=True)
+        self.assertEqual('<div><div class="content__article-body ', res[0:39])
+
+    def test_best_elem_is_root_and_passing(self):
+        sample = (
+            '<html class="article" id="body">'
+            "   <body>"
+            "       <p>1234567890123456789012345</p>"
+            "   </body>"
+            "</html>"
+        )
+        doc = Document(sample)
+        doc.summary()
+
+    def test_correct_cleanup(self):
+        """The summary keeps the <article> text and contains neither "comment" nor "aside"."""
+        sample = """
+        <html>
+            <body>
+                <section>test section</section>
+                <article class="">
+<p>Lot of text here.</p>
+                <div id="advertisement"><a href="http://github.com/link">Ad</a></div>
+<p>More text is written here, and contains punctuation and dots.</p>
+</article>
+                <aside id="comment1"/>
+                <div id="comment2">
+                    <a href="http://github.com/asd">spam</a>
+                    <a href="http://github.com/asd">spam</a>
+                    <a href="http://github.com/asd">spam</a>
+                </div>
+                <div id="comment3"/>
+                <aside id="comment4">A small comment.</aside>
+                <div id="comment5"><p>The comment is also helpful, but it's
+                    still not the correct item to be extracted.</p>
+                    <p>It's even longer than the article itself!"</p></div>
+            </body>
+        </html>
+        """
+        s = Document(sample).summary()
+        # "punctuation" only occurs inside <article>; "comment"/"aside" only occur in the surrounding noise.
+        assert "punctuation" in s
+        assert "comment" not in s
+        assert "aside" not in s
+
+    # Many spaces make some regexes run forever
+    @timeout(3)
+    def test_many_repeated_spaces(self):
+        long_space = " " * 1000000
+        sample = "<html><body><p>foo" + long_space + "</p></body></html>"
+
+        doc = Document(sample)
+        s = doc.summary()
+
+        assert "foo" in s
+
+    def test_not_self_closing(self):
+        sample = '<h2><a href="#"></a>foobar</h2>'
+        doc = Document(sample)
+        assert (
+            '<body id="readabilityBody"><h2><a href="#"></a>foobar</h2></body>'
+            == doc.summary()
+        )
+
+    def test_utf8_kanji(self):
+        """Using the UTF-8 kanji sample, load article which is written in kanji"""
+        sample = load_sample("utf-8-kanji.sample.html")
+        doc = Document(sample)
+        res = doc.summary()
+        assert 0 < len(res) < 10000
+
+    def test_author_present(self):
+        sample = load_sample("the-hurricane-rubin-carter-denzel-washington.html")
+        doc = Document(sample)
+        assert 'Alex von Tunzelmann' == doc.author()
+
+    def test_author_absent(self):
+        sample = load_sample("si-game.sample.html")
+        doc = Document(sample)
+        assert '[no-author]' == doc.author()
+
+    def test_keep_images_present(self):
+        sample = load_sample("summary-keep-all-images.sample.html")
+
+        doc = Document(sample)
+
+        assert "<img" in doc.summary(keep_all_images=True)
+
+    def test_keep_images_absent(self):
+        sample = load_sample("summary-keep-all-images.sample.html")
+
+        doc = Document(sample)
+
+        assert "<img" not in doc.summary(keep_all_images=False)
+
+    def test_keep_images_absent_by_defautl(self):
+        sample = load_sample("summary-keep-all-images.sample.html")
+
+        doc = Document(sample)
+
+        assert "<img" not in doc.summary()
+
+    def test_cjk_summary(self):
+        """Check we can extract CJK text correctly."""
+        html = """
+        <html>
+            <head>
+                <title>这是标题</title>
+            </head>
+            <body>
+                <div>一些无关紧要的内容</div>
+                <div class="article-content">
+                    <h1>主要文章标题</h1>
+                    <p>这是主要内容的第一段。</p>
+                    <p>これはコンテンツの第2段落です。</p>
+                    <p>이것은 콘텐츠의 세 번째 단락입니다.</p>
+                    <p>This is the fourth paragraph.</p>
+                </div>
+                <div>More irrelevant stuff</div>
+            </body>
+        </html>
+        """
+        doc = Document(html)
+        summary = doc.summary()
+        # Check that the main CJK content is present in the summary
+        self.assertTrue("这是主要内容的第一段" in summary)
+        self.assertTrue("これはコンテンツの第2段落です" in summary)
+        self.assertTrue("이것은 콘텐츠의 세 번째 단락입니다" in summary)
+        # Check that irrelevant content is mostly gone
+        self.assertFalse("一些无关紧要的内容" in summary)
+
+    def test_shorten_title_delimiter_bug(self):
+        """Test that shorten_title handles delimiters correctly when the last part is valid.
+
+        This specifically targets a potential bug where 'p1' might be used instead of 'pl'.
+        """
+        html = """
+        <html>
+            <head>
+                <title>Short Part | これは長いです</title>
+            </head>
+            <body>
+                <div>Content</div>
+            </body>
+        </html>
+        """
+        doc = Document(html)
+        # With the bug, this call might raise NameError: name 'p1' is not defined
+        # With the fix, it should correctly return the last part.
+        short_title = doc.short_title()
+        self.assertEqual(short_title, "これは長いです")
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 00000000..926fda50
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,33 @@
+# Tox (https://tox.wiki/) is a tool for running tests
+# in multiple virtualenvs. This configuration file will run the
+# test suite on all supported python versions. To use it, "pip install tox"
+# and then run "tox" from this directory.
+
+[tox]
+envlist =
+    py{38,39,310,311,312,313,py3}, doc
+skip_missing_interpreters =
+    True
+
+[testenv]
+deps =
+    pytest
+    doc: sphinx
+    doc: sphinx_rtd_theme
+    doc: myst-parser
+
+# This creates the virtual envs with --site-packages so that packages
+# that are already installed will be reused. This is especially useful on
+# Windows: rather than compiling lxml locally (which in turn requires a
+# compiler and the build dependencies), you can download a prebuilt wheel
+# from http://www.lfd.uci.edu/~gohlke/pythonlibs/#lxml and install it via
+# $PYTHONDIR\Scripts\pip.exe install *.whl
+sitepackages=
+    True
+commands =
+    pip install -r requirements.txt -e ".[test]"
+    pytest
+
+[testenv:doc]
+commands =
+    sphinx-build -b html doc/source/ build/