Page tree

Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
language: py
linenumbers: true
from squirro_client import ItemUploader

# Processing config to detect boilerplate with the default classifier
processing_config = {
    'boilerplate-removal': {
        'enabled': True,
    },
}
# `...` is a placeholder for the remaining ItemUploader arguments, which this
# example does not show; replace it with your own values before running.
uploader = ItemUploader(..., processing_config=processing_config)
# Sample HTML document: a short paragraph containing only a link (labelled
# "Boilerplate") followed by a long paragraph of body text.
html_body = """
<html><body>
<p><a href="http://www.example.com">Boilerplate</a></p>

<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Cras aliquet
venenatis blandit. Phasellus dapibus mi eu metus maximus, nec malesuada urna
congue. Vivamus in cursus risus. Sed neque ligula, lobortis in sollicitudin
quis, efficitur eu metus. Pellentesque eu nunc sit amet turpis bibendum
volutpat eu ac ante. Nam posuere eleifend rhoncus. Vivamus purus tellus,
interdum ac semper euismod, scelerisque ut ipsum. Phasellus ut convallis nunc,
quis finibus velit. Class aptent taciti sociosqu ad litora torquent per
conubia nostra, per inceptos himenaeos. Maecenas euismod placerat diam, at
pellentesque quam eleifend ac. Nunc quis est laoreet, hendrerit dui vel,
ornare sem. Integer volutpat ullamcorper orci quis accumsan. Proin
pellentesque vulputate pellentesque. Sed sapien ante, elementum sed lorem vel,
bibendum tristique arcu.</p>
</body></html>
"""
# Wrap the HTML document in a single item and send it through the uploader
# configured above.
single_item = {'body': html_body, 'title': 'Item 01'}
items = [single_item]
uploader.upload(items)

...