"""
Test cases for the HTML cleaning utility (``clean_page``).
"""
from unittest import TestCase
from atextcrawler.utils.html import clean_page
class CleanHtmlTest(TestCase):
    """
    Test clean_page.

    Have an eye on self-closing tags (br, hr, ...).

    Every case feeds a fixture string to ``clean_page`` and compares
    ``str()`` of the result with the expected string.

    NOTE(review): the fixture strings in this class show no HTML markup,
    although the cases are labelled as being about nesting, aria-hidden
    and self-closing tags. Presumably the markup was lost from the
    literals at some point; the visible characters are kept unchanged
    (physical line breaks inside a literal are written as ``\\n``).
    Confirm the fixtures against the project history before relying on
    these expectations.
    """

    # Per-case notes are '#' comments rather than method docstrings so that
    # unittest keeps reporting the method names in verbose output.

    def test_clean_page_1(self):
        # Input and expected output are the same two-line string.
        s = 'Hello\nanything'
        r = 'Hello\nanything'
        self.assertEqual(str(clean_page(s)), r)

    def test_clean_page_2(self):
        # Same visible fixture as test_clean_page_1.
        s = 'Hello\nanything'
        r = 'Hello\nanything'
        self.assertEqual(str(clean_page(s)), r)

    def test_clean_page_3(self):
        # nesting
        s = '--..'
        r = '--..'
        self.assertEqual(str(clean_page(s)), r)

    def test_clean_page_4(self):
        # aria-hidden
        # Three (input, expected) pairs. In the first pair the input has a
        # line break after '--' that the expected value lacks; the other
        # two pairs are identical on both sides.
        cases = (
            ('--\nxx\n..', '--xx\n..'),
            ('--xx\n..', '--xx\n..'),
            ('--xx\n..', '--xx\n..'),
        )
        for index, (s, r) in enumerate(cases):
            # subTest reports each pair separately instead of stopping at
            # the first failing assertion.
            with self.subTest(case=index):
                self.assertEqual(str(clean_page(s)), r)

    def test_clean_page_5(self):
        # no removal
        s = '--xxyy\n..'
        r = '--xxyy\n..'
        self.assertEqual(str(clean_page(s)), r)

    def test_clean_page_6(self):
        # self-closing tags to be removed
        # Only the leading '--' and trailing 'nn' of the input are expected
        # to remain in the result.
        s = '--xx\n\n...xx\n\n...tt\nnn'
        r = '--nn'
        self.assertEqual(str(clean_page(s)), r)