""" Test cases for text util. """ from unittest import TestCase from atextcrawler.utils.html import clean_page class CleanHtmlTest(TestCase): """ Test clean_page. Have an eye on self-closing tags (br, hr, ...). """ def test_clean_page_1(self): s = 'Hello
anything' r = 'Hello
anything' self.assertEqual(str(clean_page(s)), r) def test_clean_page_2(self): s = 'Hello
anything' r = 'Hello
anything' self.assertEqual(str(clean_page(s)), r) def test_clean_page_3(self): # nesting s = '--
xx
yy
zz
..' r = '--..' self.assertEqual(str(clean_page(s)), r) def test_clean_page_4(self): # aria-hidden s = '--..' r = '--..' self.assertEqual(str(clean_page(s)), r) s = '--..' r = '--..' self.assertEqual(str(clean_page(s)), r) s = '--

xx

..' r = '--

xx

..' self.assertEqual(str(clean_page(s)), r) s = '--

xx

..' r = '--

xx

..' self.assertEqual(str(clean_page(s)), r) s = '--

xx

..' r = '--

xx

..' self.assertEqual(str(clean_page(s)), r) def test_clean_page_5(self): # no removal s = '--

xxyy

..' r = '--

xxyy

..' self.assertEqual(str(clean_page(s)), r) def test_clean_page_6(self): # self-closing tags to be removed s = '--

xx

\n...

tt

nn' r = '--

xx

\n...

tt

nn' self.assertEqual(str(clean_page(s)), r) def test_clean_page_7(self): s = '--nn' r = '--nn' self.assertEqual(str(clean_page(s)), r)