49 lines
1.6 KiB
Python
49 lines
1.6 KiB
Python
"""
|
|
Test cases for the annotation utility (``atextcrawler.utils.annotation.annotate``).
|
|
"""
|
|
|
|
from unittest import TestCase
|
|
|
|
from atextcrawler.utils.annotation import annotate
|
|
|
|
|
|
class AnnotateTest(TestCase):
    """
    Checks for ``annotate`` on small HTML snippets.

    Each test feeds one markup string to ``annotate`` and compares the
    returned text and annotation entries with fixed expected values.

    Keep in mind that the <br> and <hr> tags are self-closing.
    """

    def test_annotate_1(self):
        """Two inline elements separated by a ``<br>``."""
        markup = '<em>Hello</em><br><strong>world</strong>'
        plain, annotations = annotate(markup)
        self.assertEqual(plain, ' Hello world')
        self.assertEqual(annotations['semantic_breaks'], {0: 80, 6: 80})
        self.assertEqual(annotations['section_ids'], {})

    def test_annotate_2(self):
        """Like the first case, but with whitespace inside the inline tags."""
        markup = '<em> Hello </em><br><strong> world </strong>'
        plain, annotations = annotate(markup)
        self.assertEqual(plain, ' Hello world')
        self.assertEqual(annotations['semantic_breaks'], {0: 80, 6: 80})
        self.assertEqual(annotations['section_ids'], {})

    def test_annotate_3(self):
        """A single paragraph containing an inline element."""
        markup = '<p> Hello <em>world</em> </p> '
        plain, annotations = annotate(markup)
        self.assertEqual(plain, ' Hello world')
        self.assertEqual(annotations['semantic_breaks'], {0: 60})

    def test_annotate_4(self):
        """A paragraph wrapped in a ``<div>`` that carries an ``id``."""
        markup = '<div id = "ref1"><p>Hello <em>world</em> </p> </div>'
        plain, annotations = annotate(markup)
        self.assertEqual(plain, ' Hello world')
        self.assertEqual(annotations['semantic_breaks'], {0: 60})
        self.assertEqual(annotations['section_ids'], {0: ['ref1']})

    def test_annotate_5(self):
        """A ``<br>`` carrying an ``id``, followed by text and a paragraph."""
        markup = '<br id="ref2"> Hello <p>world </p> '
        plain, annotations = annotate(markup)
        self.assertEqual(plain, ' Hello world')
        self.assertEqual(annotations['semantic_breaks'], {0: 80, 6: 60})
        self.assertEqual(annotations['section_ids'], {1: ['ref2']})
|