105 lines
3.5 KiB
Python
105 lines
3.5 KiB
Python
from unittest import TestCase
|
|
|
|
from atextcrawler.utils.section import concat_section_texts, iter_sections
|
|
|
|
|
|
class IterSectionTest(TestCase):
    """Check the tuples yielded by ``iter_sections`` for several boundary maps.

    Every test feeds the 26-letter lowercase alphabet plus a dict mapping
    positions to levels, and compares the materialized result against a
    list of 4-tuples ``(start, end, level, text)``.
    """

    def test_iter_sections_1(self):
        """Boundaries at 0, 5, 15, 20; all levels are below max_level=100."""
        text = 'abcdefghijklmnopqrstuvwxyz'
        boundaries = {0: 80, 5: 2, 15: 1, 20: 3}
        expected = [
            (0, 5, 80, 'bcde'),
            (5, 15, 2, 'ghijklmno'),
            (15, 20, 1, 'qrst'),
            (20, 26, 3, 'uvwxyz'),
        ]
        actual = list(iter_sections(text, boundaries, max_level=100))
        self.assertEqual(actual, expected)

    def test_iter_sections_2(self):
        """Extra boundary at position 26 (the text length).

        The expected result still has four sections, none starting at 26,
        and the last section text is 'vwxyz'.
        """
        text = 'abcdefghijklmnopqrstuvwxyz'
        boundaries = {0: 4, 5: 2, 15: 1, 20: 3, 26: 9}
        expected = [
            (0, 5, 4, 'bcde'),
            (5, 15, 2, 'ghijklmno'),
            (15, 20, 1, 'qrst'),
            (20, 26, 3, 'vwxyz'),
        ]
        actual = list(iter_sections(text, boundaries, max_level=100))
        self.assertEqual(actual, expected)

    def test_iter_sections_3(self):
        """No boundary at 0, and one boundary (15) with level 60 > max_level=59.

        The expected result opens with a (0, 5) section of level 80 and has
        no section starting at 15; the section from 5 runs up to 18.
        """
        text = 'abcdefghijklmnopqrstuvwxyz'
        boundaries = {5: 2, 15: 60, 18: 50, 20: 3}
        expected = [
            (0, 5, 80, 'bcde'),
            (5, 18, 2, 'ghijklmnopqr'),
            (18, 20, 50, 't'),
            (20, 26, 3, 'uvwxyz'),
        ]
        actual = list(iter_sections(text, boundaries, max_level=59))
        self.assertEqual(actual, expected)

    def test_iter_sections_4(self):
        """Like test 3, plus a trailing boundary (24) with level 60.

        The expected result is the same as without the boundary at 24.
        """
        text = 'abcdefghijklmnopqrstuvwxyz'
        boundaries = {5: 2, 15: 60, 18: 50, 20: 3, 24: 60}
        expected = [
            (0, 5, 80, 'bcde'),
            (5, 18, 2, 'ghijklmnopqr'),
            (18, 20, 50, 't'),
            (20, 26, 3, 'uvwxyz'),
        ]
        actual = list(iter_sections(text, boundaries, max_level=59))
        self.assertEqual(actual, expected)
|
|
|
|
|
|
class AggSectionTest(TestCase):
    """Check the pairs yielded by ``concat_section_texts`` with min_len=10.

    Every test feeds the 26-letter lowercase alphabet plus a dict mapping
    positions to level 1, and compares the materialized result against a
    list of ``(list_of_ints, text)`` pairs. The integers are presumably the
    indices of the sections merged into each text -- confirm against the
    implementation.
    """

    def test_concat_sections_1(self):
        """Boundaries at 0, 5, 15, 20; expect two merged texts."""
        text = 'abcdefghijklmnopqrstuvwxyz'
        boundaries = {0: 1, 5: 1, 15: 1, 20: 1}
        expected = [
            ([0, 1], 'abcdefghijklmno'),
            ([2, 3], 'pqrstuvwxyz'),
        ]
        actual = list(concat_section_texts(text, boundaries, min_len=10))
        self.assertEqual(actual, expected)

    def test_concat_sections_2(self):
        """Boundaries at 0, 2, 10, 20, 26; the first merged text has length 10."""
        text = 'abcdefghijklmnopqrstuvwxyz'
        boundaries = {0: 1, 2: 1, 10: 1, 20: 1, 26: 1}
        expected = [
            ([0, 1], 'abcdefghij'),
            ([2, 3, 4], 'klmnopqrstuvwxyz'),
        ]
        actual = list(concat_section_texts(text, boundaries, min_len=10))
        self.assertEqual(actual, expected)

    def test_concat_sections_3(self):
        """Boundaries at 0, 4, 6, 16, 26; three entries go into the first text."""
        text = 'abcdefghijklmnopqrstuvwxyz'
        boundaries = {0: 1, 4: 1, 6: 1, 16: 1, 26: 1}
        expected = [
            ([0, 1, 2], 'abcdefghijklmnop'),
            ([3, 4], 'qrstuvwxyz'),
        ]
        actual = list(concat_section_texts(text, boundaries, min_len=10))
        self.assertEqual(actual, expected)

    def test_concat_sections_4(self):
        """Boundaries at 0, 5, 15, 26; expect two merged texts."""
        text = 'abcdefghijklmnopqrstuvwxyz'
        boundaries = {0: 1, 5: 1, 15: 1, 26: 1}
        expected = [
            ([0, 1], 'abcdefghijklmno'),
            ([2, 3], 'pqrstuvwxyz'),
        ]
        actual = list(concat_section_texts(text, boundaries, min_len=10))
        self.assertEqual(actual, expected)

    def test_concat_sections_5(self):
        """Several closely spaced boundaries near the end (22, 23, 24, 26).

        The expected second text collects five entries.
        """
        text = 'abcdefghijklmnopqrstuvwxyz'
        boundaries = {0: 1, 5: 1, 12: 1, 22: 1, 23: 1, 24: 1, 26: 1}
        expected = [
            ([0, 1], 'abcdefghijkl'),
            ([2, 3, 4, 5, 6], 'mnopqrstuvwxyz'),
        ]
        actual = list(concat_section_texts(text, boundaries, min_len=10))
        self.assertEqual(actual, expected)
|