68 lines
2.8 KiB
Python
68 lines
2.8 KiB
Python
from unittest import IsolatedAsyncioTestCase
|
|
import asyncpg
|
|
from atextcrawler.utils.durl import Durl
|
|
from atextcrawler.config import Config
|
|
from atextcrawler.db import PGPool
|
|
|
|
|
|
class DurlTest(IsolatedAsyncioTestCase):
    """
    Async tests for decomposing and normalizing URLs with ``Durl``.

    Each test runs with a database pool opened from the project
    configuration and one connection acquired from it (see
    :meth:`asyncSetUp`); both are given back in :meth:`asyncTearDown`.
    """

    async def asyncSetUp(self):
        """
        Open the PostgreSQL pool and acquire a single connection.

        The pool is built from the ``'postgresql'`` section of the
        configuration returned by ``Config().get()``.
        """
        config = Config().get()
        self.pool = PGPool(config['postgresql'])
        await self.pool.__aenter__()
        try:
            self.conn = await self.pool.pool.acquire()
        except BaseException:
            # unittest does not call asyncTearDown() when asyncSetUp()
            # fails, so the already opened pool must be closed here,
            # otherwise it would leak. The error is re-raised unchanged.
            await self.pool.pool.close()
            raise

    async def test_durl_basic(self):
        """
        Check the parts of an absolute URL and the rejection of ``ftp``.
        """
        durl1 = await Durl('https://U:Pw@www.EXAMPLE.com:8000/hello?world#a')
        self.assertEqual(durl1.scheme, 'https')
        # The host name is expected in lower case, while the user info
        # part keeps its original case.
        self.assertEqual(durl1.netloc, 'U:Pw@www.example.com:8000')
        self.assertEqual(durl1.port, 8000)
        self.assertEqual(durl1.path, '/hello')
        # The fragment ('#a') is expected to be dropped.
        self.assertEqual(durl1.fragment, '')
        self.assertEqual(durl1.pwa(), 'hello?world')
        self.assertEqual(durl1.site(), 'https://U:Pw@www.example.com:8000/')
        self.assertEqual(
            durl1.url(), 'https://U:Pw@www.example.com:8000/hello?world'
        )
        self.assertEqual(durl1.has_path(), True)

        # A bare '/' does not count as having a path.
        durl2 = await Durl('http://www.example.com/')
        self.assertEqual(durl2.has_path(), False)

        # An ftp URL is expected to yield None instead of an instance.
        durl3 = await Durl('ftp://www.example.com/')
        self.assertIsNone(durl3)

    async def test_durl_with_base(self):
        """
        Check URLs given together with a ``base`` URL.
        """
        durl1 = await Durl('https://www.example.com')
        # A URL without any path is expected to get the path '/'.
        self.assertEqual(durl1.path, '/')
        self.assertEqual(durl1.pwa(), '')
        self.assertEqual(durl1.has_path(), False)

        # Absolute URL on the same host as the base.
        durl2 = await Durl('https://www.example.com/hello2', base=durl1)
        self.assertEqual(durl2.hostname, 'www.example.com')
        self.assertEqual(durl2.path, '/hello2')
        self.assertEqual(durl2.pwa(), 'hello2')

        # Host-relative URL: the host is expected to come from the base.
        durl3 = await Durl('/hello3?x=1', base=durl1)
        self.assertEqual(durl3.hostname, 'www.example.com')
        self.assertEqual(durl3.path, '/hello3')
        self.assertEqual(durl3.pwa(), 'hello3?x=1')
        self.assertEqual(durl3.site(), 'https://www.example.com/')

        # A URL on a different host than the base is expected to give None.
        durl4 = await Durl('https://www.kernel.org/', base=durl1)
        self.assertIsNone(durl4)

    async def test_durl_with_base_and_match_base(self):
        """
        Check URLs given with a ``base`` URL and ``match_base=True``.
        """
        durl1 = await Durl('https://www.example.com/base/path/')
        self.assertEqual(durl1.path, '/base/path/')
        self.assertEqual(durl1.pwa(), 'base/path/')
        self.assertEqual(durl1.has_path(), True)

        # A URL above the base path is expected to give None.
        durl2 = await Durl(
            'https://www.example.com/base/', base=durl1, match_base=True
        )
        self.assertIsNone(durl2)

        # A URL below the base path: pwa() is expected to be relative to
        # the base path and to omit the fragment.
        durl3 = await Durl(
            'https://www.example.com/base/path/whatever?x=1#a',
            base=durl1,
            match_base=True,
        )
        self.assertEqual(durl3.pwa(), 'whatever?x=1')

    async def asyncTearDown(self):
        """
        Release the connection acquired in ``asyncSetUp`` and close the pool.
        """
        await self.pool.pool.release(self.conn)
        await self.pool.pool.close()
|