Only return a SitePath if the site has crawl_enabled=true
This commit is contained in:
parent
028be1631d
commit
8246ce6251
1 changed file with 6 additions and 1 deletion
|
@@ -99,14 +99,19 @@ async def get_site_path(
|
||||||
Return the next path of a given site that needs to be processed.
|
Return the next path of a given site that needs to be processed.
|
||||||
|
|
||||||
If none needs to be processed, return None.
|
If none needs to be processed, return None.
|
||||||
|
In particular, for sites having crawl_enabled=false return None.
|
||||||
|
|
||||||
Only return paths that have last been visited before *before*
|
Only return paths that have last been visited before *before*
|
||||||
or not been processed at all. Paths with a ok_count of -3 or lower
|
or not been processed at all. Paths with an ok_count of -3 or lower
|
||||||
are dropped.
|
are dropped.
|
||||||
|
|
||||||
If *only_new*, limit to paths that have not been processed at all,
|
If *only_new*, limit to paths that have not been processed at all,
|
||||||
irrespective of the value of *before*.
|
irrespective of the value of *before*.
|
||||||
"""
|
"""
|
||||||
|
sql = "SELECT crawl_enabled FROM site WHERE id=$1"
|
||||||
|
crawl_enabled = await conn.fetchval(sql, site.id_)
|
||||||
|
if not crawl_enabled:
|
||||||
|
return None
|
||||||
if only_new:
|
if only_new:
|
||||||
sql = (
|
sql = (
|
||||||
"SELECT * FROM site_path"
|
"SELECT * FROM site_path"
|
||||||
|
|
Loading…
Reference in a new issue