Only return a SitePath if the site has crawl_enabled=true
This commit is contained in:
parent
028be1631d
commit
8246ce6251
1 changed file with 6 additions and 1 deletion
|
@ -99,14 +99,19 @@ async def get_site_path(
|
|||
Return the next path of a given site that needs to be processed.
|
||||
|
||||
If none needs to be processed, return None.
|
||||
In particular, for sites having crawl_enabled=false return None.
|
||||
|
||||
Only return paths that have last been visited before *before*
|
||||
or not been processed at all. Paths with a ok_count of -3 or lower
|
||||
or not been processed at all. Paths with an ok_count of -3 or lower
|
||||
are dropped.
|
||||
|
||||
If *only_new*, limit to paths that have not been processed at all,
|
||||
irrespective of the value of *before*.
|
||||
"""
|
||||
sql = "SELECT crawl_enabled FROM site WHERE id=$1"
|
||||
crawl_enabled = await conn.fetchval(sql, site.id_)
|
||||
if not crawl_enabled:
|
||||
return None
|
||||
if only_new:
|
||||
sql = (
|
||||
"SELECT * FROM site_path"
|
||||
|
|
Loading…
Reference in a new issue