Improve search.
* Do not return only the 10 items from the first page; loop over at most `max_pages` pages.
* Use https://usa.anarchistlibraries.net/ as a fallback when the main site is not available.
* Drop the formats TXT, TEX and MUSE from search results, as they cannot be displayed in calibre.
* On Python 3, use `quote_plus` instead of `quote`.
* Obtain the version number in the user agent string from the module `TheAnarchistLibraryStore`.
This commit is contained in:
		
							parent
							
								
									fc96806830
								
							
						
					
					
						commit
						e4daeb15d2
					
				
					 1 changed files with 38 additions and 15 deletions
				
			
		| 
						 | 
					@ -1,24 +1,37 @@
 | 
				
			||||||
__license__ = 'GPL 3'
 | 
					__license__ = 'GPL 3'
 | 
				
			||||||
__copyright__ = '2012, Ruben Pollan <meskio@sindominio.net>'
 | 
					__copyright__ = '2012, Ruben Pollan <meskio@sindominio.net>; 2020, ibu radempa <ibu@radempa.de>'
 | 
				
			||||||
__docformat__ = 'restructuredtext en'
 | 
					__docformat__ = 'restructuredtext en'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import json
 | 
				
			||||||
try:
 | 
					try:
 | 
				
			||||||
    from urllib.parse import quote
 | 
					    from urllib.parse import quote_plus as quote
 | 
				
			||||||
except:
 | 
					except:
 | 
				
			||||||
    from urllib2 import quote
 | 
					    from urllib2 import quote
 | 
				
			||||||
try:
 | 
					try:
 | 
				
			||||||
    from PyQt5.Qt import QUrl
 | 
					    from PyQt5.Qt import QUrl
 | 
				
			||||||
except:
 | 
					except:
 | 
				
			||||||
    from PyQt4.Qt import QUrl
 | 
					    from PyQt4.Qt import QUrl
 | 
				
			||||||
 | 
					 | 
				
			||||||
from contextlib import closing
 | 
					from contextlib import closing
 | 
				
			||||||
import json
 | 
					 | 
				
			||||||
from calibre import browser
 | 
					from calibre import browser
 | 
				
			||||||
from calibre.gui2 import open_url
 | 
					from calibre.gui2 import open_url
 | 
				
			||||||
from calibre.gui2.store import StorePlugin
 | 
					from calibre.gui2.store import StorePlugin
 | 
				
			||||||
from calibre.gui2.store.basic_config import BasicStoreConfig
 | 
					from calibre.gui2.store.basic_config import BasicStoreConfig
 | 
				
			||||||
from calibre.gui2.store.search_result import SearchResult
 | 
					from calibre.gui2.store.search_result import SearchResult
 | 
				
			||||||
from calibre.gui2.store.web_store_dialog import WebStoreDialog
 | 
					from calibre.gui2.store.web_store_dialog import WebStoreDialog
 | 
				
			||||||
 | 
					from . import TheAnarchistLibraryStore
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					url1 = 'https://theanarchistlibrary.org/search?fmt=json&page=%s&query=%s'
 | 
				
			||||||
 | 
					url2 = 'https://usa.anarchistlibraries.net/search?fmt=json&page=%s&query=%s'
 | 
				
			||||||
 | 
					"""Search URLs. If the library has no fallback url, set url2 = None."""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					max_pages = 10
 | 
				
			||||||
 | 
					"""Page limit. (amusewiki gives us 10 results per page.)"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					user_agent = 'Calibre plugin calibre-tal v' + '{}.{}.{}'.format(*TheAnarchistLibraryStore.version)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class TheAnarchistLibraryStore(BasicStoreConfig, StorePlugin):
 | 
					class TheAnarchistLibraryStore(BasicStoreConfig, StorePlugin):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -33,14 +46,28 @@ class TheAnarchistLibraryStore(BasicStoreConfig, StorePlugin):
 | 
				
			||||||
            d.set_tags(self.config.get('tags', ''))
 | 
					            d.set_tags(self.config.get('tags', ''))
 | 
				
			||||||
            d.exec_()
 | 
					            d.exec_()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def search(self, query, max_results=10, timeout=60):
 | 
					    def search(self, query, max_results=10, timeout=10):
 | 
				
			||||||
        url = 'http://theanarchistlibrary.org/search?fmt=json&query=' + quote(query)
 | 
					        br = browser(user_agent=user_agent)
 | 
				
			||||||
 | 
					        page = 0
 | 
				
			||||||
 | 
					        while page < max_pages:
 | 
				
			||||||
 | 
					            page += 1
 | 
				
			||||||
 | 
					            try:
 | 
				
			||||||
 | 
					                for result in self._iter_search_results(br, url1, page, query, timeout):
 | 
				
			||||||
 | 
					                    if result is False:
 | 
				
			||||||
 | 
					                        return
 | 
				
			||||||
 | 
					                    yield result
 | 
				
			||||||
 | 
					            except:
 | 
				
			||||||
 | 
					                if url2:
 | 
				
			||||||
 | 
					                    for result in self._iter_search_results(br, url2, page, query, timeout):
 | 
				
			||||||
 | 
					                        if result is False:
 | 
				
			||||||
 | 
					                            return
 | 
				
			||||||
 | 
					                        yield result
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        br = browser()
 | 
					    def _iter_search_results(self, br, url, page, query, timeout):
 | 
				
			||||||
 | 
					        with closing(br.open(url % (page, quote(query)), timeout=timeout)) as f:
 | 
				
			||||||
        counter = max_results
 | 
					 | 
				
			||||||
        with closing(br.open(url, timeout=timeout)) as f:
 | 
					 | 
				
			||||||
            doc = json.load(f)
 | 
					            doc = json.load(f)
 | 
				
			||||||
 | 
					            if not doc:
 | 
				
			||||||
 | 
					                yield False
 | 
				
			||||||
            for data in doc:
 | 
					            for data in doc:
 | 
				
			||||||
                s = SearchResult()
 | 
					                s = SearchResult()
 | 
				
			||||||
                s.title = data['title'].strip()
 | 
					                s.title = data['title'].strip()
 | 
				
			||||||
| 
						 | 
					@ -52,9 +79,5 @@ class TheAnarchistLibraryStore(BasicStoreConfig, StorePlugin):
 | 
				
			||||||
                s.downloads['PDF'] = data['url'].strip() + '.pdf'
 | 
					                s.downloads['PDF'] = data['url'].strip() + '.pdf'
 | 
				
			||||||
                s.downloads['A4.PDF'] = data['url'].strip() + '.a4.pdf'
 | 
					                s.downloads['A4.PDF'] = data['url'].strip() + '.a4.pdf'
 | 
				
			||||||
                s.downloads['LT.PDF'] = data['url'].strip() + '.lt.pdf'
 | 
					                s.downloads['LT.PDF'] = data['url'].strip() + '.lt.pdf'
 | 
				
			||||||
                s.downloads['TXT'] = data['url'].strip() + '.txt'
 | 
					                s.formats = 'EPUB, PDF, A4.PDF, LT.PDF'
 | 
				
			||||||
                s.downloads['TEX'] = data['url'].strip() + '.tex'
 | 
					 | 
				
			||||||
                s.downloads['MUSE'] = data['url'].strip() + '.muse'
 | 
					 | 
				
			||||||
                s.formats = 'EPUB, PDF, A4.PDF, LT.PDF, TXT, TEX, MUSE'
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                yield s
 | 
					                yield s
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue