Improve search.

* do not only return the 10 items from the first page, but loop over at most `max_pages` pages
* use https://usa.anarchistlibraries.net/ as fallback when the main site is not available
* drop formats TXT, TEX, MUSE from search results as they cannot be displayed in calibre
* on Python 3 use quote_plus instead of quote
* obtain the version number in the user agent string from module TheAnarchistLibraryStore
2020-12-31 16:26:36 +00:00 · 2020-12-31 16:26:36 +00:00 · e4daeb15d2
commit e4daeb15d2
parent fc96806830
1 changed files with 38 additions and 15 deletions
--- a/theanarchistlibrary_store/theanarchistlibrary_plugin.py
+++ b/theanarchistlibrary_store/theanarchistlibrary_plugin.py
@@ -1,24 +1,37 @@
 __license__ = 'GPL 3'
-__copyright__ = '2012, Ruben Pollan <meskio@sindominio.net>'
+__copyright__ = '2012, Ruben Pollan <meskio@sindominio.net>; 2020, ibu radempa <ibu@radempa.de>'
 __docformat__ = 'restructuredtext en'

+import json
 try:
-    from urllib.parse import quote
+    from urllib.parse import quote_plus as quote
 except:
    from urllib2 import quote
 try:
    from PyQt5.Qt import QUrl
 except:
    from PyQt4.Qt import QUrl
-
 from contextlib import closing
-import json
 from calibre import browser
 from calibre.gui2 import open_url
 from calibre.gui2.store import StorePlugin
 from calibre.gui2.store.basic_config import BasicStoreConfig
 from calibre.gui2.store.search_result import SearchResult
 from calibre.gui2.store.web_store_dialog import WebStoreDialog
+from . import TheAnarchistLibraryStore
+
+
+url1 = 'https://theanarchistlibrary.org/search?fmt=json&page=%s&query=%s'
+url2 = 'https://usa.anarchistlibraries.net/search?fmt=json&page=%s&query=%s'
+"""Search URLs. If the library has no fallback url, set url2 = None."""
+
+
+max_pages = 10
+"""Page limit. (amusewiki gives us 10 results per page.)"""
+
+
+user_agent = 'Calibre plugin calibre-tal v' + '{}.{}.{}'.format(*TheAnarchistLibraryStore.version)
+

 class TheAnarchistLibraryStore(BasicStoreConfig, StorePlugin):

@@ -33,14 +46,28 @@ class TheAnarchistLibraryStore(BasicStoreConfig, StorePlugin):
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

-    def search(self, query, max_results=10, timeout=60):
-        url = 'http://theanarchistlibrary.org/search?fmt=json&query=' + quote(query)
+    def search(self, query, max_results=10, timeout=10):
+        br = browser(user_agent=user_agent)
+        page = 0
+        while page < max_pages:
+            page += 1
+            try:
+                for result in self._iter_search_results(br, url1, page, query, timeout):
+                    if result is False:
+                        return
+                    yield result
+            except:
+                if url2:
+                    for result in self._iter_search_results(br, url2, page, query, timeout):
+                        if result is False:
+                            return
+                        yield result

-        br = browser()
-
-        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
+    def _iter_search_results(self, br, url, page, query, timeout):
+        with closing(br.open(url % (page, quote(query)), timeout=timeout)) as f:
            doc = json.load(f)
+            if not doc:
+                yield False
            for data in doc:
                s = SearchResult()
                s.title = data['title'].strip()
@@ -52,9 +79,5 @@ class TheAnarchistLibraryStore(BasicStoreConfig, StorePlugin):
                s.downloads['PDF'] = data['url'].strip() + '.pdf'
                s.downloads['A4.PDF'] = data['url'].strip() + '.a4.pdf'
                s.downloads['LT.PDF'] = data['url'].strip() + '.lt.pdf'
-                s.downloads['TXT'] = data['url'].strip() + '.txt'
-                s.downloads['TEX'] = data['url'].strip() + '.tex'
-                s.downloads['MUSE'] = data['url'].strip() + '.muse'
-                s.formats = 'EPUB, PDF, A4.PDF, LT.PDF, TXT, TEX, MUSE'
-
+                s.formats = 'EPUB, PDF, A4.PDF, LT.PDF'
                yield s