From eaf2dd3c874d5985f99f3edea38d840334dc9197 Mon Sep 17 00:00:00 2001
From: mol
Date: Wed, 6 Dec 2023 13:32:45 +0800
Subject: [PATCH] feat: wikijs

---
 engines/wikijs.py | 116 ++++++----------------------------------------
 1 file changed, 14 insertions(+), 102 deletions(-)

diff --git a/engines/wikijs.py b/engines/wikijs.py
index 4d0dfe9..d8e174c 100644
--- a/engines/wikijs.py
+++ b/engines/wikijs.py
@@ -1,21 +1,9 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 
-from collections.abc import Iterable
 from json import loads
 from urllib.parse import urlencode
 from searx.utils import to_string, html_to_text
 
-
-search_url = None
-url_query = None
-content_query = None
-title_query = None
-content_html_to_text = False
-title_html_to_text = False
-paging = False
-suggestion_query = ''
-results_query = ''
-base_url = ''
 search_token = ''
 host = ''
 
@@ -35,75 +23,14 @@
 page_size = 1
 first_page_num = 1
 
 
-def iterate(iterable):
-    if type(iterable) == dict:
-        it = iterable.items()
-
-    else:
-        it = enumerate(iterable)
-    for index, value in it:
-        yield str(index), value
-
-
-def is_iterable(obj):
-    if type(obj) == str:
-        return False
-    return isinstance(obj, Iterable)
-
-
-def parse(query):
-    q = []
-    for part in query.split('/'):
-        if part == '':
-            continue
-        else:
-            q.append(part)
-    return q
-
-
-def do_query(data, q):
-    ret = []
-    if not q:
-        return ret
-
-    qkey = q[0]
-
-    for key, value in iterate(data):
-
-        if len(q) == 1:
-            if key == qkey:
-                ret.append(value)
-            elif is_iterable(value):
-                ret.extend(do_query(value, q))
-        else:
-            if not is_iterable(value):
-                continue
-            if key == qkey:
-                ret.extend(do_query(value, q[1:]))
-            else:
-                ret.extend(do_query(value, q))
-    return ret
-
-
-def query(data, query_string):
-    q = parse(query_string)
-
-    return do_query(data, q)
-
-
 def request(query, params):
     query = urlencode({'q': query})[2:]
 
-    fp = {'query': query}
-    if paging and search_url.find('{pageno}') >= 0:
-        fp['pageno'] = (params['pageno'] - 1) * page_size + first_page_num
-
     headers['Authorization'] = 'Bearer %s' % search_token
     params['cookies'].update(cookies)
     params['headers'].update(headers)
-    params['url'] = host + '?query={pages {search(query: "%s") {results{id title description path locale __typename} suggestions totalHits __typename} __typename }}' % query
-    print(params['url'])
+    params['url'] = host + '/graphql?query={pages {search(query: "%s") {results{id title description path locale __typename} suggestions totalHits __typename} __typename }}' % query
 
     return params
 
@@ -115,42 +42,27 @@
 def response(resp):
     results = []
     json = loads(resp.text)
-    title_filter = html_to_text if title_html_to_text else identity
-    content_filter = html_to_text if content_html_to_text else identity
-    if results_query:
-        rs = query(json, results_query)
+    title_filter = identity
+    content_filter = identity
+    for search_result in json:
+        search = search_result['data']['pages']['search']
+        rs = search['results']
         if not len(rs):
             return results
-        for result in rs[0]:
+        for result in rs:
             try:
-                url = query(result, url_query)[0]
-                title = query(result, title_query)[0]
+                url = result['path']
+                title = result['title']
             except:
                 continue
-            try:
-                content = query(result, content_query)[0]
-            except:
-                content = ""
             results.append(
                 {
-                    'url': base_url + to_string(url),
-                    'title': title_filter(to_string(title)),
-                    'content': content_filter(to_string(content)),
+                    'url': host + to_string(url),
+                    'title': to_string(title),
+                    'content': '',
                 }
             )
-    else:
-        for url, title, content in zip(query(json, url_query), query(json, title_query), query(json, content_query)):
-            results.append(
-                {
-                    'url': base_url + to_string(url),
-                    'title': title_filter(to_string(title)),
-                    'content': content_filter(to_string(content)),
-                }
-            )
-
-    if not suggestion_query:
-        return results
-    for suggestion in query(json, suggestion_query):
-        results.append({'suggestion': suggestion})
+        for suggestion in search['suggestions']:
+            results.append({'suggestion': suggestion})
 
     return results
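
For reference, response() above iterates the parsed body as a list, so it expects
the /graphql endpoint to return a JSON array of GraphQL result objects. A minimal
sketch of the assumed payload, with field names taken from the query built in
request() and purely illustrative values:

    # Hypothetical Wiki.js /graphql response body as assumed by response();
    # the field names come from the GraphQL query string, values are made up.
    body = [
        {
            "data": {
                "pages": {
                    "search": {
                        "results": [
                            {
                                "id": "1",
                                "title": "Home",
                                "description": "Landing page",
                                "path": "home",
                                "locale": "en",
                                "__typename": "PageSearchResult",
                            }
                        ],
                        "suggestions": ["home page"],
                        "totalHits": 1,
                        "__typename": "PageSearchResponse",
                    },
                    "__typename": "PageQuery",
                }
            }
        }
    ]

With this shape, each result's path is appended to host to form the result URL,
and each entry in suggestions becomes a SearXNG suggestion.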