feat: v1
This commit is contained in:
9
Dockerfile
Normal file
9
Dockerfile
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
FROM python:3.6

# Absolute path so the application directory does not depend on whatever
# working directory the base image happens to define.
WORKDIR /rss-center

# Dependencies are copied and installed before the sources so this layer is
# only rebuilt when requirements.txt changes.
COPY requirements.txt ./
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

COPY . .

CMD ["python", "./start.py"]
|
93
Rss.py
93
Rss.py
@ -1,27 +1,92 @@
|
|||||||
import feedparser
|
import feedparser
|
||||||
import pprint
|
import pymysql
|
||||||
import re
|
import json
|
||||||
|
|
||||||
class Rss():
    """One RSS feed: fetch it, pass unseen and previously failed entries to a
    handler, and record the outcome of every entry in the ``rss_log`` table.

    Parameters
    ----------
    id : identifier of the feed (stored, not otherwise used by this class).
    type : value written to / matched against the ``type`` column of ``rss_log``.
    url : feed URL handed to ``feedparser.parse``.
    result_handler : callable taking the list of entries to process and
        returning ``(success_entries, failed_entries)``.
    """

    def __init__(self, id, type, url, result_handler) -> None:
        self.rss_url = url
        self.id = id
        self.type = type
        self.result_handler = result_handler
        self.rss_db = RssDB()

    @staticmethod
    def _load_stored_entry(raw):
        """Decode the ``entry_content`` column of a previously failed entry.

        ``save_result`` writes the column with ``json.dumps``, so the text is
        parsed as-is first. Blindly replacing every ``'`` with ``"`` (as the
        code used to do) corrupts any entry whose text contains an apostrophe.
        The quote-swapped form is kept only as a fallback, in case rows in a
        single-quoted form exist.

        Returns the decoded entry, or ``None`` when it cannot be decoded.
        """
        try:
            return json.loads(raw)
        except (TypeError, ValueError):
            pass
        try:
            return json.loads(raw.replace("'", '"'))
        except (AttributeError, ValueError):
            return None

    def run(self):
        """Run one polling cycle for this feed."""
        feed = feedparser.parse(self.rss_url)
        new_entries = self.compare_entries(feed['entries'])

        # Entries logged with is_success=0 are retried on every cycle.
        retry_entries = []
        for row in self.rss_db.fetchData(self.type, 0):
            entry = self._load_stored_entry(row[1])
            if entry is None:
                # Skip the row instead of aborting the whole cycle.
                print('Error: cannot decode stored entry ' + str(row[0]))
                continue
            retry_entries.append(entry)

        result_list = new_entries + retry_entries
        print('need handle rss entry number: ' + str(len(result_list)))

        # Call the handler registered for this feed type.
        (success_entries, failed_entries) = self.result_handler(result_list)

        # Persist the processing result.
        self.save_result(success_entries, failed_entries)

    def compare_entries(self, entries):
        """Return the entries whose ``id`` is not yet present in ``rss_log``.

        Every logged row of this feed type counts (successful or failed), so
        failed entries are not returned here; ``run`` re-adds them from the
        database instead.
        """
        known_ids = {row[0] for row in self.rss_db.fetchData(self.type, None)}
        return [entry for entry in entries if entry['id'] not in known_ids]

    def save_result(self, success_entries, failed_entries):
        """Write one ``rss_log`` row per processed entry.

        Successful entries are stored with empty content; failed entries keep
        their JSON-serialised content so they can be retried. Each entry must
        provide ``'id'`` and ``'updated'``.
        """
        success_rows = [
            {
                'entry_id': entry['id'],
                'entry_content': '',
                'type': self.type,
                'is_success': 1,
                'gmt_create': entry['updated'],
            }
            for entry in success_entries
        ]
        failed_rows = [
            {
                'entry_id': entry['id'],
                'entry_content': json.dumps(entry),
                'type': self.type,
                'is_success': 0,
                'gmt_create': entry['updated'],
            }
            for entry in failed_entries
        ]

        for row in success_rows + failed_rows:
            self.rss_db.updateData(
                row['entry_id'],
                row['entry_content'],
                row['type'],
                row['is_success'],
                row['gmt_create'],
            )
|
||||||
|
|
||||||
|
class RssDB():
    """Small data-access helper for the ``rss_log`` MySQL table.

    A new connection is opened for every call and always closed afterwards.

    The connection settings default to the values that used to be hard-coded
    in ``connect`` so that ``RssDB()`` keeps working unchanged.
    NOTE(review): credentials should not live in source control; pass them in
    from configuration instead.
    """

    def __init__(self, host='192.168.124.12', user='rss_server',
                 passwd='96QDm2Ldg^9Ngh', port=15006, db='RSS'):
        self.host = host
        self.user = user
        self.passwd = passwd
        self.port = port
        self.db = db

    def connect(self):
        """Open and return a new ``pymysql`` connection."""
        return pymysql.connect(host=self.host, user=self.user,
                               passwd=self.passwd, port=self.port, db=self.db)

    def fetchData(self, type, is_success):
        """Return ``(entry_id, entry_content)`` rows of ``rss_log`` for ``type``.

        When ``is_success`` is ``None`` all rows of that type are returned,
        otherwise only rows whose ``is_success`` column equals it.
        """
        if is_success is None:
            sql = 'SELECT entry_id, entry_content FROM rss_log WHERE type=%s'
            params = (type,)
        else:
            sql = 'SELECT entry_id, entry_content FROM rss_log WHERE type=%s AND is_success=%s'
            params = (type, is_success)

        db = self.connect()
        try:
            cursor = db.cursor()
            cursor.execute(sql, params)
            return cursor.fetchall()
        finally:
            # Close even when the query raises, so connections do not leak.
            db.close()

    def updateData(self, entry_id, entry_content, type, is_success, gmt_create):
        """Insert the log row for ``entry_id`` or update it if it already exists.

        A database error during the write is rolled back and reported on
        stdout; it is not raised to the caller.
        """
        db = self.connect()
        try:
            cursor = db.cursor()
            cursor.execute('SELECT id FROM rss_log WHERE entry_id=%s', (entry_id,))
            row = cursor.fetchone()

            if row is None:
                action = 'INSERT'
                sql = 'INSERT INTO rss_log ( entry_id, entry_content, type, is_success, gmt_create ) VALUES (%s, %s, %s, %s, %s)'
                params = (entry_id, entry_content, type, is_success, gmt_create)
            else:
                action = 'Update'
                sql = 'UPDATE rss_log SET entry_id=%s, entry_content=%s, type=%s, is_success=%s, gmt_create=%s WHERE id=%s'
                # fetchone() returns a row tuple; the key is its first column.
                params = (entry_id, entry_content, type, is_success, gmt_create, row[0])

            try:
                cursor.execute(sql, params)
                db.commit()
            except pymysql.MySQLError as error:
                db.rollback()
                print(f'Error: {action} failed{entry_id}')
                print(f'Error: {error!r}')
        finally:
            db.close()
|
||||||
|
|
@ -1,18 +1,32 @@
|
|||||||
import thread
|
from threading import Thread, Timer
|
||||||
|
|
||||||
from Rss import *
|
from Rss import *
|
||||||
|
|
||||||
class RssManager():
    """Run a set of ``Rss`` feeds periodically.

    ``rss_options`` is a list of dicts with the keys ``'id'``, ``'type'``,
    ``'url'`` and ``'result_handler'``; one ``Rss`` is built per dict.
    """

    def __init__(self, rss_options):
        self.rss_list = [Rss(opt['id'], opt['type'], opt['url'], opt['result_handler']) for opt in rss_options]
        self.running = False
        self.timer = None

    def run(self):
        """Process every feed once, each in its own thread, and wait for all.

        If a thread cannot be started only that feed is run synchronously;
        feeds whose thread already started are not run a second time.
        Waiting for the workers keeps the next scheduled cycle from
        overlapping with one that is still processing the same entries.
        """
        workers = []
        for rss in self.rss_list:
            try:
                print('Info: start async run')
                worker = Thread(target=rss.run)
                worker.start()
            except RuntimeError:
                # Thread.start() raises RuntimeError when no thread can be created.
                print('Error: unable to start thread')
                print('Info: start sync run')
                rss.run()
            else:
                workers.append(worker)

        for worker in workers:
            worker.join()

    def start(self, interval=120):
        """Block and call ``run`` every ``interval`` seconds until ``stop``.

        The first cycle starts after one full interval.
        """
        self.running = True
        while self.running:
            self.timer = Timer(interval, self.run)
            self.timer.start()
            self.timer.join()

    def stop(self):
        """Leave the ``start`` loop and cancel a timer that has not fired yet."""
        self.running = False
        timer = getattr(self, 'timer', None)
        if timer is not None:
            # Without this, start() stays blocked until the interval elapses.
            timer.cancel()
|
||||||
|
|
||||||
|
BIN
__pycache__/Rss.cpython-310.pyc
Normal file
BIN
__pycache__/Rss.cpython-310.pyc
Normal file
Binary file not shown.
BIN
__pycache__/RssManager.cpython-310.pyc
Normal file
BIN
__pycache__/RssManager.cpython-310.pyc
Normal file
Binary file not shown.
@ -1,3 +1,12 @@
|
|||||||
|
import re
|
||||||
|
import os
|
||||||
|
import requests
|
||||||
|
import time
|
||||||
|
from webdav3.client import Client
|
||||||
|
from webdav3.exceptions import LocalResourceNotFound, RemoteResourceNotFound
|
||||||
|
|
||||||
|
requests.packages.urllib3.disable_warnings()
|
||||||
|
|
||||||
def pixiv_handler(entries):
|
def pixiv_handler(entries):
|
||||||
# 整理为JSON数组
|
# 整理为JSON数组
|
||||||
pixiv_list = []
|
pixiv_list = []
|
||||||
@ -9,12 +18,111 @@ def pixiv_handler(entries):
|
|||||||
links = (re.findall(pattern, i['value']))
|
links = (re.findall(pattern, i['value']))
|
||||||
|
|
||||||
tag_pattern = re.compile(r'tags/(.*)/')
|
tag_pattern = re.compile(r'tags/(.*)/')
|
||||||
item = {id: entry['id'], 'title': entry['title'], 'link': links,
|
item = {'id': entry['id'], 'title': entry['title'], 'link': links, 'author': entry['author'], 'tag': re.findall(tag_pattern, entry['source']['id'])}
|
||||||
'author': entry['author'], 'tag': re.findall(tag_pattern, entry['source'].id)}
|
|
||||||
|
|
||||||
pixiv_list.append(item)
|
pixiv_list.append(item)
|
||||||
|
|
||||||
return pixiv_list
|
return pixiv_list
|
||||||
|
|
||||||
def pixiv_result_handler(entries):
    """Download the pictures of every entry, upload them, and report the result.

    ``entries`` are converted with ``pixiv_handler``; every ``.jpg``/``.png``
    link of an item is downloaded into the local temp directory and the
    downloaded files are then uploaded through ``sync``.

    Returns ``(success_entries, failed_entries)``, both lists of the original
    entry objects. An entry is failed when any of its downloads, local writes
    or uploads failed, and successful when all of its files were uploaded.
    An entry is never reported in both lists; entries without any matching
    picture link appear in neither.
    """
    if not entries:
        return ([], [])

    # Compiled once instead of once per link.
    file_name_pattern = re.compile(r'\/(\w*\.(?:jpg|png))$')

    need_sync_files = []
    download_failed_ids = set()

    for item in pixiv_handler(entries):
        tag = ','.join(item['tag'])
        prefix_name = f"#{tag}# @{item['author']}@ {item['title']}"
        # Feed text may contain path separators, which would make the local
        # file name point into a non-existent directory.
        for separator in {'/', os.sep}:
            prefix_name = prefix_name.replace(separator, '_')

        for url in item['link']:
            file_name = ','.join(file_name_pattern.findall(url))
            if not file_name:
                continue

            full_name = f'{prefix_name} {file_name}'
            (status, data) = downloadPic(url)
            if status:
                try:
                    saveFile(full_name, data)
                except OSError as error:
                    print(f'Error: save failed {full_name}: {error!r}')
                    download_failed_ids.add(item['id'])
                else:
                    need_sync_files.append({'id': item['id'], 'file_name': full_name})
            else:
                download_failed_ids.add(item['id'])
            # Pause between downloads.
            time.sleep(10)

    (success_ids, upload_failed_ids) = sync(need_sync_files)

    failed_ids = download_failed_ids.union(upload_failed_ids)
    # A partially downloaded entry must not also be reported as successful.
    succeeded_ids = set(success_ids) - failed_ids

    success_entries = []
    failed_entries = []
    for entry in entries:
        if entry['id'] in failed_ids:
            failed_entries.append(entry)
        elif entry['id'] in succeeded_ids:
            success_entries.append(entry)

    return (success_entries, failed_entries)
|
||||||
|
|
||||||
|
def sync(file_list, options=None):
    """Upload downloaded files to the WebDAV server and delete the local copies.

    ``file_list`` holds dicts with ``'id'`` (the entry id) and ``'file_name'``
    (a file inside the local ``temp`` directory). ``options`` is the
    configuration dict for ``webdav3.client.Client``; when omitted, the
    settings that used to be hard-coded here are used.
    NOTE(review): credentials should not live in source control; pass
    ``options`` in from configuration instead.

    Returns ``(success_ids, failed_ids)``: ids whose files all uploaded, and
    ids with at least one failed upload. Each id is listed once.
    """
    if not file_list:
        # Nothing to upload: no need to create a client.
        return ([], [])

    from webdav3.exceptions import WebDavException

    if options is None:
        options = {
            'webdav_hostname': 'https://pan.hiiragi.club:8081/webdav/',
            'webdav_login': 'mol',
            'webdav_password': 'YvG4SkF82qd7ks',
            'disable_check': True,
        }

    client = Client(options)
    temp_path = os.path.join(os.path.dirname(__file__), 'temp')

    failed_ids = []
    for file_item in file_list:
        path = os.path.join(temp_path, file_item['file_name'])
        try:
            client.upload('Pictures/ACGN/Pixiv/' + file_item['file_name'], path)
        except (WebDavException, OSError) as exception:
            # Any upload problem marks the entry as failed instead of
            # aborting the remaining uploads.
            print('Error: ' + file_item['file_name'] + ' upload failed!!')
            print(f'Error: {exception!r}')
            if file_item['id'] not in failed_ids:
                failed_ids.append(file_item['id'])
        else:
            print('Info: ' + file_item['file_name'] + ' upload success!!')
        finally:
            # The local copy is removed whether or not the upload worked.
            removeFile(path)

    success_ids = []
    for file_item in file_list:
        entry_id = file_item['id']
        if entry_id not in failed_ids and entry_id not in success_ids:
            success_ids.append(entry_id)

    return (success_ids, failed_ids)
|
||||||
|
|
||||||
|
def saveFile(name, data):
    """Write ``data`` (bytes) to ``<module dir>/temp/<name>``.

    The ``temp`` directory is created on first use; previously a missing
    directory made every write fail.
    """
    temp_path = os.path.join(os.path.dirname(__file__), 'temp')
    os.makedirs(temp_path, exist_ok=True)
    with open(os.path.join(temp_path, name), 'wb') as output:
        output.write(data)
|
||||||
|
|
||||||
|
def removeFile(path):
    """Delete the file at ``path`` on a best-effort basis.

    An empty or ``None`` path is ignored, and so is any ``OSError`` from the
    removal (for example when the file does not exist). Other exceptions are
    no longer swallowed.
    """
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        pass
|
||||||
|
|
||||||
|
def downloadPic(url, proxies=None):
    """Download ``url`` and return ``(status, data)``.

    ``status`` is ``True`` and ``data`` holds the response body only when the
    request succeeded with a non-error HTTP status; otherwise the result is
    ``(False, None)``. ``proxies`` is passed to ``requests.get``; when
    omitted, the proxy that used to be hard-coded here is used.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE",
    }
    if proxies is None:
        proxies = {'https': 'http://192.168.124.12:7890', 'http': 'http://192.168.124.12:7890'}

    try:
        # NOTE(review): verify=False disables TLS certificate checks.
        res = requests.get(url, headers=headers, verify=False, timeout=(5, 5), proxies=proxies)
        try:
            # Without this a 403/404 error page would be saved as a picture.
            res.raise_for_status()
            data = res.content
        finally:
            res.close()
    except requests.RequestException:
        print(f'Error: download failed {url}')
        return (False, None)

    print(f'Info: download success {url}')
    return (True, data)
|
BIN
handlers/__pycache__/PixivHandler.cpython-310.pyc
Normal file
BIN
handlers/__pycache__/PixivHandler.cpython-310.pyc
Normal file
Binary file not shown.
BIN
handlers/__pycache__/__init__.cpython-310.pyc
Normal file
BIN
handlers/__pycache__/__init__.cpython-310.pyc
Normal file
Binary file not shown.
@ -1,5 +1,4 @@
|
|||||||
thread
|
|
||||||
feedparser
|
feedparser
|
||||||
pprint
|
|
||||||
requests
|
requests
|
||||||
webdavclient3
|
webdavclient3
|
||||||
|
PyMySQL
|
4
start.py
4
start.py
@ -6,8 +6,8 @@ class CreateRssServer():
|
|||||||
rss_options = [
|
rss_options = [
|
||||||
{
|
{
|
||||||
'id': 'pixiv',
|
'id': 'pixiv',
|
||||||
|
'type': 0,
|
||||||
'url': 'https://rss.hiiragi.club:8081/public.php?op=rss&id=-2&is_cat=0&q=&key=pp9ejw64463b6621a0b',
|
'url': 'https://rss.hiiragi.club:8081/public.php?op=rss&id=-2&is_cat=0&q=&key=pp9ejw64463b6621a0b',
|
||||||
'rss_pipe_handler': pixiv_handler,
|
|
||||||
'result_handler': pixiv_result_handler
|
'result_handler': pixiv_result_handler
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@ -17,4 +17,4 @@ rss_server = CreateRssServer()
|
|||||||
app = rss_server.app
|
app = rss_server.app
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
app.run()
|
app.start()
|
||||||
|
Reference in New Issue
Block a user