diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..3116c59 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,142 @@ +# ---> Python +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# drone config +.drone.yml diff --git a/.drone.yml b/.drone.yml index 4e3d9eb..a4ab50d 100644 --- a/.drone.yml +++ b/.drone.yml @@ -26,6 +26,7 @@ steps: # 定义管道的执行步骤 registry: https://git.liliyamol.cn:8081 repo: git.liliyamol.cn:8081/mol/rss-server auto_tag: true + tags: ["1.2.0"] --- kind: pipeline # 定义一个管道 @@ -58,6 +59,6 @@ steps: - list=$(docker ps -a | grep rss_server* | awk '{print $1}') - test "$list" = "" && echo "none rss_server containers running" || docker stop $list && docker container rm $list # 过滤出dockerImages的id, 删除none镜像 - - docker run -d -e PYTHONUNBUFFERED=0 --name=rss_server git.liliyamol.cn:8081/mol/rss-server:latest + - docker run -d -e PYTHONUNBUFFERED=0 --env-file /mnt/data/opt/rss-center/.env --name=rss_server git.liliyamol.cn:8081/mol/rss-server:latest - docker rmi $(docker images | grep "none" | awk '{print $3}') - echo ==-----==部署成功==-----== diff --git a/README.md b/README.md index e730308..d2c1bba 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,30 @@ # 更新笔记 +## v1.2.0 + +_Bug fixes_ + +- 自动替换文件名中的非法字符 + +_Features_ + +- 变更上传逻辑 +- 配置外放 + ## v1.1.3 -*Bug fixes* + +_Bug fixes_ + - 修复 docker 容器下 log 打印问题 ## v1.0.1 -*Features* + +_Features_ + - 添加数据库缓存功能 ## v1.0.0 -*Features* -- 完成基本功能开发 \ No newline at end of file + +_Features_ + +- 完成基本功能开发 diff --git a/Rss.py b/Rss.py index 0bfbcb6..576505e 100644 --- a/Rss.py +++ b/Rss.py @@ -1,6 +1,7 @@ import feedparser import pymysql import json +import os class Rss(): def __init__(self, id, type, 
def connect(self):
    """Open a new MySQL connection configured from environment variables.

    Reads ``db_host``, ``db_user``, ``db_passwd``, ``db_port`` and ``db_db``.

    Returns:
        The connection object returned by ``pymysql.connect``.

    Raises:
        ValueError: if ``db_port`` is set but is not a valid integer.
    """
    # Environment values are always strings, whereas the previously
    # hard-coded port was an int; convert explicitly so the driver is handed
    # a number rather than text.
    raw_port = os.getenv('db_port')
    # 3306 is MySQL's standard port, used when db_port is unset or empty.
    port = int(raw_port) if raw_port else 3306
    return pymysql.connect(
        host=os.getenv('db_host'),
        user=os.getenv('db_user'),
        passwd=os.getenv('db_passwd'),
        port=port,
        db=os.getenv('db_db'),
    )
- saveFile(full_name, data) - need_sync_files.append({ 'id': item['id'], 'file_name': full_name }) - else: - result_flag = False - time.sleep(10) - - if not(result_flag): - for entry in entries: - if entry['id'] == item['id']: - failed_entries.append(entry) - (success_ids, failed_ids) = sync(need_sync_files) - - for entry in entries: - if entry['id'] in success_ids: - success_entries.append(entry) - elif entry['id'] in failed_ids: - failed_entries.append(entry) - - return (success_entries, failed_entries) - -def sync(file_list): - success_ids = [] - failed_ids = [] - - options = { - 'webdav_hostname': 'https://pan.liliyamol.cn:8081/webdav/', - 'webdav_login': 'mol', - 'webdav_password': 'YvG4SkF82qd7ks', - 'disable_check': True, - } - - client = Client(options) - temp_path = os.path.join(os.path.dirname(__file__), 'temp') - - for file_item in file_list: - path = os.path.join(temp_path, file_item['file_name']) - try: - client.upload('Pictures/ACGN/Pixiv/' + file_item['file_name'], path) - print('Info: ' + file_item['file_name'] + ' upload success!!') - except LocalResourceNotFound as exception: - print('Error: ' + file_item['file_name'] + ' upload failed!!') - if not(file_item['id'] in failed_ids): - failed_ids.append(file_item['id']) - removeFile(path) - - for file_item in file_list: - if not(file_item['id'] in failed_ids): - success_ids.append(file_item['id']) - - return (success_ids, failed_ids) - -def saveFile(name, data): - temp_path = os.path.join(os.path.dirname(__file__), 'temp') - with open(os.path.join(temp_path, name), 'wb') as code: - code.write(data) - -def removeFile(path): - if len(path) > 0: - try: - os.remove(path) - except: - pass - -def downloadPic(url): - headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE", - } - data = None - status = False - try: - proxies={'https': 'http://192.168.124.12:7890','http': 'http://192.168.124.12:7890'} - res = 
class Webdav(object):
    """Small wrapper around a webdav3 ``Client`` for uploading temp files."""

    def __init__(self, option):
        """Create the client from *option*.

        Args:
            option: mapping with the keys ``webdav_hostname``,
                ``webdav_login``, ``webdav_password``, ``disable_check`` and
                ``default_upload_path``.

        Raises:
            KeyError: if any of those keys is missing.
        """
        webdav_option = {
            'webdav_hostname': option['webdav_hostname'],
            'webdav_login': option['webdav_login'],
            'webdav_password': option['webdav_password'],
            'disable_check': option['disable_check'],
        }
        self.client = self.connect_webdav(webdav_option)
        # Remote directory prefix used when a call gives no upload_path.
        self.default_upload_path = option['default_upload_path']

    def connect_webdav(self, options):
        """Return a webdav3 ``Client`` built from *options*."""
        return Client(options)

    def batch_sync(self, file_list, upload_path=None):
        """Upload every file in *file_list* and report the result per id.

        Args:
            file_list: iterable of dicts with the keys ``id`` and
                ``file_name``; each file is read from the ``temp`` directory
                next to this module.
            upload_path: optional remote directory prefix, forwarded to
                ``sync_file``.

        Returns:
            ``(success_ids, failed_ids)``: two lists of unique ids in
            first-seen order. An id is successful only when all of its files
            were uploaded, so no id ever appears in both lists.
        """
        temp_path = os.path.join(os.path.dirname(__file__), 'temp')

        # id -> True while every file seen so far for that id has uploaded.
        # A dict keeps insertion order and removes duplicate ids.
        outcome = {}
        for file_item in file_list:
            path = os.path.join(temp_path, file_item['file_name'])
            uploaded = self.sync_file(file_item['file_name'], path, upload_path)
            entry_id = file_item['id']
            outcome[entry_id] = outcome.get(entry_id, True) and uploaded

        success_ids = [entry_id for entry_id, ok in outcome.items() if ok]
        failed_ids = [entry_id for entry_id, ok in outcome.items() if not ok]
        return (success_ids, failed_ids)

    def sync_file(self, file_name, path, upload_path=None):
        """Upload one local file.

        Args:
            file_name: name appended to the remote directory prefix.
            path: local path of the file to upload.
            upload_path: remote directory prefix; ``default_upload_path`` is
                used when this is falsy.

        Returns:
            True when the upload succeeded, False when it raised.
        """
        if not upload_path:
            upload_path = self.default_upload_path
        # Built outside the try block so that a misconfigured (non-string)
        # upload path still surfaces as an error instead of a failed upload.
        remote_path = upload_path + file_name

        try:
            self.client.upload(remote_path, path)
        except Exception as exc:
            # Upload boundary: a missing local file, a network problem or a
            # server-side error all mean "this file failed". Reporting False
            # lets the caller mark the entry as failed instead of aborting
            # the whole batch.
            print('Error: ' + file_name + ' upload failed!!')
            print(f'Error: {exc!r}')
            return False

        print('Info: ' + file_name + ' upload success!!')
        return True
# --- handlers/pixiv/comb.py ---

# Compiled once at import time instead of once per entry / content item.
_URL_PATTERN = re.compile(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
_TAG_PATTERN = re.compile(r'tags/(.*)/')


def extract_pixiv_info(entries):
    """Convert feed entries into plain dicts describing each post.

    Args:
        entries: iterable of mappings providing ``id``, ``title``,
            ``author``, ``content`` (a sequence of mappings with a ``value``
            string) and ``source`` (a mapping with an ``id`` string).

    Returns:
        A list with one dict per entry, holding the keys ``id``, ``title``,
        ``link`` (URLs found in the content), ``author`` and ``tag``
        (captures of ``tags/(.*)/`` in the source id).
    """
    pixiv_list = []
    for entry in entries:
        links = []
        for content in entry['content']:
            # NOTE(review): every content item replaces the previous result,
            # so only the URLs of the last item survive. Kept as in the
            # original; confirm this is intended.
            links = _URL_PATTERN.findall(content['value'])

        pixiv_list.append({
            'id': entry['id'],
            'title': entry['title'],
            'link': links,
            'author': entry['author'],
            'tag': _TAG_PATTERN.findall(entry['source']['id']),
        })
    return pixiv_list


def get_prefix_name_on_entry(entry):
    """Return the file-name prefix ``#<tags># @<author>@ <title>``.

    Args:
        entry: a dict as produced by ``extract_pixiv_info``; its ``tag`` list
            is joined with commas.
    """
    tag = ','.join(entry['tag'])
    return f"#{tag}# @{entry['author']}@ {entry['title']}"


# --- handlers/pixiv/file.py ---

def save_file(name, data):
    """Write *data* (bytes) to ``temp/<name>`` next to this module.

    The ``temp`` directory is created on demand: it is not part of the
    repository, so the first write would otherwise fail with
    ``FileNotFoundError``.
    """
    temp_path = os.path.join(os.path.dirname(__file__), 'temp')
    os.makedirs(temp_path, exist_ok=True)
    with open(os.path.join(temp_path, name), 'wb') as handle:
        handle.write(data)


def remove_file(path):
    """Delete *path* if possible; an empty or ``None`` path is ignored."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        # Best-effort cleanup: a file that is already gone (or cannot be
        # removed) must not interrupt the caller.
        pass
# --- handlers/pixiv/pixiv_handler.py ---

# Shared WebDAV uploader, configured entirely from environment variables.
webdav = Webdav({
    'webdav_hostname': os.getenv('webdav_hostname'),
    'webdav_login': os.getenv('webdav_login'),
    'webdav_password': os.getenv('webdav_password'),
    'disable_check': True,
    'default_upload_path': os.getenv('default_upload_path'),
})

# Compiled once at import time instead of once per URL.
_FILE_NAME_PATTERN = re.compile(r'\/(\w*\.(?:jpg|png))$')
# Characters that are not allowed in file names; each is replaced by "_".
_ILLEGAL_NAME_CHARS = re.compile(r'[\/\\\:\*\?\"\<\>\|]')


def pixiv_result_handler(entries):
    """Download the images of each feed entry and upload them over WebDAV.

    Args:
        entries: feed entries in the shape accepted by
            ``extract_pixiv_info``.

    Returns:
        ``(success_entries, failed_entries)``. An entry fails when any of its
        images cannot be downloaded or uploaded. An entry without any link
        ending in ``.jpg``/``.png`` appears in neither list.
    """
    success_entries = []
    failed_entries = []
    # save_file() and Webdav.batch_sync() both use the "temp" directory next
    # to their own module, which is this module's directory as well.
    temp_dir = os.path.join(os.path.dirname(__file__), 'temp')

    for item in extract_pixiv_info(entries):
        prefix_name = get_prefix_name_on_entry(item)
        downloaded_all = True
        need_sync_files = []

        for url in item['link']:
            file_name = ','.join(_FILE_NAME_PATTERN.findall(url))
            if not file_name:
                continue

            full_name = _ILLEGAL_NAME_CHARS.sub("_", f'{prefix_name} {file_name}')
            (status, data) = downloadPic(url)
            if status:
                save_file(full_name, data)
                need_sync_files.append({'id': item['id'], 'file_name': full_name})
            else:
                downloaded_all = False
            # NOTE(review): the flattened source does not show which block
            # this pause belongs to; it is placed after every download
            # attempt here. Confirm against the original file.
            time.sleep(10)

        matching = [entry for entry in entries if entry['id'] == item['id']]
        try:
            if downloaded_all:
                (success_ids, failed_ids) = webdav.batch_sync(need_sync_files)
                # Failure is checked first so that an entry with a partially
                # failed upload is never reported as successful.
                if item['id'] in failed_ids:
                    failed_entries.extend(matching)
                elif item['id'] in success_ids:
                    success_entries.extend(matching)
            else:
                failed_entries.extend(matching)
        finally:
            # Temp files are no longer needed once the upload was attempted
            # (or skipped); without this they accumulate indefinitely.
            for file_item in need_sync_files:
                remove_file(os.path.join(temp_dir, file_item['file_name']))

    return (success_entries, failed_entries)


# --- handlers/pixiv/request.py ---

requests.packages.urllib3.disable_warnings()


def downloadPic(url):
    """Fetch *url* through the proxies named in the environment.

    Proxies are read from the ``https_proxy`` and ``http_proxy`` environment
    variables.

    Returns:
        ``(status, data)``: ``(True, <response body bytes>)`` on success,
        ``(False, None)`` when the request fails or the server answers with
        an HTTP error status.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE",
    }
    proxies = {'https': os.getenv('https_proxy'), 'http': os.getenv('http_proxy')}

    try:
        # NOTE(review): verify=False disables TLS certificate validation; it
        # is kept from the original, confirm that it is still required.
        with requests.get(url, headers=headers, verify=False, timeout=(5, 5), proxies=proxies) as res:
            # Without this check a 4xx/5xx error page would be returned as
            # if it were image data.
            res.raise_for_status()
            data = res.content
    except requests.RequestException as exc:
        print(f'Error: download failed {url}')
        print(f'Error: {exc!r}')
        return (False, None)

    print(f'Info: download success {url}')
    return (True, data)