From 2f4742b84cc62f42345ac19468ef1af42ff0d0e2 Mon Sep 17 00:00:00 2001 From: mol Date: Wed, 26 Apr 2023 16:47:36 +0800 Subject: [PATCH] feat: v1 --- Dockerfile | 9 ++ Rss.py | 93 +++++++++++--- RssManager.py | 24 +++- __pycache__/Rss.cpython-310.pyc | Bin 0 -> 3906 bytes __pycache__/RssManager.cpython-310.pyc | Bin 0 -> 1433 bytes handlers/PixivHandler.py | 118 +++++++++++++++++- .../__pycache__/PixivHandler.cpython-310.pyc | Bin 0 -> 3547 bytes handlers/__pycache__/__init__.cpython-310.pyc | Bin 0 -> 149 bytes requirements.txt | 5 +- start.py | 4 +- 10 files changed, 224 insertions(+), 29 deletions(-) create mode 100644 Dockerfile create mode 100644 __pycache__/Rss.cpython-310.pyc create mode 100644 __pycache__/RssManager.cpython-310.pyc create mode 100644 handlers/__pycache__/PixivHandler.cpython-310.pyc create mode 100644 handlers/__pycache__/__init__.cpython-310.pyc diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..1d09453 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,9 @@ +FROM python:3.6 +WORKDIR ./rss-center + +COPY requirements.txt ./ +RUN pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple + +COPY . . + +CMD ["python", "./start.py"] \ No newline at end of file diff --git a/Rss.py b/Rss.py index d49d147..afbc297 100644 --- a/Rss.py +++ b/Rss.py @@ -1,27 +1,92 @@ import feedparser -import pprint -import re +import pymysql +import json class Rss(): - def __init__(self, id, url, rss_pipe_handler, result_handler) -> None: + def __init__(self, id, type, url, result_handler) -> None: self.rss_url = url self.id = id - self.rss_pipe_handler = rss_pipe_handler + self.type = type + # self.rss_pipe_handler = rss_pipe_handler self.result_handler = result_handler + self.rss_db = RssDB() def run(self): rss_source = feedparser.parse(self.rss_url) - result_list = self.rss_pipe_handler(rss_source['entries']) - - result_list = self.compare_result(pprint.pprint(result_list)) - ids = self.result_handler(result_list) + rss_source = self.compare_entries(rss_source['entries']) - self.save_result(ids) - return + db_source = self.rss_db.fetchData(self.type, 0) + result_list = rss_source + [json.loads(i[1].replace("'", '"')) for i in db_source] + + print('need handle rss entry number: ' + str(len(result_list))) + # 调用对应类型的处理函数 + (success_entries, failed_entries) = self.result_handler(result_list) - def compare_result(self, list): - return list + # 保存处理结果 + self.save_result(success_entries, failed_entries) - def save_result(self, ids): - return ids + def compare_entries(self, entries): + db_list = self.rss_db.fetchData(self.type, None); + success_ids = [i[0] for i in db_list] + result = [] + for item in entries: + if not(item['id'] in success_ids): + result.append(item) + return result + + def save_result(self, success_entries, failed_entries): + success_list = [{ 'entry_id': entry['id'], 'entry_content': '', 'type': self.type, 'is_success': 1, 'gmt_create': entry['updated'] } for entry in success_entries] + failed_list = [{ 'entry_id': entry['id'], 'entry_content': json.dumps(entry), 'type': self.type, 'is_success': 0, 'gmt_create': entry['updated'] } for entry in failed_entries] + + list = success_list + failed_list + for item in list: + self.rss_db.updateData(item['entry_id'], item['entry_content'], item['type'], item['is_success'], item['gmt_create']) + +class RssDB(): + def __init(self): + pass + + def connect(self): + return pymysql.connect(host='192.168.124.12', user='rss_server', passwd='96QDm2Ldg^9Ngh', port=15006, db='RSS') + + def fetchData(self, type, is_success): + db = self.connect() + cursor = db.cursor() + sql = '' + if is_success is None: + sql = 'SELECT entry_id, entry_content FROM rss_log WHERE type=%s' + cursor.execute(sql, type) + else: + sql = 'SELECT entry_id, entry_content FROM rss_log WHERE type=%s AND is_success=%s' + cursor.execute(sql, (type, is_success)) + data = cursor.fetchall() + db.close() + return data + + def updateData(self, entry_id, entry_content, type, is_success, gmt_create): + db = self.connect() + cursor = db.cursor() + sql = 'SELECT id FROM rss_log WHERE entry_id=%s' + cursor.execute(sql, entry_id) + key = cursor.fetchone() + if key is None: + try: + sql = 'INSERT INTO rss_log ( entry_id, entry_content, type, is_success, gmt_create ) VALUES (%s, %s, %s, %s, %s)' + cursor.execute(sql, (entry_id, entry_content, type, is_success, gmt_create)) + db.commit() + # print('Info: INSERT success' + entry_id) + except: + db.rollback() + print('Error: INSERT failed' + entry_id) + else: + try: + sql = 'UPDATE rss_log SET entry_id=%s, entry_content=%s, type=%s, is_success=%s, gmt_create=%s WHERE id=%s' + cursor.execute(sql, (entry_id, entry_content, type, is_success, gmt_create, key)) + db.commit() + # print('Info: Update success' + entry_id) + except: + db.rollback() + print('Error: Update failed' + entry_id) + db.close() + \ No newline at end of file diff --git a/RssManager.py b/RssManager.py index b4214d1..6c3dc5f 100644 --- a/RssManager.py +++ b/RssManager.py @@ -1,18 +1,32 @@ -import thread +from threading import Thread, Timer from Rss import * class RssManager(): def __init__(self, rss_options): - self.rss_list = [Rss(opt['id'], opt['url'], opt['rss_pipe_handler'], opt['result_handler']) for opt in rss_options] + self.rss_list = [Rss(opt['id'], opt['type'], opt['url'], opt['result_handler']) for opt in rss_options] + self.running = False def run(self): try: - for rss in rss_list: - thread.start_new_thread(rss.run) + for rss in self.rss_list: + print('Info: start async run') + t = Thread(target=rss.run) + t.start() except: print('Error: unable to start thread') print('Info: start sync run') - for rss in rss_list: + for rss in self.rss_list: rss.run() + def start(self): + # self.run() + self.running = True + while self.running: + self.timer = Timer(120, self.run) + self.timer.start() + self.timer.join() + + def stop(self): + self.running = False + diff --git a/__pycache__/Rss.cpython-310.pyc b/__pycache__/Rss.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b2d93b58bb9b6b6accb2c7c449a787e2de309f00 GIT binary patch literal 3906 zcmbVOTW=f36`q;Ba7j^=Dhs|eZAv&7n}#CAN}5Kg>NW#sN-^~X+I_U-b%@N$n9ni}$uj{dG8@uL?IVV#BiR-^<;{kxODakHzKXPH zOKqozy-?RwnDnF6-*0tg6e{gt-}R5NGiJ)2hNJByjCPfaeN+iTTUf6=^iG+Y#pA`T zmHoIAu5{vPrKjS9Fi2N=hv|OYU4f3bg0P#0Y6V)CdxzTd{dTvV`abDP2pyN3zo_y- zl_rfQ1RNhnKblR~HdpSVld@x6_duNRA)d!p#wEYT^3j2P?BE*=c_M}aU&!Dr6YK1V zUtvQlElDT!(sE`Uae8yaIm~oi$*dFpXEC&gSbaPL6FMh?gAN37Q1CyHA# z0Z3slY6an`&|>**&D+Mzxd4lquH1>*NgBkR-rY;ICmrBA8@xj1Q~mDpohS}kQF0fZ z(Y(Q{-7u8(9OOE1snc~1>)n25CsZHQLAc5;X7pMrArjYAZ>VwxY0W7@}vA*0a#h8P;&s46UqScRz{yDhPF1?u@!K1rO=S zdM@b)L5O+9p1QW%YDb|Qjn7)d!nMgX35hDN@p-Yp7e!6XTZ2+zmVnGO`VpaQG()IQ z&;elzN=ZhvA-qPU8CrX6XlHz#P<1j#TG!xn&$y~*HmK*EfIv?#Wj1iNo9C!O+Zbvr zqGPDxg0qGe1sS;ERDP!`m~`fL>gRa(Dw+X$kqvIpjn1HQ5$MKg8@3|A6e-A|a$CJ# z*p&qyY91eS2|yUj&x|bJ1``o}C^l%jorayWr}`z<{Q@l^1@o}E%{^Y_6)yg6i@~J} z&KOWc9Q`Pkl+oy2vIFrCwhJeQ)jhKG5g%ISjb+{#=LH5&lX__)$Tfuh|2PF+jxnre>StJ^ zOZ}c~rJ+1^v}c^kM{J$VdqIrg2S+o3v(g6db6)GpUMKYf6=Gq7o6o00T_tsnS$WEh zv*b1>QX3N7K*x~aO*VLq3_Rst7ZKnb7st0Bq%wk@!4-;WX`_C)NT5*N`efd-XZZWL_-*^ z>Pl`>iY|>(ZU}-$(fxIG#L%3tMPM)ko1}V+rkc&oG0X`lER%|)3bDW!xF0#&OnrGHcFJNx~)#=`?})$omloG>Q{aLX}=ZaU(_7tD!Q8@e^9i~l%q^cK*5K? zsmXaBt8mZrYSoIO_W{~VKYIBQT0$_5D+d#iP&b%#hPolIADp6oaAg_sr=rNGUc#Lq zqjn!Hpv?3FP>RG4;wYB&DxZYWPB=^2vYwqp0zx@DETbo;3RDtZr z$O(3zGLzeDCCOh=y!2v~YJmzx=u+>nbC^7hbP4{}Mg1|wzww>aL!68_XD+nJ2eSYk z7b=#Vi%sKl<4deTLBLK>I%L){#}joQVvvxm&(swu(6^68=46}-hHgb#lQ)^zMczk& zflPswE;QIDz$0@_wouE^K3Ln@c(if7_v&9g*x1~t zQ|KktaDC6QHa4~oEqdZ3R zHbnalodOm&a}6vdz~W^kK!Fruj9xNEFK1-~)o%xv0M>X0N@b*z%-J<{9&{LBzGB@~ z4uA-vV~3zhU5WZKuM_{ox~~lurF~vYR^D~Zo)bN_b9z9LksfF-k@lj#9Na8`Y|D$_ z9wiony#27bwXylQ{;>J@i!m#1{uFtZ3{{q? zu(+XAjPkJ9F^{!_;K?7>?>*ia@4dA#p)L-1hP~!ZLD%VHnD>*DU|3MQEzuuu{lsL~ zdcJkTGYmCMQ=fqW1vTSt2-e1(PCM0}ilb5W*;+NM72xe<6h1C8$~+gs=2weR4bQ=R9%d#P4VguKNT<0=;&%{8 zDP~e0bn_dUa*g(!I>#iIkrz@ zw|rwnTgyXWav!2^L1k>nB^wIKrFhAPUK5(a$`JEFuE+@gP@Y4YmTmo^_3vq!?qzC3 zagGPsBo83^C#WXgoIhj>K4%F^hDIht@dqOgkK;kdBFGNJ^}WaD?Ae7QZrB_Lu9D_X z3#YxSY~jBa2ZBObkxT2F!#cCxR28MMm zf4`b!{Yh2yYgLW2k?GfmX1^-?O6%LBtTdVG|KIFpeP~--MXt@Ln$+7XR4Wy}!Dcs? zypkj@bCV>STSYZW3%w2F_{?Ke5g~X&D|SUq%Q7$bu;}Zocy7GOLU?m?pFSSF}plxWA>0e>jGZDy5O7{G3R4XidXzd%*7l3 zE#vIKe+s(oz@iVHp7Z3O98P?V5gze5f8#m-U;cFdaWI%&{H=Um-Ro(SD$`5#VL9rd zS5Ps;%3fw>KR;5cQulh(GX1;AdZzm7YaDfF>&xd)jt8oX`fX6FyfliSw;saUz~zZO zwbm?YZMivvpSp}adU*N@lmK)QZ$pQlLqCGuBKym%vjitXpF)xdOK3VIw_r%eYC=L? z13m(lSU@M>`PgevbXGw4ke?0-QTGD8!S1Jj5F`%O1^g66c17C2(7RPsr}f8GUcTe5 zv$GwM!zGArLxm#b9oI$Y^Q^nng~R5XK2F{#LDKZ$suF|8F>eaGj-4|pK~q!JwBd*k z`u%V!Q&kV*6=G_cPO>Ditt6RLa$1nxPLhLZS~NRqor2SqxHi>|)4Aw%Dq_6~phz^; w88O_HAGO<&TOY)MJ=YKhkI3<%L6Ll&$5QzxA#JqP)U-|asp)BSStQoPf2qYZ$^ZZW literal 0 HcmV?d00001 diff --git a/handlers/PixivHandler.py b/handlers/PixivHandler.py index 3608181..c4e92d8 100644 --- a/handlers/PixivHandler.py +++ b/handlers/PixivHandler.py @@ -1,3 +1,12 @@ +import re +import os +import requests +import time +from webdav3.client import Client +from webdav3.exceptions import LocalResourceNotFound, RemoteResourceNotFound + +requests.packages.urllib3.disable_warnings() + def pixiv_handler(entries): # 整理为JSON数组 pixiv_list = [] @@ -9,12 +18,111 @@ def pixiv_handler(entries): links = (re.findall(pattern, i['value'])) tag_pattern = re.compile(r'tags/(.*)/') - item = {id: entry['id'], 'title': entry['title'], 'link': links, - 'author': entry['author'], 'tag': re.findall(tag_pattern, entry['source'].id)} + item = {'id': entry['id'], 'title': entry['title'], 'link': links, 'author': entry['author'], 'tag': re.findall(tag_pattern, entry['source']['id'])} pixiv_list.append(item) - return pixiv_list -def pixiv_result_handler(list): - pass \ No newline at end of file +def pixiv_result_handler(entries): + list = pixiv_handler(entries) + + success_entries = [] + failed_entries = [] + + need_sync_files = [] + for item in list: + code = item['id'] + tag = ','.join(item['tag']) + title = item['title'] + author = item['author'] + prefix_name = f'#{tag}# @{author}@ {title}' + + result_flag = True + + for url in item['link']: + file_name_pattern = re.compile(r'\/(\w*\.(?:jpg|png))$') + file_name = ','.join(re.findall(file_name_pattern, url)) + + if file_name: + full_name = f'{prefix_name} {file_name}' + (status, data) = downloadPic(url) + if status: + saveFile(full_name, data) + need_sync_files.append({ 'id': item['id'], 'file_name': full_name }) + else: + result_flag = False + time.sleep(10) + + if not(result_flag): + for entry in entries: + if entry['id'] == item['id']: + failed_entries.append(entry) + (success_ids, failed_ids) = sync(need_sync_files) + + for entry in entries: + if entry['id'] in success_ids: + success_entries.append(entry) + elif entry['id'] in failed_ids: + failed_entries.append(entry) + + return (success_entries, failed_entries) + +def sync(file_list): + success_ids = [] + failed_ids = [] + + options = { + 'webdav_hostname': 'https://pan.hiiragi.club:8081/webdav/', + 'webdav_login': 'mol', + 'webdav_password': 'YvG4SkF82qd7ks', + 'disable_check': True, + } + + client = Client(options) + temp_path = os.path.join(os.path.dirname(__file__), 'temp') + + for file_item in file_list: + path = os.path.join(temp_path, file_item['file_name']) + try: + client.upload('Pictures/ACGN/Pixiv/' + file_item['file_name'], path) + print('Info: ' + file_item['file_name'] + ' upload success!!') + except LocalResourceNotFound as exception: + print('Error: ' + file_item['file_name'] + ' upload failed!!') + if not(file_item['id'] in failed_ids): + failed_ids.append(file_item['id']) + removeFile(path) + + for file_item in file_list: + if not(file_item['id'] in failed_ids): + success_ids.append(file_item['id']) + + return (success_ids, failed_ids) + +def saveFile(name, data): + temp_path = os.path.join(os.path.dirname(__file__), 'temp') + with open(os.path.join(temp_path, name), 'wb') as code: + code.write(data) + +def removeFile(path): + if len(path) > 0: + try: + os.remove(path) + except: + pass + +def downloadPic(url): + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE", + } + data = None + status = False + try: + proxies={'https': 'http://192.168.124.12:7890','http': 'http://192.168.124.12:7890'} + res = requests.get(url, headers = headers, verify=False, timeout=(5,5), proxies=proxies) + data = res.content + status = True + res.close() + print(f'Info: download success {url}') + except: + print(f'Error: download failed {url}') + return (status, data) \ No newline at end of file diff --git a/handlers/__pycache__/PixivHandler.cpython-310.pyc b/handlers/__pycache__/PixivHandler.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a5752492d24300c7fa42fc4c3ad899a10e02fe36 GIT binary patch literal 3547 zcmZ8kTXPi06`t<7?nNshbFrL@7lU04pj{+!5ro(fVu8!H>=+ysLX>LQ9!VpOW>!73 z5^T*9sz{~$lFD0NOsb^E{D%CKeo9rI_zRR1@|~Vt8Q7^leL3A{x~ISIoU`fJSk=Jq zU;n-%{xV@0|Dn#CKaS1~l&pXdhG4O=T-I!v={{fPx;B?h)I7Gf?PZ(M95Z&d-DP*% zTlSa{^3BR-b-#9CKxYa2&&G0HIKo9eCOlC=-4In#Lp?6)Vhr_!XozvtM+CcT_!BR& z17p!=%DELsVUqh?9s4Tn1o3xamiA;PTuk$Yw3i5V{JU^F&BJ%cJB%z0e;#T&`Yn{~ zDa5)lV1@B}T8gI%V?Ex2+7xus{*uGh~2aFjzRzb(BE-JT^COHn)U);#^?t|>%XV=>8 zGoM|15KQ-PPA@+!9$c8d3VC{Z^+xNHb7-AByK-j5KmV}6tdB_>Ot0OXUU>LM)A!FQ zdpC%CVZWLO>sk9u>#W~aJQB*zqdX3k6ZGaInJ2;vwvy4^4lzNcIqKt^Gvs@>>uw7VKdS*~mve4=c!mMIn~w;SYn zC=*qK)zwkQit=##pN5>m`(9mdZ=~B{dpnKWU72o%oxI(Bf{i9^nPt-*SPW%*BS=IX z%B=k@z3fieYjvNfvGVOhJzaG6K7_?RX0bX7XV&XUUghUmom)))0?N1WaSQ|W2{b;w z0&;SrFrKpk12!~1P~+x+k8EHUHeqGp2)@tICTuX=u`Mi&dZ=;pgNkqp_eVpxMFr9; zydRB!ID@LF3~HEB)mWP!)N{UXiW=6Bku~(JVyvhi5c(QLLv9vTtgaUgtTqSZxn0zs zvlTv=7#t}kWG8ou@#4sH&Wys^ryXEl#kd%If%VKtxM&P&xwl^t;~IOjMJ@Z2M*a9; zvY6Df^_nd%pB5%8OzL?la9%Ae{igQ@NA-%S!xiuC8uIT2Kb-x;(9%(iAG~4Zhk_U8 z8YeO@YLLBR=VcuqS%Xc(>=irJH%~Qh^!bhEJz0aUe{7|FW@Yc}N(&(0?5-ExWZm~q zzhX_FYa^<%1}v^7!FIUtl5X0s>FZS1W*Q}OjO;f^j63$R!m_Q#VDJzBU*d&(qX zPFbB)gzubF)w(iViyrH3tC|e6UYxJ4#lgBVdoor>4$m}Vhy0LiRn~fOTrP65Jji>Q zvP6&v@;%blMjMO-2sqpv+L}?N_<-geE!i=A>yRX*$?iaaBA^6O#bXT=j~z#^&Z}$^ zbwksqs>iB~KcjgQtjersb>{Ipv~~3OGmpzlF!mOy$dS;UYEoGs6**lZlz@8`CdGvp z*1!bSOi<0r*(S)KrIp(UOgM14wg%rf1EpQ<3Qp$P5!T>J+4GTL-!3wU)&{*t$OddajyYB``Ya@zeupYHKaqrQ! z`3v(i?Y;1k2zJ}bgf~=;`@6R<-rZW5pWP8xwz7M0%H!pP)r~aEi3h4a9Ej8PC^=Mj zgDl%irBGuc%7RBAO=lzQYytP-c30!8e-yOHdjLlJ=B?X{rCV+HoiCEL^jfojq}l5d zcbY?-ojlohKbJC<&`peVCG1X~)Z_#gDTPox1yJGVH{?m`x+0Q#qu#1!%Id0eO4G{j z$_UZI#I%x?Rf%X`($udE$xd{Xb2R0mHkWquC`~fubaZNuw zJE5Nb4lCp*P%oflWD^8n+~kN8oH;*v?s0&7678exeURcPKo0qg|F>!p=o=;A$wG(D zJNj8@97d64baG>p?Q<>7+ybN-AkA~+6*Lb2NErfu&i~dx24e0z1uNku3=7Y5lM<`D z#*C4DvIn5ibDt6_jQQrGypA_0zW2y?}*Kf4Qg3f&MTqoW)L^Xu%R*Lw6Vc7xA`gQD5pLZb|sf^AvySl`R(fJ64s zhDY0{wmtw~lw`s;l^KVLK7n$|%lBD%mUlKrZ`wr3D2UW?I}^;oWBnva^Va5QW0cqE z%{@kgTuy{|4*J6N49xy%xF2#robtU*P2wffw+Un2R`#K}kbgwP;tKVm#oW8k^IMRRob=r^OIA~vLU1*+JLPC}9 zWzEHV&C9Ks-!zxLS-O1DZ{F;7<8Uc_^ktN{FU?(P&0TJu`SQ-aufICqjH9ivc^foN z{pPI=NjcNyxz>f&-0a-d*38^&^KP&f$Y`|e4`1B*zBzaK!rjjo`X7+jfzz3}I@_AL zJl~p`y@+z{%KX&}NUm+|laxi3&tKa3X7W1-fX3Yj1B5j2Z8wzB+7sncmmqi{QN8O{EQp~pDXln$fm+sc#SPA|;zOquIpF6ouZ?!+m+*6--ejwD#g zyCgJZXQ=f@2xY=&y^GtApx8mlehq<`_S!Nj@vDNq014vS1oKSie~?VlTz|667BqL3 zFd$Dus7C2b=ird(e@E%iCP#yZ?j$RH7ExY#-Jr7-tRoF}5zXW1(VUtX`P{ugCQ-7U z=_EzRNbPwvpEPduUfwv12EC5k-u3NN^y2VS;z>qB9-=ipb)2QnJ?>3-=e-X-yYBuU DA?jnr literal 0 HcmV?d00001 diff --git a/handlers/__pycache__/__init__.cpython-310.pyc b/handlers/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bffb1df1b2bab961b8cef094fdaaf393f6170a95 GIT binary patch literal 149 zcmd1j<>g`k0-YN76cGIwL?8o3AjbiSi&=m~3PUi1CZpd2 SKczG$)edB0F%ytrVE_Qc`ysLb literal 0 HcmV?d00001 diff --git a/requirements.txt b/requirements.txt index d1b2f93..fd219b5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ -thread feedparser -pprint requests -webdavclient3 \ No newline at end of file +webdavclient3 +PyMySQL \ No newline at end of file diff --git a/start.py b/start.py index 732232f..fb329cc 100644 --- a/start.py +++ b/start.py @@ -6,8 +6,8 @@ class CreateRssServer(): rss_options = [ { 'id': 'pixiv', + 'type': 0, 'url': 'https://rss.hiiragi.club:8081/public.php?op=rss&id=-2&is_cat=0&q=&key=pp9ejw64463b6621a0b', - 'rss_pipe_handler': pixiv_handler, 'result_handler': pixiv_result_handler } ] @@ -17,4 +17,4 @@ rss_server = CreateRssServer() app = rss_server.app if __name__ == '__main__': - app.run() + app.start()