From d99d235123d43825f35cdc4c8cb49339d9651056 Mon Sep 17 00:00:00 2001 From: zmc <zmc_li@foxmail.com> Date: 星期五, 22 十二月 2023 11:56:13 +0800 Subject: [PATCH] 1.修改了创建连接数据库引擎时的方言类型 2.修改了爬取数据的逻辑以及相关的异常分析代码 --- py_spider.py | 30 +++++++++--------------------- 1 files changed, 9 insertions(+), 21 deletions(-) diff --git a/py_spider.py b/py_spider.py index fed11c5..bf4c00c 100644 --- a/py_spider.py +++ b/py_spider.py @@ -2,10 +2,9 @@ from flask_cors import CORS import sys -# sys.path.append('D:\\z\workplace\\VsCode\\pyvenv\\venv') sys.path.append('../') -import src.Crawling as Crawling +import src.Crawling_1 as Crawling import src.auto_login as login import pandas as pd from sqlalchemy import create_engine @@ -22,19 +21,17 @@ session = -1 + +# 妯℃嫙鐧婚檰骞剁埇鍙栨暟鎹� @app.route('/getData',methods=['POST']) def get_data(): if request.method == 'POST': data=request.get_json() if(session != -1) : - # if(len(session.cookies.get_dict()) == 0): - # # session澶辨晥 - # return '-2' + result,all_data=Crawling.pass_login(session,data.get('beginTime'),data.get('endTime'),data.get('selectedShopNames')) print('\n\n鐖彇鐨勬墍鏈夌殑allData\n',all_data) duplicate_data,new_data=is_duplicate(all_data) - # if(len(duplicate_data)==0): - # duplicate_data.append('鏃犻噸澶嶆暟鎹�') print('閲嶅鐨勬暟鎹负锛�',duplicate_data) jso ={ # 鍙嶉淇℃伅 @@ -51,24 +48,22 @@ return '-1' return jsonify(jso) -# 鍐欏叆鏁版嵁搴� 鍐欏叆4寮犺〃涓� +# 瀵规暟鎹繘琛屽紓甯稿垎鏋愶紝灏嗙粨鏋滃啓鍏ュ紓甯歌〃锛岃澶囦俊鎭〃锛屽垎閽熸暟鎹〃 @app.route('/store',methods=['POST']) def write_new(): if request.method =='POST': data = request.get_json() - # print('data涓猴細',data.get('allData')) - # print('瑕佸瓨鍏ョ殑鏁版嵁鏉℃暟涓猴細',len(data.get('allData'))) + # 鍐欏叆鏁版嵁搴� w_t_MySql.write(data.get('allData')) return '鍐欏叆瀹屾垚锛�' -# 鍐欏叆鏁版嵁搴� 鍙啓鍏ュ垎閽熸暟鎹〃 + +# 鍙啓鍏ュ垎閽熸暟鎹〃 @app.route('/minute',methods=['POST']) def write_dup(): if request.method =='POST': data = request.get_json() - # print('data涓猴細',data.get('allData')) - # print('瑕佸瓨鍏ョ殑鏁版嵁鏉℃暟涓猴細',len(data.get('allData'))) # 鍐欏叆鏁版嵁搴� w_t_minute.write(data.get('allData')) return '鍐欏叆瀹屾垚锛�' @@ -88,25 +83,18 @@ # lst涓鸿鍜屾暟鎹簱宸插瓨鐨勬暟鎹繘琛屾瘮杈冿紝lst鍏冪礌鍙渶瑕�3涓瓧娈点�� 杩斿洖鍊兼槸閲嶅鐨勬暟鎹� def is_duplicate(lst): temp=copy.deepcopy(lst) - # print('temp',temp) - # print('\n') # 鍙繚瀛�3涓瓧娈� after_address=[] for item in temp: - # print('item',item) - # print('\n') a=[] # 搴楅摵鍚嶅拰璁惧缂栧彿 a.append(item[1]) - # print('item1',item[1]) - # print('\n') a.append(item[2]) # 褰掑睘鏃堕棿 time=str(item[11])+':00' a.append(time) - # print('a',a) - # print('\n') + after_address.append(a) engine = create_engine("mysql+pymysql://fumeRemote:feiyu2023@114.215.109.124:3306/fume?charset=utf8") -- Gitblit v1.9.3