From d99d235123d43825f35cdc4c8cb49339d9651056 Mon Sep 17 00:00:00 2001
From: zmc <zmc_li@foxmail.com>
Date: 星期五, 22 十二月 2023 11:56:13 +0800
Subject: [PATCH] 1.修改了创建连接数据库引擎时的方言类型 2.修改了爬取数据的逻辑以及相关的异常分析代码

---
 py_spider.py |   30 +++++++++---------------------
 1 files changed, 9 insertions(+), 21 deletions(-)

diff --git a/py_spider.py b/py_spider.py
index fed11c5..bf4c00c 100644
--- a/py_spider.py
+++ b/py_spider.py
@@ -2,10 +2,9 @@
 from flask_cors import CORS
 
 import sys
-# sys.path.append('D:\\z\workplace\\VsCode\\pyvenv\\venv')
 sys.path.append('../')
 
-import src.Crawling as Crawling
+import src.Crawling_1 as Crawling
 import src.auto_login as login
 import pandas as pd
 from sqlalchemy import create_engine
@@ -22,19 +21,17 @@
 
 session = -1
 
+
+# 模拟登陆并爬取数据
 @app.route('/getData',methods=['POST'])
 def get_data():
     if request.method == 'POST':
         data=request.get_json()
         if(session != -1) :
-            # if(len(session.cookies.get_dict()) == 0):
-            #     # session澶辨晥
-            #     return '-2'
+
             result,all_data=Crawling.pass_login(session,data.get('beginTime'),data.get('endTime'),data.get('selectedShopNames'))
             print('\n\n鐖彇鐨勬墍鏈夌殑allData\n',all_data)
             duplicate_data,new_data=is_duplicate(all_data)
-            # if(len(duplicate_data)==0):
-            #     duplicate_data.append('鏃犻噸澶嶆暟鎹�')
             print('閲嶅鐨勬暟鎹负锛�',duplicate_data)
             jso ={
                 # 反馈信息
@@ -51,24 +48,22 @@
             return '-1'
     return jsonify(jso)
 
-# 写入数据库 写入4张表中
+# 对数据进行异常分析，将结果写入异常表，设备信息表，分钟数据表
 @app.route('/store',methods=['POST'])
 def write_new():
     if request.method =='POST':
         data = request.get_json()
-        # print('data涓猴細',data.get('allData'))
-        # print('瑕佸瓨鍏ョ殑鏁版嵁鏉℃暟涓猴細',len(data.get('allData')))
+
         # 写入数据库
         w_t_MySql.write(data.get('allData'))
         return '鍐欏叆瀹屾垚锛�'
 
-# 写入数据库 只写入分钟数据表
+
+# 鍙啓鍏ュ垎閽熸暟鎹〃
 @app.route('/minute',methods=['POST'])
 def write_dup():
     if request.method =='POST':
         data = request.get_json()
-        # print('data涓猴細',data.get('allData'))
-        # print('瑕佸瓨鍏ョ殑鏁版嵁鏉℃暟涓猴細',len(data.get('allData')))
         # 写入数据库
         w_t_minute.write(data.get('allData'))
         return '鍐欏叆瀹屾垚锛�'
@@ -88,25 +83,18 @@
 # lst为要和数据库已存的数据进行比较，lst元素只需要3个字段。 返回值是重复的数据
 def is_duplicate(lst):
     temp=copy.deepcopy(lst)
-    # print('temp',temp)
-    # print('\n')
     # 鍙繚瀛�3涓瓧娈�
     after_address=[]
     for item in temp:
-        # print('item',item)
-        # print('\n')
         a=[]
         # 店铺名和设备编号
         a.append(item[1])
-        # print('item1',item[1])
-        # print('\n')
 
         a.append(item[2])
         # 归属时间
         time=str(item[11])+':00'
         a.append(time)
-        # print('a',a)
-        # print('\n')
+
         after_address.append(a)
 
     engine = create_engine("mysql+pymysql://fumeRemote:feiyu2023@114.215.109.124:3306/fume?charset=utf8")

--
Gitblit v1.9.3