From d99d235123d43825f35cdc4c8cb49339d9651056 Mon Sep 17 00:00:00 2001
From: zmc <zmc_li@foxmail.com>
Date: Fri, 22 Dec 2023 11:56:13 +0800
Subject: [PATCH] 1. Change the dialect type used when creating the database
 engine. 2. Rework the data-crawling logic and the related exception-analysis
 code.

---
 src/write_to_minute_table.py                         |   2
 test_get_data/get_data.py                            |  92 ++++
 test_get_data/__pycache__/login.cpython-39.pyc       |   0
 src/__pycache__/Crawling_1.cpython-38.pyc            |   0
 test_get_data/__pycache__/login.cpython-38.pyc       |   0
 py_spider.py                                         |  30
 src/__pycache__/Crawling.cpython-38.pyc              |   0
 test_get_data/database_connect.py                    |  86 +++
 Vcode.jpg                                            |   0
 test_get_data/url_help.py                            |  36 +
 src/Crawling.py                                      |  41 -
 test_get_data/__pycache__/__init__.cpython-39.pyc    |   0
 test_get_data/__pycache__/__init__.cpython-38.pyc    |   0
 src/__pycache__/write_to_minute_table.cpython-38.pyc |   0
 test_get_data/__pycache__/request.cpython-38.pyc     |   0
 Scripts/Vcode.jpg                                    |   0
 test_get_data/__init__.py                            |   0
 test_get_data/__pycache__/url_help.cpython-39.pyc    |   0
 /dev/null                                            |  45 -
 test_get_data/login.py                               |  64 ++
 test_get_data/__pycache__/request.cpython-39.pyc     |   0
 src/__pycache__/write_to_MySql.cpython-38.pyc        |   0
 test_get_data/request.py                             |  37 +
 src/Crawling_1.py                                    | 920 ++++++++++++++++++++++++++++++++++++++++
 src/write_to_MySql.py                                |  11
 test_get_data/__pycache__/url_help.cpython-38.pyc    |   0
 26 files changed, 1262 insertions(+), 102 deletions(-)

diff --git a/PackagesInfo.txt b/PackagesInfo.txt
deleted file mode 100644
index 8d4c3f7..0000000
--- a/PackagesInfo.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-altgraph==0.17.3
-async-generator==1.10
-attrs==23.1.0
-baidu-aip==4.16.10
-beautifulsoup4==4.12.2
-bs4==0.0.1
-certifi==2023.5.7
-cffi==1.15.1
-chardet==5.1.0
-charset-normalizer==3.1.0
-DBUtils==2.0
-exceptiongroup==1.1.1
-greenlet==2.0.2
-h11==0.14.0
-idna==3.4
-mysql==0.0.3
-mysql-connector-python==8.0.33
-mysqlclient==2.1.1
-numpy==1.24.3
-outcome==1.2.0
-pandas==2.0.1
-pefile==2023.2.7
-Pillow==9.5.0
-protobuf==3.20.3
-pycparser==2.21
-pyinstaller==5.11.0
-pyinstaller-hooks-contrib==2023.3
-PyMySQL==1.0.3
-PySocks==1.7.1
-python-dateutil==2.8.2
-pytz==2023.3
-pywin32-ctypes==0.2.0
-requests==2.30.0
-selenium==4.9.1
-six==1.16.0
-sniffio==1.3.0
-sortedcontainers==2.4.0
-soupsieve==2.4.1
-SQLAlchemy==2.0.13
-trio==0.22.0
-trio-websocket==0.10.2
-typing-extensions==4.5.0
-tzdata==2023.3
-urllib3==2.0.2
-wsproto==1.2.0
diff --git a/Scripts/Vcode.jpg b/Scripts/Vcode.jpg
new file mode 100644
index 0000000..17e63eb
--- /dev/null
+++ b/Scripts/Vcode.jpg
Binary files differ
diff --git a/Vcode.jpg b/Vcode.jpg
index f032937..ed594ca 100644
--- a/Vcode.jpg
+++ b/Vcode.jpg
Binary files differ
diff --git a/py_spider.py b/py_spider.py
index fed11c5..bf4c00c 100644
--- a/py_spider.py
+++ b/py_spider.py
@@ -2,10 +2,9 @@
 from flask_cors import CORS
 import sys
 
-# sys.path.append('D:\\z\workplace\\VsCode\\pyvenv\\venv')
 sys.path.append('../')
 
-import src.Crawling as Crawling
+import src.Crawling_1 as Crawling
 import src.auto_login as login
 import pandas as pd
 from sqlalchemy import create_engine
@@ -22,19 +21,17 @@
 session = -1
 
+
+# Simulate the login and crawl the data
 @app.route('/getData',methods=['POST'])
 def get_data():
     if request.method == 'POST':
         data=request.get_json()
         if(session != -1) :
-            # if(len(session.cookies.get_dict()) == 0):
-            #     # the session has expired
-            #     return '-2'
+
             result,all_data=Crawling.pass_login(session,data.get('beginTime'),data.get('endTime'),data.get('selectedShopNames'))
             print('\n\nAll crawled data (allData):\n',all_data)
             duplicate_data,new_data=is_duplicate(all_data)
-            # if(len(duplicate_data)==0):
-            #     duplicate_data.append('no duplicate data')
             print('Duplicate records:',duplicate_data)
             jso ={
                 # feedback info
@@ -51,24 +48,22 @@
             return '-1'
     return jsonify(jso)
 
-# Write to the database: write into all 4 tables
+# Run the exception analysis on the data and write the results into the exception table, the device-info table and the minute-data table
 @app.route('/store',methods=['POST'])
 def write_new():
     if request.method =='POST':
         data = request.get_json()
-        # print('data:',data.get('allData'))
-        # print('number of records to store:',len(data.get('allData')))
+        # write to the database
         w_t_MySql.write(data.get('allData'))
     return 'Write finished!'
 
-# Write to the database: only write into the minute-data table
+
+# Only write into the minute-data table
 @app.route('/minute',methods=['POST'])
 def write_dup():
     if request.method =='POST':
         data = request.get_json()
-        # print('data:',data.get('allData'))
-        # print('number of records to store:',len(data.get('allData')))
         # write to the database
         w_t_minute.write(data.get('allData'))
     return 'Write finished!'
@@ -88,25 +83,18 @@
 # lst is compared against the data already stored in the database; each lst element only needs 3 fields. The return value is the duplicated data
 def is_duplicate(lst):
     temp=copy.deepcopy(lst)
-    # print('temp',temp)
-    # print('\n')
     # keep only 3 fields
     after_address=[]
     for item in temp:
-        # print('item',item)
-        # print('\n')
         a=[]
         # shop name and device code
         a.append(item[1])
-        # print('item1',item[1])
-        # print('\n')
        a.append(item[2])
         # attribution time
         time=str(item[11])+':00'
         a.append(time)
-        # print('a',a)
-        # print('\n')
+
        after_address.append(a)
 
     engine = create_engine("mysql+pymysql://fumeRemote:feiyu2023@114.215.109.124:3306/fume?charset=utf8")
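Note: is_duplicate reduces each crawled record to a (shop name, device code, attribution time) triple and compares it against rows already in MySQL. A minimal sketch of that comparison, assuming the ed_data column names used elsewhere in this patch (the function name split_duplicates is illustrative, not part of the codebase):

    import pandas as pd

    def split_duplicates(rows, con):
        # rows: crawled records; item[1]=shop name, item[2]=device code, item[11]=attribution time
        existing = pd.read_sql(
            'SELECT shop_name, equipment_number, attribution_time FROM ed_data', con=con)
        seen = {tuple(map(str, r)) for r in existing.values.tolist()}
        dup, new = [], []
        for item in rows:
            key = (str(item[1]), str(item[2]), str(item[11]) + ':00')
            (dup if key in seen else new).append(item)
        return dup, new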
diff --git a/src/Crawling.py b/src/Crawling.py
index 448d8db..fa332bd 100644
--- a/src/Crawling.py
+++ b/src/Crawling.py
@@ -120,7 +120,8 @@
         count_all=count_all+get_OnePage(i,count)
         if len(list_temp)==0:   # if this page is empty, the following pages have no data either; leave the loop
             print('The following pages are empty; moving on to the next shop')
-            break   # leave the loop
+            break
+            # leave the loop
         list_all.extend(list_temp)   # append one page of data to list_all
         print("Crawled page",page)
         page=page+1
@@ -604,11 +605,7 @@
     global ck
     global list_temp   # use the global variable
     list_temp.clear()  # empty the temporary list
-    # session.headers = {
-    #     # mind the cookie here; capture it yourself
-    #     # "Cookie":ck,
-    #     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
-    # }
+
     r = session.get(url, verify=False).text
     soup = bs(r,'html.parser')
@@ -686,11 +683,7 @@
     global list_temp   # use the global variable
     list_temp.clear()  # empty the temporary list
-    # session.headers = {
-    #     # mind the cookie here; capture it yourself
-    #     # "Cookie":ck,
-    #     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
-    # }
+
     r = session.get(url, verify=False).text
     soup = bs(r,'html.parser')
@@ -707,7 +700,6 @@
         print('Dropped the last two special elements')
         print(list1)
         list.append(list1)
-        #list1.clear()
 
     #print(list)
     list_data=[]
@@ -753,36 +745,27 @@
     urls=url_more()   # the URLs of all shops in the file, with the maximum page size of 100; urls is a list
     #print(urls)
     teshu_url=[]
-    #'shop=%25E9%25A3%259F%25E5%2585%25B6%25E5%25AE%25B6'  Shiqijia (食其家)
     special_url=['shop=%25E4%25BA%25BA%25E7%2594%259F%25E4%25B8%2580%25E4%25B8%25B2','shop=%25E7%25BC%2598%25E5%25AE%25B6','shop=%25E4%25B8%25B0%25E8%258C%2582%25E7%2583%25A4%25E4%25B8%25B2','shop=%25E6%25B3%25B0%25E7%2585%258C%25E9%25B8%25A1','shop=%25E5%25B0%258F%25E9%2593%2581%25E5%2590%259B']
 
     for url in urls:   # iterate over all shop URLs
         begin=url.find('&')+1
         end=url.rfind('&')
-        #print(begin,end)
         # special URLs get special treatment
         if url[begin:end] in special_url:
             print('Found a special one!')
-            already_spider_shopnum += 1   #number of crawled shops +1
+            already_spider_shopnum += 1   # number of crawled shops +1
             teshu_url.append(url)
             # then drop one list element
-            url_teshu=url_add_time(url,date_begin,date_end)   #add the date range to the URL
-            list_to_MySql=get_MorePages_teshu(url_teshu,page)   #shop URL, number of pages to crawl
-            # a=remove_Duplicates_list(list_to_MySql)
-            # print('\n')
-            # for item in a:
-            #     print(item)
+            url_teshu=url_add_time(url,date_begin,date_end)   # add the date range to the URL
+            list_to_MySql=get_MorePages_teshu(url_teshu,page)   # shop URL, number of pages to crawl
+
             if len(list_to_MySql) == 0 :
                 print('This shop has no data in the given period; skipped')
                 continue
             has_remove_duplicates = remove_Duplicates_list(list_to_MySql)   # drop all duplicates within this shop's crawled pages
             for item in has_remove_duplicates:
                 all_data.append(item)
-            # is_minutes_exceed_30(has_remove_duplicates,con)   # write the device-failure data of the given pages into the exception table
-            # isExceeding(has_remove_duplicates,con)   # write the data of the given pages into the exceeding table
-            # ea_t_dev(has_remove_duplicates,con)   # write the data of the given pages into the device-info table
-            # fd_t_minbute(has_remove_duplicates,con)   # write the data of the given pages into the minute-data table
-
+
             list_to_MySql.clear()
         if url[begin:end]=='shop=%25E9%25A3%259F%25E5%2585%25B6%25E5%25AE%25B6':   # Shiqijia (食其家)
             print('Found a special one!')
@@ -800,11 +783,7 @@
             has_remove_duplicates = remove_Duplicates_list(list_to_MySql)   # drop all duplicates within this shop's crawled pages
             for item in has_remove_duplicates:
                 all_data.append(item)
-            # is_minutes_exceed_30(has_remove_duplicates,con)   # write the device-failure data of the given pages into the exception table
-            # isExceeding(has_remove_duplicates,con)   # write the data of the given pages into the exceeding table
-            # ea_t_dev(has_remove_duplicates,con)   # write the data of the given pages into the device-info table
-            # fd_t_minbute(has_remove_duplicates,con)   # write the data of the given pages into the minute-data table
-
+
             list_to_MySql.clear()
     for t in teshu_url:   # remove the special ones from urls
         urls.remove(t)
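Note: the special_url entries above are double percent-encoded shop names (each %25 is an encoded '%'), because the site decodes the parameter twice. A quick sketch of how such a key is produced:

    import urllib.parse

    def double_encode(shop_name: str) -> str:
        # '食其家' -> '%E9%A3%9F%E5%85%B6%E5%AE%B6' -> '%25E9%25A3%259F%25E5%2585%25B6%25E5%25AE%25B6'
        return urllib.parse.quote(urllib.parse.quote(shop_name))

    print('shop=' + double_encode('食其家'))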
diff --git a/src/Crawling_1.py b/src/Crawling_1.py
new file mode 100644
index 0000000..dd9d834
--- /dev/null
+++ b/src/Crawling_1.py
@@ -0,0 +1,920 @@
+# Summary: multi-page crawling with working DB writes; crawls every shop in the file and the full page table, removes duplicate data; an empty page jumps to the next shop, and a shop with no data is skipped as well
+# Crawl a given number of pages for one shop (one page holds 100 records by default); e.g. for 12 pages, crawl all 12 and then write the result to the database in one go
+# Crawl the whole page table structure, then write it into 4 tables (the crawled data goes into two tables, plus the exceeding table and the exception table)
+# The page has 14 fields; 15 are stored in the database (serial number + 14)
+import requests
+from bs4 import BeautifulSoup as bs
+import re   # regular expressions
+from pymysql import *   # connect to the MySQL database
+import pandas as pd
+from sqlalchemy import create_engine
+import urllib.parse   # double URL encoding
+import time
+import uuid
+from datetime import datetime, timedelta
+
+import sys
+sys.path.append('../../')
+import src.core_modules.remove_duplicates_methods as rdm
+
+
+
+now_date = time.strftime("%Y-%m-%d", time.localtime())   # current date; default end of the URL date range
+now_date1 = time.strftime("%Y-%m", time.localtime())
+month_begin=now_date1+'-01'   # start of the current month; default begin of the URL date range
+
+list_temp=[]   # temporary list, global
+
+
+
+def remove_Duplicates_list(list):   # dedupe the list itself
+    global already_spider_datanum
+    list_store=[]
+    for item in list:
+        if item not in list_store:
+            list_store.append(item)
+        else:
+            print("Duplicate found")
+            already_spider_datanum=already_spider_datanum-1
+    #print(list_store)
+    return list_store
+
+def merge(list):   # merge the last six elements of list
+    date_1=str(list.pop(-1))   # after popping the tail we can keep working on the remaining elements
+    date_2=str(list.pop(-1))
+    date1=date_2+' '+date_1   # merge into "Y-M-D H:M"
+
+    date_3=str(list.pop(-1))
+    date_4=str(list.pop(-1))
+    date2=date_4+' '+date_3
+
+    date_5=str(list.pop(-1))
+    date_6=str(list.pop(-1))
+    date3=date_6+' '+date_5
+    list.append(date3)   # write the merged values back to the end of the list
+    list.append(date2)
+    list.append(date1)
+
+
+    return list
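Note: merge() pops the last six tokens (three date/time pairs, newest last) and appends them back as three "date time" strings in the original order. A quick illustration with made-up tokens:

    row = ['1', 'shopA', 'dev01',
           '2023-12-01', '10:00', '2023-12-01', '10:10', '2023-12-01', '10:20']
    print(merge(row))
    # ['1', 'shopA', 'dev01', '2023-12-01 10:00', '2023-12-01 10:10', '2023-12-01 10:20']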
+def list_url(url,page_num):   # i in the URL is the page; page_num is the number of pages to crawl; append the page parameter to the URL
+    urls = [url+'&page'+'={}'.format(str(i)) for i in range(1,page_num+1)]
+    return urls   # return the links for every page of this URL, as a list
+
+
+def get_OnePage(url,count):   # grab one page of data and put it into list_data; url is the page address to visit
+    global list_temp   # use the global variable
+
+    list_temp.clear()   # empty the temporary list
+    r = session.get(url, verify=False).text
+    soup = bs(r,'html.parser')
+
+    # find all tr tags
+    rows = soup.find_all('tr')
+
+    # extract the data from the table
+    result = []   # list holding the result
+    for row in rows:
+        # record count +1
+        count=count+1
+        # holds this row's data
+        data = []
+        # all columns of this row
+        cols = row.find_all('td')
+        # loop over every column
+        for col in cols:
+            if col.find('div'):
+                # if the td contains a div, extract the div's content separately
+                div_content = col.find('div').text.strip()
+                # text of the element itself; search only the tag's direct children
+                td_content = ''.join(col.find_all(text=True, recursive=False)).strip()
+                data.append(td_content)
+                data.append(div_content)
+            else:
+                # if the td contains no div, take the td's content directly
+                td_content = col.text.strip()
+                data.append(td_content)
+        # drop 'operation' and 'details'
+        del (data[-2:])
+        # drop the address
+        del (data[2])
+        result.append(data)
+    print('Result of one page:',result)
+    # drop the header row
+    del (result[0])
+
+    count=count-1   # the header row was dropped, so the total row count decreases by one
+
+    list_temp=result[:]
+
+    print('-'*10)
+    print('Fetched and processed data:')
+    print(list_temp)
+    print('-'*10)
+
+    # return the number of records fetched
+    return count
+
+
+
+def get_MorePages(url,page_num):   # crawl several pages for the given shop; page_num is the number of pages
+    global sleeptime
+    global already_spider_datanum
+    urls=list_url(url,page_num)   # the page URLs to iterate over
+    count_all=0   # total number of records
+    list_all=[]   # all crawled data
+    page=1
+    for i in urls:
+        count=0
+        count_all=count_all+get_OnePage(i,count)
+        if len(list_temp)==0:   # if this page is empty, the following pages have no data either; leave the loop
+            print('The following pages are empty; moving on to the next shop')
+            break
+            # leave the loop
+        list_all.extend(list_temp)   # append one page of data to list_all
+        print("Crawled page",page)
+        page=page+1
+        print("\n")
+        time.sleep(sleeptime)   # wait `sleeptime` seconds between requests
+
+    for j in list_all:
+        print(j)   # print every row of the list
+    print("Total number of rows:",count_all)
+    already_spider_datanum += count_all   # running total of crawled records
+
+    return list_all
+
+
+def url_more():   # build URLs from the encoded shop names in the file; returns a list of URLs with the page's maximum display size of 100
+    global shopnum
+    global webshops
+    shopnames = webshops[:]   # the Chinese shop names
+    print('url_more:',shopnames)
+    # encode
+    shopnum=len(shopnames)   # total number of shops in the file
+    shopname_encoding=[]   # the encoded shop names
+    i=0
+    for name in shopnames:
+        shopname_encoding.append(urllib.parse.quote(urllib.parse.quote(shopnames[i])))   # double URL-encode the shop name
+        i=i+1
+    # concatenate into usable URLs
+    urls=[]   # the concatenated URLs
+    for shop in shopname_encoding:
+        url='http://xhhb.senzly.cn/sys/yyRealTimeValue_list.jsp?key1=&shop='+shop+'&pagesize=100'
+        urls.append(url)
+    # for i in urls:
+    #     print(i)
+    return urls   # URLs corresponding to the shop names in the file
+
+# Append the begin and end dates to the URL
+def url_add_time(url,date_begin=month_begin,date_end=now_date):   # url, Y-M-D, e.g. 2023-05-03
+    url_date=url+'&key5='+date_begin+'&key6='+date_end
+    print(url_date)
+    return url_date
+
+#------------------------------------------------------------------------------------------------------------ write exceeding-fume data into the exception table
+# Are the two times exactly 10 minutes apart? Returns True if so, otherwise False
+def is_time_difference_equals_10_mins(datestr1, datestr2):
+    date1 = datetime.strptime(datestr1, "%Y-%m-%d %H:%M")
+    date2 = datetime.strptime(datestr2, "%Y-%m-%d %H:%M")
+    time_diff = date2 - date1
+
+    return time_diff == timedelta(minutes = 10) or time_diff == timedelta(minutes = -10)   # timedelta() is the interval between two date, time or datetime objects
+
+
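Note: readings are expected every 10 minutes, so any gap other than ±10 minutes marks a break in the series. A quick check of the helper, assuming the same "%Y-%m-%d %H:%M" format:

    from datetime import datetime, timedelta

    fmt = "%Y-%m-%d %H:%M"
    a = datetime.strptime("2023-12-01 10:00", fmt)
    b = datetime.strptime("2023-12-01 10:10", fmt)
    print(abs(b - a) == timedelta(minutes=10))   # True: consecutive readings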
+# A reading every ten minutes is normal. Find the break points where the gap exceeds 10 minutes
+def find_break_point(list):   # list holds the exceeding records
+    i=0
+    j=1
+    break_point = []   # the break points
+    for item in list[1:]:
+        if(is_time_difference_equals_10_mins(list[i][2],item[2]) == False):
+            break_point.append(j)
+        i=i+1
+        j=j+1
+    print('Break points:')
+    print(break_point)
+
+    # return the break points
+    return break_point
+
+
+
+# Split the list into several sub-lists at the break points; returned via result
+def point_write(list,b_point):   # list is the data; the elements of b_point are break points, in ascending order
+    result = []
+    last_index = 0
+    for index in b_point:
+        result.append(list[last_index:index])
+        last_index=index
+    result.append(list[last_index:])
+    return result
+
+
+# Write the failure info into the abnormal_data exception table
+def abnormal_write_to_SQL(list,con):
+    data = pd.DataFrame(list,columns=['dev_id','exception','exception_type','region','begin_time','end_time'])
+    print("\n\n")
+    print(data)
+
+    # target table; written this way, the table must already exist in the database
+    data.to_sql(name="abnormal_data", con=con, if_exists="append",index=False,index_label=False)
+
+
+
+def exception(list,con):   # list holds the exceeding records
+    break_point=find_break_point(list)   # the break points
+    split_list=point_write(list,break_point)   # split the original list into sub-lists at the break points; split_list is three levels deep, e.g. [[[1,2],[4,'g']],[[8,'2'],['4','g']],[[1,2],[4,'g']]]
+    # print('Sub-lists of the exceeding periods:')
+    # for i in split_list:
+    #     print(i)
+    print('\n')
+    abnormal=[]   # the reassembled exception-table rows
+
+    for item in split_list:   # take the needed time info out of each sub-list and add the new fields
+        temp=[]
+        temp.append(item[0][0])   # device code
+        temp.append('数据异常')   # data abnormal
+        temp.append('0')   # fume concentration exceeding
+        temp.append('徐汇区')   # Xuhui District
+        temp.append(item[len(item)-1][2])   # attribution time of the earliest record: begin time
+        temp.append(item[0][2])   # attribution time: end time
+        abnormal.append(temp)
+
+    print(abnormal)
+
+    print('Exceeding periods:')
+    for j in abnormal:
+        print(j)
+    abnormal_write_to_SQL(abnormal,con)   # write into the exception table
+    print("Exceeding-fume exception rows written!")
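Note: find_break_point and point_write together group the time-ordered list into contiguous 10-minute runs. A small illustration with three-field rows (index 2 is the attribution time, hypothetical values):

    rows = [
        ['d1', 'x', '2023-12-01 10:40'],
        ['d1', 'x', '2023-12-01 10:30'],
        ['d1', 'x', '2023-12-01 09:00'],   # gap > 10 min: break point at index 2
    ]
    bp = find_break_point(rows)    # -> [2]
    print(point_write(rows, bp))   # -> [[rows[0], rows[1]], [rows[2]]]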
+#------------------------------------------------------------------------------------------------------------ write device-failure data into the exception table
+# Is the gap between the two times more than 30 minutes? Returns True if so, otherwise False
+def is_time_difference_equals_30_mins(datestr1, datestr2):
+    date1 = datetime.strptime(datestr1, "%Y-%m-%d %H:%M")
+    date2 = datetime.strptime(datestr2, "%Y-%m-%d %H:%M")
+    time_diff = date2 - date1
+    return time_diff > timedelta(minutes=30)
+
+# Find the device-failure info and write it into the exception table
+def is_minutes_exceed_30(list,con) :   # list holds all records of one shop's pages; the times in list are in descending order, newest first
+    device_failure=[]   # the device-failure data
+    startTime = list[0][11]
+    print('Start time:',startTime)
+    for item in list[1:] :
+        if is_time_difference_equals_30_mins(item[11],startTime) :   # must be greater than 30 minutes; exactly 30 does not count
+            temp=[]
+            temp.append(item[2])   # device code
+            temp.append('设备故障')   # device failure
+            temp.append('1')   # device failure
+            temp.append('徐汇区')   # Xuhui District
+            temp.append(item[11])   # failure begin time
+            startTimeSub= datetime.strptime(startTime,"%Y-%m-%d %H:%M") - timedelta(minutes = 10)   # the result is a datetime.datetime; it still has to be converted to a string
+            print('After subtraction:',str(startTimeSub))
+            print('Type after subtraction:',type(str(startTimeSub)))
+            temp.append(str(startTimeSub)[:16])   # failure end time
+            device_failure.append(temp)
+        startTime = item[11]
+    print('Device-failure data:')
+    for i in device_failure :
+        print(i)
+    not_Key_period_exceed_30_minutes(device_failure,con)   # write the power-supply anomalies into the exception table
+    #abnormal_write_to_SQL(device_failure,con)   # write the device failures into the exception table
+    print('Power-supply-anomaly/offline info written into the exception table!')
+#----------------------------------------------------------------------------------------------------------- write power-supply-anomaly data into the exception table
+# Returns True when both the begin and the end time fall outside the key periods
+def is_time_not_between_key_period(begin_time,end_time) :   # parameters are date strings like '2023-06-21 14:30'
+    global Key_period_noon_begin,Key_period_noon_end,Key_period_night_begin,Key_period_night_end
+    # # noon key period
+    # Key_period_noon_begin = datetime.strptime('10:00',"%H:%M")
+    # Key_period_noon_end = datetime.strptime('14:00',"%H:%M")
+
+    # # evening key period
+    # Key_period_night_begin = datetime.strptime('17:00',"%H:%M")
+    # Key_period_night_end = datetime.strptime('21:00',"%H:%M")
+
+    begin1 = datetime.strptime(begin_time[11:],"%H:%M")
+    end1 = datetime.strptime(end_time[11:],"%H:%M")
+
+    # when both the begin and the end time are outside the key periods, record the failure as: suspected power-supply anomaly
+    if ((( begin1 > Key_period_noon_begin and begin1 < Key_period_noon_end ) or ( begin1 > Key_period_night_begin and begin1 < Key_period_night_end )) or (( end1 > Key_period_noon_begin and end1 < Key_period_noon_end ) or ( end1 > Key_period_night_begin and end1 < Key_period_night_end ))) ==False :
+        print('Begin or end time is outside the key periods')
+        return True
+    print('Inside a key period')
+    return False
+
+# Returns True when both the begin and the end time fall inside a key period
+def is_time_between_key_period(begin_time,end_time) :   # parameters are date strings like '2023-06-21 14:30'
+    global Key_period_noon_begin,Key_period_noon_end,Key_period_night_begin,Key_period_night_end
+    # # noon key period
+    # Key_period_noon_begin = datetime.strptime('10:00',"%H:%M")
+    # Key_period_noon_end = datetime.strptime('14:00',"%H:%M")
+
+    # # evening key period
+    # Key_period_night_begin = datetime.strptime('17:00',"%H:%M")
+    # Key_period_night_end = datetime.strptime('21:00',"%H:%M")
+
+    begin1 = datetime.strptime(begin_time[11:],"%H:%M")
+    end1 = datetime.strptime(end_time[11:],"%H:%M")
+
+    # when both the begin and the end time are inside a key period, record the failure as: offline
+    if ((begin1 > Key_period_noon_begin and begin1 < Key_period_noon_end) and ( end1 > Key_period_noon_begin and end1 < Key_period_noon_end )) or ( (begin1 > Key_period_night_begin and begin1 < Key_period_night_end) and ( end1 > Key_period_night_begin and end1 < Key_period_night_end )) :
+        print('Begin and end time inside a key period')
+        return True
+    print('Outside the key periods')
+    return False
+
+
+
+def not_Key_period_exceed_30_minutes(list,con) :   # list holds the device-failure periods
+    power_supply_abnormal = []   # the power-supply-anomaly / offline info
+    for item in list :
+        if is_time_not_between_key_period(item[4],item[5]) :   #else:
+            temp = []
+            temp.append(item[0])
+            temp.append('设备故障')   # device failure
+            temp.append('1')   # suspected power-supply anomaly
+            temp.append('徐汇区')   # Xuhui District
+            temp.append(item[4])
+            temp.append(item[5])
+            power_supply_abnormal.append(temp)
+        elif is_time_between_key_period(item[4],item[5]) :
+            temp = []
+            temp.append(item[0])
+            temp.append('设备故障')   # device failure
+            temp.append('2')   # offline
+            temp.append('徐汇区')   # Xuhui District
+            temp.append(item[4])
+            temp.append(item[5])
+            power_supply_abnormal.append(temp)
+    print('Power-supply-anomaly data:')
+    for i in power_supply_abnormal :
+        print(i)
+
+    # write the power-supply anomalies into the database exception table
+    abnormal_write_to_SQL(power_supply_abnormal,con)   # write the failure info into the exception table
+    print('Power-supply anomalies written into the exception table!')
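Note: the key periods are 10:00-14:00 (noon) and 17:00-21:00 (evening), defined as module-level globals near the bottom of this file. A failure period entirely inside a window is classed as offline ('2'), entirely outside both windows as suspected power loss ('1'). A quick illustration, assuming those globals are already initialized:

    print(is_time_between_key_period('2023-06-21 11:00', '2023-06-21 12:30'))
    # True: both endpoints inside the noon window
    print(is_time_not_between_key_period('2023-06-21 02:00', '2023-06-21 03:00'))
    # True: both endpoints outside both windows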
create_engine("mysql+pymysql://root:1234@localhost:3306/fume?charset=utf8") + #con = engine.connect() + + # test3 瑕佸啓鍏ョ殑鏁版嵁琛紝杩欐牱鍐欑殑璇濊鎻愬墠鍦ㄦ暟鎹簱寤哄ソ琛� + data.to_sql(name="exceeding_st_data", con=con, if_exists="append",index=False,index_label=False) + #con.close() + print("瓒呮爣琛ㄥ啓鍏ュ畬鎴�!") + + +# list涓烘煇搴楅摵鎸囧畾椤垫暟鐨勫叏閮ㄧ殑璁板綍 灏嗚秴鏍囨暟鎹啓鍏ヨ秴鏍囪〃 +def isExceeding(list,con): #list涓烘煇搴楅摵鎸囧畾椤垫暟鐨勫叏閮ㄧ殑璁板綍 list鍏冪礌涓哄垪琛ㄥ舰寮� + exceedingData=[] #淇濆瓨瓒呮爣鐨勬暟鎹� + for item in list: #鏌ユ壘瓒呮爣鐨勬暟鎹紝骞惰褰曚笅 + if float(item[5]) > 1: # 鎺掔儫娴撳害澶т簬1鍒欒秴鏍� + print("璇ユ潯鏁版嵁瓒呮爣") + #淇濆瓨璇ユ潯璁板綍锛屾彁鍙栭渶瑕佺殑鍊硷紝骞舵坊鍔犲叾浠栧瓧娈� + exceedingData.append(refind_ex(item)) + + + for i in exceedingData: #閬嶅巻鍒楄〃 + print(i) + + if(len(exceedingData) != 0) : #鏈夎秴鏍囨暟鎹椂鎵嶆墽琛� + #灏嗚秴鏍囨暟鎹椂闂村垎绫诲啀鍐檃bnormal_data寮傚父琛ㄤ腑 + exception(exceedingData,con) + + #灏嗚秴鏍囨暟鎹洿鎺ュ啓鍏ユ暟鎹簱瓒呮爣琛ㄤ腑 + ex_write_to_SQL(exceedingData,con) + else: + print('璇ュ簵閾烘棤瓒呮爣鏁版嵁') + + +#------------------------------------------------------------------------------------------------------------鏁版嵁鍐欏叆璁惧淇℃伅琛� +def generate_short_uuid(): + arrayOf=[ + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q", + "R", + "S", + "T", + "U", + "V", + "W", + "X", + "Y", + "Z" + ] + list=[] + ui=str(uuid.uuid4()).replace('-', '') + for i in range(0,16): + a1=ui[i*2:i*2+2] + x=int(a1,16) + list.append(arrayOf[x % 0x3E]) + return ''.join(list) + + +#杩斿洖閲嶇粍鍚庣殑鍒楄〃 +def refind_ea(list): #涓�鏉¤褰曪紝涔熷氨鏄竴涓垪琛� + temp=[] + temp.append(generate_short_uuid()) + temp.append(list[2]) + temp.append(list[1]) + temp.append(list[0]) + temp.append(1) + print(temp) + return temp + +#灏嗗垪琛ㄥ啓鍏ヨ澶囦俊鎭澶囦俊鎭痚a_t_dev琛ㄤ腑 +def ea_write_to_SQL(list,con): + data = pd.DataFrame(list,columns=['DI_GUID','DI_Code','DI_Name','DI_Supplier','DI_Online']) + print("\n\n") + print('鍐欏叆鏁版嵁琛� 锛孌ateFrame涓猴細',data) + + # test3 瑕佸啓鍏ョ殑鏁版嵁琛紝杩欐牱鍐欑殑璇濊鎻愬墠鍦ㄦ暟鎹簱寤哄ソ琛� + data.to_sql(name="ea_t_device_info", con=con, if_exists="append",index=False,index_label=False) + print("璁惧淇℃伅琛ㄥ啓鍏ュ畬鎴�!") + + +def dev_info_data_if_exisitd(list,con): #list涓虹埇鍙栨煇瀹跺簵閾烘寚瀹氶〉鏁拌浆鎹㈠悗鐨勬暟鎹� + global con_read + #鍒涘缓绗簩涓暟鎹簱杩炴帴 + # engine = create_engine("mysql+pymysql://root:1234@localhost:3306/fume?charset=utf8") + # con_read = engine.connect() + + df = pd.read_sql('SELECT DI_Code,DI_Name,DI_Supplier FROM ea_t_device_info',con=con_read) #浠庤澶囦俊鎭〃涓鍙栬澶囩紪鍙凤紝搴楅摵鍚嶏紝渚涘簲鍟嗗瓧娈电殑鏁版嵁銆傝繑鍥炲�兼槸DateFrame绫诲瀷 + # con_read.close() #鍏抽棴閾炬帴 + + res = df.values.tolist() #DateFrame鎸夌収琛岃浆鎴恖ist绫诲瀷锛宺es瀛樻斁鐨勬槸璁惧淇℃伅琛ㄤ腑鐨勬暟鎹� + print('******** 璁惧淇℃伅******') + for i in res: + print(i) + print('璁惧淇℃伅琛ㄨ褰曟潯鏁颁负锛�',len(res)) + + list1 = rdm.remove_duplicates_dev_info(list) #璁惧缂栧彿锛屽簵閾哄悕锛屼緵搴斿晢鐩哥瓑鏃讹紝鍒欎负閲嶅锛屽幓闄ゃ�俵ist1涓哄幓閲嶅悗鐨� + if len(res) > 0 : #璁惧琛ㄤ腑鏈夋暟鎹� + #姣旇緝 + temp=list1[:] #灏唋ist1鏁版嵁缁檛emp锛岄亶鍘唗emp,鑻ョ浉绛夛紝浠巐ist涓垹闄ゆ暟鎹紝閬垮厤涓�涓垪琛ㄥ悓鏃堕亶鍘嗕笖鍒犻櫎 + print('鍘婚櫎閲嶅涓�:') + print(list1) + for item in temp: + if item[1:4] in ( x[:] for x in res ) : #寰呭瓨鍏ユ暟鎹簱鐨勫�间笌璁惧琛ㄤ腑鏁版嵁鐩哥瓑鏃�,灏嗗緟瀛樺叆鐨勫�间粠list涓Щ闄� + list1=rdm.remove_given_data_dev_info(list1,item[1:4]) #璇tem浠巐ist1涓Щ闄� + + print('璁惧淇℃伅琛ㄤ腑鏈夋暟鎹椂锛屽幓閲嶅悗鐨刲ist涓猴細',list1) + if( len(list1) != 0 ) : #鍒犻櫎鍚庝笉涓虹┖鏃讹紝鍐欏叆 + ea_write_to_SQL(list1,con) #灏嗗垪琛ㄥ啓鍏a_t_dev琛ㄤ腑 + else : #璁惧琛ㄤ腑鏃犳暟鎹� + # a=rdm.remove_duplicates_dev_info(list) #璁惧缂栧彿锛屽簵閾哄悕锛屼緵搴斿晢鐩哥瓑鏃讹紝鍒欎负閲嶅锛屽幓闄� + print('璁惧琛ㄦ棤鏁版嵁锛屽鐞嗗悗寰呭啓鍏ョ殑璁惧淇℃伅涓猴細',list1) + 
+# Return the reassembled row
+def refind_ea(list):   # one record, i.e. one list
+    temp=[]
+    temp.append(generate_short_uuid())
+    temp.append(list[2])
+    temp.append(list[1])
+    temp.append(list[0])
+    temp.append(1)
+    print(temp)
+    return temp
+
+# Write the list into the ea_t_device_info device-info table
+def ea_write_to_SQL(list,con):
+    data = pd.DataFrame(list,columns=['DI_GUID','DI_Code','DI_Name','DI_Supplier','DI_Online'])
+    print("\n\n")
+    print('Writing to the table; the DataFrame is:',data)
+
+    # target table; written this way, the table must already exist in the database
+    data.to_sql(name="ea_t_device_info", con=con, if_exists="append",index=False,index_label=False)
+    print("Device-info table written!")
+
+
+def dev_info_data_if_exisitd(list,con):   # list is the converted data crawled for one shop's pages
+    global con_read
+    # create the second database connection
+    # engine = create_engine("mysql+pymysql://root:1234@localhost:3306/fume?charset=utf8")
+    # con_read = engine.connect()
+
+    df = pd.read_sql('SELECT DI_Code,DI_Name,DI_Supplier FROM ea_t_device_info',con=con_read)   # read the device code, shop name and supplier fields from the device-info table; returns a DataFrame
+    # con_read.close()   # close the connection
+
+    res = df.values.tolist()   # DataFrame rows as a list; res holds the rows of the device-info table
+    print('******** device info ******')
+    for i in res:
+        print(i)
+    print('Number of rows in the device-info table:',len(res))
+
+    list1 = rdm.remove_duplicates_dev_info(list)   # rows whose device code, shop name and supplier all match are duplicates and are removed; list1 is the deduplicated list
+    if len(res) > 0 :   # the device table has data
+        # compare
+        temp=list1[:]   # copy list1 into temp and iterate over temp, so we never iterate and delete on the same list
+        print('Removing duplicates:')
+        print(list1)
+        for item in temp:
+            if item[1:4] in ( x[:] for x in res ) :   # when the pending row equals a row in the device table, remove it from the list
+                list1=rdm.remove_given_data_dev_info(list1,item[1:4])   # remove this item from list1
+
+        print('Deduplicated list when the device-info table has data:',list1)
+        if( len(list1) != 0 ) :   # write only if something is left after the removal
+            ea_write_to_SQL(list1,con)   # write the list into the device-info table
+    else :   # the device table has no data
+        # a=rdm.remove_duplicates_dev_info(list)   # rows whose device code, shop name and supplier all match are duplicates; removed
+        print('Device table empty; device info to write after processing:',list1)
+        # write the deduplicated data into the device-info table
+        ea_write_to_SQL(list1,con)
+
+
+
+# Convert the raw data into a new list, then write it into the device-info table
+def ea_t_dev(list,con):   # the records of one shop's given pages; list elements are lists, e.g. [[1,2,3,'a'],[5,2,3,'a'],[6,2,3,'a']]; con is the database connection
+    staging=[]   # the converted list
+    for item in list:
+        # take the needed values and add the extra fields
+        staging.append(refind_ea(item))   # convert
+    print('Converted device data:')
+    for i in staging:
+        print(i)
+
+    # check what the device table already holds; existing device info is not written again
+    dev_info_data_if_exisitd(staging,con)
+
+
+#---------------------------------- write into the minute-data table
+
+# Return the reassembled row
+def refind_fd(list):   # one record, i.e. one list
+    temp=[]
+    temp.append(list[2])    # device code
+    temp.append(list[12])   # reporting time
+    temp.append(list[11])   # attribution time
+    temp.append(list[6])    # fan current
+    temp.append(list[7])    # purifier current
+    temp.append(list[4])    # inlet fume concentration
+    temp.append(list[5])    # outlet fume concentration
+
+    print(temp)
+    return temp
+
+
+# Write the list into the minute-data table
+def fd_write_to_SQL(list,con):
+    data = pd.DataFrame(list,columns=['MV_Stat_Code','MV_Create_Time','MV_Data_Time','MV_Fan_Electricity','MV_Purifier_Electricity','MV_Fume_Concentration','MV_Fume_Concentration2'])
+    print("Writing to the minute-data table; the DataFrame is:")
+    print(data)
+
+    # target table; written this way, the table must already exist in the database
+    data.to_sql(name="fd_t_minutevalue", con=con, if_exists="append",index=False,index_label=False)
+
+    print("Minute-data table written!")
+
+# Convert, then write into the fd_t_minbute table
+def fd_t_minbute(list,con):   # one page of records; con is the database connection
+    staging=[]   # the converted list
+    for item in list:
+        # take the needed values and add the extra fields
+        staging.append(refind_fd(item))
+    print('Converted minute data:')
+    for i in staging:
+        print(i)
+    fd_write_to_SQL(staging,con)   # write the list into the table
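Note: all the *_write_to_SQL helpers share the same DataFrame.to_sql pattern. A minimal standalone sketch of it (credentials are placeholders; pandas would auto-create a missing table, but this project pre-creates the tables so the column types stay under its control):

    import pandas as pd
    from sqlalchemy import create_engine

    engine = create_engine("mysql+pymysql://user:pass@localhost:3306/fume?charset=utf8")
    with engine.connect() as con:
        df = pd.DataFrame([["dev01", "2023-12-01 10:00", "0.8"]],
                          columns=["MV_Stat_Code", "MV_Data_Time", "MV_Fume_Concentration"])
        # appends the rows to the pre-created table
        df.to_sql(name="fd_t_minutevalue", con=con, if_exists="append", index=False)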
+#-------------------------------------------------------------------------------------------------------------- Shiqijia (食其家)
+def get_OnePage_teshu_shiqijia(url,count):
+    global ck
+    global list_temp   # use the global variable
+    list_temp.clear()  # empty the temporary list
+    # session.headers = {
+    #     # mind the cookie here; capture it yourself
+    #     # "Cookie":ck,
+    #     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
+    # }
+    r = session.get(url, verify=False).text
+    soup = bs(r,'html.parser')
+
+    list=[]   # list holding the result
+
+    tags = soup.find_all("tr")   # all rows of the table
+    for tag in tags:   # each tag is one row
+        count=count+1
+        element = tag.text          # all text inside the <tr> tag
+        element = element.strip()   # strip leading/trailing whitespace
+        list1 = element.split();    # split the string into a list on whitespace
+
+        del (list1[-2:])   # the last two elements are not needed; drop them
+        print('Dropped the last two special elements')
+        print(list1)
+
+        str_temp1=list1[4]+list1[5]   # merge elements 5 and 6 into one
+        print(str_temp1)
+        del list1[5]
+        list1[4]=str_temp1
+        print("Elements merged")
+        print(list1)
+
+        str_temp2=list1[1]+list1[2]   # merge elements 2 and 3
+        del list1[2]
+        list1[1]=str_temp2
+
+        list.append(list1)
+        print("Final row data")
+        print(list1)
+        #list1.clear()
+
+    #print(list)
+    list_data=[]
+    for i in list:   # the trailing date fields are merged into "Y-M-D H:M"
+        list_data.append(merge(i))
+    del list_data[0]   # drop the text header row
+    count=count-1      # the header row was dropped, so the total row count decreases by one
+    #print(list_data)
+    #list_temp=remove_Duplicates_list(list_data)[:]   # copy all data into the temporary list list_temp, duplicates removed
+    list_temp=list_data[:]
+    return count
+
+
+def get_MorePages_teshu_shiqijia(url,page_num):
+    global sleeptime
+    global already_spider_datanum
+    urls=list_url(url,page_num)   # the page URLs to iterate over
+    count_all=0   # total number of records
+    list_all=[]   # all crawled data
+    page=1
+    for i in urls:
+        count=0
+        count_all=count_all+get_OnePage_teshu_shiqijia(i,count)
+        if len(list_temp)==0:   # if this page is empty, the following pages have no data either; leave the loop
+            print('The following pages are empty; moving on to the next shop')
+            break
+        list_all.extend(list_temp)   # append the page to list_all
+        print("Crawled page",page)
+        page=page+1
+        print("\n")
+        time.sleep(sleeptime)   # wait `sleeptime` seconds between requests
+
+    for j in list_all:
+        print(j)   # print every row of the list
+    print("Total number of rows:",count_all)
+    already_spider_datanum += count_all   # running total of crawled records
+    return list_all
+
+
+
+#------------------------------------------------------------------------------------------------------------- special URLs
+def get_OnePage_teshu(url,count):   # grab one page of data and put it into list_data; url is the page address to visit
+    # global ck
+    global list_temp   # use the global variable
+
+    list_temp.clear()  # empty the temporary list
+    # session.headers = {
+    #     # mind the cookie here; capture it yourself
+    #     # "Cookie":ck,
+    #     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
+    # }
+    r = session.get(url, verify=False).text
+    soup = bs(r,'html.parser')
+
+    list=[]   # list holding the result
+
+    tags = soup.find_all("tr")   # all rows of the table
+    for tag in tags:   # each tag is one row
+        count=count+1
+        element = tag.text          # all text inside the <tr> tag
+        element = element.strip()   # strip leading/trailing whitespace
+        list1 = element.split();    # split the string into a list on whitespace
+
+        del (list1[-2:])   # the last two elements are not needed; drop them
+        print('Dropped the last two special elements')
+        print(list1)
+        list.append(list1)
+        #list1.clear()
+
+    #print(list)
+    list_data=[]
+    for i in list:
+        list_data.append(merge(i))   # merge the trailing date fields into "Y-M-D H:M"
+    del list_data[0]   # drop the text header row
+    count=count-1      # the header row was dropped, so the total row count decreases by one
+    #print(list_data)
+    #list_temp=remove_Duplicates_list(list_data)[:]   # copy all data into the temporary list list_temp, duplicates removed
+    list_temp=list_data[:]
+    return count
+
+
+def get_MorePages_teshu(url,page_num):   # crawl several pages for the given shop; page_num is the number of pages
+    global sleeptime
+    global already_spider_datanum
+    urls=list_url(url,page_num)   # the page URLs to iterate over; the links for every page, as a list
+    count_all=0   # total number of records
+    list_all=[]   # all crawled data
+    page=1
+    for i in urls:
+        count=0
+        count_all=count_all+get_OnePage_teshu(i,count)
+        if len(list_temp)==0:   # if this page is empty, the following pages have no data either; leave the loop
+            print('The following pages are empty; moving on to the next shop')
+            break
+        list_all.extend(list_temp)   # append the page to list_all
+        print("Crawled page",page)
+        page=page+1
+        print("\n")
+        time.sleep(sleeptime)   # wait `sleeptime` seconds between requests
+
+    for j in list_all:
+        print(j)   # print every row of the list
+    print("Total number of rows:",count_all)
+    already_spider_datanum += count_all   # running total of crawled records
+    return list_all
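Note: the three get_MorePages* variants share the same skeleton: fetch page i, stop at the first empty page, sleep between requests. A generic sketch of that pattern (fetch_page stands in for any of the page parsers; the helper name is illustrative):

    import time

    def crawl_pages(fetch_page, urls, delay=8):
        """fetch_page(url) -> list of rows; an empty list means no more data."""
        rows = []
        for page, url in enumerate(urls, start=1):
            batch = fetch_page(url)
            if not batch:         # empty page: everything after it is empty too
                break
            rows.extend(batch)
            print("crawled page", page)
            time.sleep(delay)     # be polite between requests
        return rows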
+def spilt_url_teshu(con,page,date_begin=month_begin,date_end=now_date):   # handle the special URLs first, then filter them out
+    global already_spider_shopnum
+    global all_data
+    urls=url_more()   # the URLs of all shops in the file, with the maximum page size of 100; urls is a list
+    #print(urls)
+    teshu_url=[]
+    special_url=['shop=%25E4%25BA%25BA%25E7%2594%259F%25E4%25B8%2580%25E4%25B8%25B2','shop=%25E7%25BC%2598%25E5%25AE%25B6','shop=%25E4%25B8%25B0%25E8%258C%2582%25E7%2583%25A4%25E4%25B8%25B2','shop=%25E6%25B3%25B0%25E7%2585%258C%25E9%25B8%25A1','shop=%25E5%25B0%258F%25E9%2593%2581%25E5%2590%259B']
+
+    for url in urls:   # iterate over all shop URLs
+        begin=url.find('&')+1
+        end=url.rfind('&')
+        #print(begin,end)
+        # special URLs get special treatment
+        if url[begin:end] in special_url:
+            print('Found a special one!')
+            already_spider_shopnum += 1   # number of crawled shops +1
+            teshu_url.append(url)
+            # then drop one list element
+            url_teshu=url_add_time(url,date_begin,date_end)   # add the date range to the URL
+            list_to_MySql=get_MorePages_teshu(url_teshu,page)   # shop URL, number of pages to crawl
+
+            if len(list_to_MySql) == 0 :
+                print('This shop has no data in the given period; skipped')
+                continue
+            has_remove_duplicates = remove_Duplicates_list(list_to_MySql)   # drop all duplicates within this shop's crawled pages
+            for item in has_remove_duplicates:
+                all_data.append(item)
+
+            list_to_MySql.clear()
+        if url[begin:end]=='shop=%25E9%25A3%259F%25E5%2585%25B6%25E5%25AE%25B6':   # Shiqijia (食其家)
+            print('Found a special one!')
+            already_spider_shopnum += 1   # number of crawled shops +1
+            teshu_url.append(url)
+            # then drop one list element
+            url_teshu=url_add_time(url,date_begin,date_end)   # add the date range to the URL
+            list_to_MySql=get_MorePages_teshu_shiqijia(url_teshu,page)   # shop URL, number of pages to crawl
+
+            if len(list_to_MySql) == 0 :
+                print('This shop has no data in the given period; skipped')
+                continue
+            has_remove_duplicates = remove_Duplicates_list(list_to_MySql)   # drop all duplicates within this shop's crawled pages
+            for item in has_remove_duplicates:
+                all_data.append(item)
+
+            list_to_MySql.clear()
+    for t in teshu_url:   # remove the special ones from urls
+        urls.remove(t)
+    print(len(urls))
+    return urls
+
+#-------------------------------------------------------------------------------------------------------------
+
+
+def spider_all(con,page,date_begin=month_begin,date_end=now_date):   # crawl every shop in the file (including the special-URL shops); database connection, pages to crawl, begin time, end time
+    global already_spider_shopnum
+    global all_data
+    url_all=[]
+
+    urls=url_more()   # the URLs of all shops in the file, with the maximum page size of 100; urls is a list
+    for url in urls:   # add the date range to every URL
+        url_all.append(url_add_time(url,date_begin,date_end))
+
+    for i in url_all:   # print the final URLs
+        print(i)
+
+    for j in url_all:   # write to the database for every URL
+        list_to_MySql=get_MorePages(j,page)   # shop URL, number of pages to crawl
+        already_spider_shopnum += 1   # number of crawled shops +1
+
+        if len(list_to_MySql) == 0 :
+            print('This shop has no data in the given period; skipped')
+            continue
+        has_remove_duplicates = remove_Duplicates_list(list_to_MySql)   # drop all duplicates within this shop's crawled pages
+        for item in has_remove_duplicates:
+            all_data.append(item)
+
+        list_to_MySql.clear()
+
+
+
+
+def write_Sql(list,con):   # write the site data into the database
+    data = pd.DataFrame(list,columns=['provider','shop_name','equipment_number','equipment_name','smoke_push_density','smoke_pop_density','wind_turbine','purifier','level','alarm_required','alarm_triggered','attribution_time','reporting_time','data_time'])
+    print("\n\n")
+    print(data)
+
+    # target table; written this way, the table must already exist in the database
+    data.to_sql(name="ed_data", con=con, if_exists="append",index=False,index_label=False)
+    print("Write finished!")
+
+# ck=""   # holds the cookie
+session = requests.session()
+webshops=[]
+shopnum=0                  # total number of shops in the file
+already_spider_shopnum=0   # number of shops crawled
+already_spider_datanum=0   # number of records crawled
+sleeptime=8
+
+Key_period_noon_begin = datetime.strptime('10:00',"%H:%M")   # noon key period
+Key_period_noon_end = datetime.strptime('14:00',"%H:%M")
+
+
+Key_period_night_begin = datetime.strptime('17:00',"%H:%M")   # evening key period
+Key_period_night_end = datetime.strptime('21:00',"%H:%M")
+
+engine = create_engine("mysql+pymysql://fumeRemote:feiyu2023@114.215.109.124:3306/fume?charset=utf8")
+
+
+
+# connection dedicated to reading the device-info table
+con_read = engine.connect()
+
+all_data = []
+def pass_login(ses,beginTime,endTime,shops):
+    global con_read
+    # global ck
+    global session
+    global webshops
+
+    global all_data
+    global shopnum
+    global already_spider_shopnum
+    global already_spider_datanum
+    # reset, so one run does not affect the next
+    all_data.clear()
+    shopnum=0
+    already_spider_shopnum=0
+    already_spider_datanum=0
+
+    # initialize the session
+    session = ses
+    webshops = shops[:]
+    print(f'Time range to fetch: {beginTime}-{endTime}')
+    print('Shop names passed in:',shops)
+
+    engine = create_engine("mysql+pymysql://fumeRemote:feiyu2023@114.215.109.124:3306/fume?charset=utf8")
+    con = engine.connect()
+
+
+    # crawl all shops and measure the elapsed time
+    start_time=time.time()
+
+    spider_all(con,100,beginTime,endTime)   # crawl every shop in the file
+    # mark the last field of every record as '0', meaning not duplicated
+    for item in all_data:
+        item.append(0)
+
+    end_time=time.time()
+    # close the database connections
+
+    con_read.close()
+    con.close()
+
+    print("Data fetch finished!")
+    print("Request interval set to",sleeptime,"seconds")
+    print("Out of",shopnum,"shops,",already_spider_shopnum,"were fetched")
+    print("Fetched",already_spider_datanum,"records in total")
+    print("Elapsed: {:.2f} seconds".format(end_time-start_time))
+
+    result=[]
+    result.append('Data fetch finished!')
+    result.append("Out of "+str(shopnum)+" shops, "+str(already_spider_shopnum)+" were fetched")
+    result.append("Fetched "+str(already_spider_datanum)+" records in total")
+    result.append("Elapsed: {:.2f} seconds".format(end_time-start_time))
+
+    return result,all_data
+
+
+
+# pass_login()
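Note: the commit message mentions changing the engine dialect; both the module level and pass_login now build engines with "mysql+pymysql", which selects the pure-Python PyMySQL driver for SQLAlchemy. A minimal connectivity sketch of the pattern, with placeholder credentials:

    from sqlalchemy import create_engine, text

    engine = create_engine("mysql+pymysql://user:pass@host:3306/fume?charset=utf8")
    with engine.connect() as con:
        print(con.execute(text("SELECT 1")).scalar())   # connectivity check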
print("鑾峰彇鏁版嵁瀹屾垚!") + print("璁剧疆鑾峰彇鐨勬椂闂撮棿闅斾负",sleeptime,"绉�") + print("鍏辨湁",shopnum,"瀹�","宸茶幏鍙�",already_spider_shopnum,"瀹�") + print("鍏辫幏鍙�",already_spider_datanum,"鏉¤褰�") + print("鍏辫�楁椂:{:.2f}绉�".format(end_time-start_time)) + + result=[] + result.append('鑾峰彇鏁版嵁瀹屾垚!') + result.append("鍏辨湁"+str(shopnum)+"瀹讹紝宸茶幏鍙�"+str(already_spider_shopnum)+"瀹�") + result.append("鍏辫幏鍙�"+str(already_spider_datanum)+"鏉¤褰�") + result.append("鍏辫�楁椂:{:.2f}绉�".format(end_time-start_time)) + + return result,all_data + + + +# pass_login() diff --git a/src/__pycache__/Crawling.cpython-38.pyc b/src/__pycache__/Crawling.cpython-38.pyc index 1c6a4b9..9554731 100644 --- a/src/__pycache__/Crawling.cpython-38.pyc +++ b/src/__pycache__/Crawling.cpython-38.pyc Binary files differ diff --git a/src/__pycache__/Crawling_1.cpython-38.pyc b/src/__pycache__/Crawling_1.cpython-38.pyc new file mode 100644 index 0000000..3a1d924 --- /dev/null +++ b/src/__pycache__/Crawling_1.cpython-38.pyc Binary files differ diff --git a/src/__pycache__/write_to_MySql.cpython-38.pyc b/src/__pycache__/write_to_MySql.cpython-38.pyc index aa429e6..8728de9 100644 --- a/src/__pycache__/write_to_MySql.cpython-38.pyc +++ b/src/__pycache__/write_to_MySql.cpython-38.pyc Binary files differ diff --git a/src/__pycache__/write_to_minute_table.cpython-38.pyc b/src/__pycache__/write_to_minute_table.cpython-38.pyc index 5a658b7..03e0342 100644 --- a/src/__pycache__/write_to_minute_table.cpython-38.pyc +++ b/src/__pycache__/write_to_minute_table.cpython-38.pyc Binary files differ diff --git a/src/write_to_MySql.py b/src/write_to_MySql.py index 3af97c9..20161e9 100644 --- a/src/write_to_MySql.py +++ b/src/write_to_MySql.py @@ -7,7 +7,6 @@ from datetime import datetime, timedelta import sys -# sys.path.append('D:\\z\workplace\\VsCode\\pyvenv\\venv') sys.path.append('../../') import src.core_modules.remove_duplicates_methods as rdm @@ -107,7 +106,7 @@ temp=[] temp.append(item[2]) #璁惧缂栧彿 temp.append('璁惧鏁呴殰') #璁惧缂栧彿 - temp.append('1') #璁惧鏁呴殰 + temp.append('1') #渚涚數寮傚父 temp.append('寰愭眹鍖�') temp.append(item[11]) #鏁呴殰寮�濮嬫椂闂� startTimeSub= datetime.strptime(startTime,"%Y-%m-%d %H:%M") - timedelta(minutes = 10) #缁撴灉涓篸atetime.datetime绫诲瀷 锛岄渶瑕佸啀杞负瀛楃涓茬被鍨� @@ -178,6 +177,7 @@ for i in power_supply_abnormal : print(i) + #灏嗕緵鐢靛紓甯哥殑淇℃伅鍐欏叆鏁版嵁搴撳紓甯歌〃涓� abnormal_write_to_SQL(power_supply_abnormal,con) #灏嗚澶囨晠闅滀俊鎭啓鍏ュ紓甯歌〃 print('渚涚數寮傚父鐨勪俊鎭啓鍏ュ紓甯歌〃瀹屾垚!') @@ -314,9 +314,12 @@ def refind_ea(list): #涓�鏉¤褰曪紝涔熷氨鏄竴涓垪琛� temp=[] temp.append(generate_short_uuid()) + # 璁惧缂栧彿 temp.append(list[2]) + # 搴楅摵鍚嶅瓧 temp.append(list[1]) - temp.append(list[0]) + # 渚涘簲鍟� + temp.append(list[3]) temp.append(1) print(temp) return temp @@ -388,7 +391,7 @@ temp.append(list[7]) #鍑�鍖栧櫒鐢垫祦 7 temp.append(list[4]) #杩涙补鐑熸祿搴﹀�� temp.append(list[5]) #鎺掓补鐑熸祿搴﹀�� - temp.append(list[14]) #閲嶅鐨勬鏁� + temp.append(list[13]) #閲嶅鐨勬鏁� print(temp) return temp diff --git a/src/write_to_minute_table.py b/src/write_to_minute_table.py index 3afe27b..a5d9b69 100644 --- a/src/write_to_minute_table.py +++ b/src/write_to_minute_table.py @@ -16,7 +16,7 @@ temp.append(list[7]) #鍑�鍖栧櫒鐢垫祦 7 temp.append(list[4]) #杩涙补鐑熸祿搴﹀�� temp.append(list[5]) #鎺掓补鐑熸祿搴﹀�� - temp.append(list[14]) #閲嶅鐨勬鏁� + temp.append(list[13]) #閲嶅鐨勬鏁� print(temp) return temp diff --git a/test_get_data/__init__.py b/test_get_data/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test_get_data/__init__.py diff --git a/test_get_data/__pycache__/__init__.cpython-38.pyc b/test_get_data/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 
diff --git a/test_get_data/__init__.py b/test_get_data/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test_get_data/__init__.py
diff --git a/test_get_data/__pycache__/__init__.cpython-38.pyc b/test_get_data/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..454113f
--- /dev/null
+++ b/test_get_data/__pycache__/__init__.cpython-38.pyc
Binary files differ
diff --git a/test_get_data/__pycache__/__init__.cpython-39.pyc b/test_get_data/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000..a7a60c6
--- /dev/null
+++ b/test_get_data/__pycache__/__init__.cpython-39.pyc
Binary files differ
diff --git a/test_get_data/__pycache__/login.cpython-38.pyc b/test_get_data/__pycache__/login.cpython-38.pyc
new file mode 100644
index 0000000..9182c7a
--- /dev/null
+++ b/test_get_data/__pycache__/login.cpython-38.pyc
Binary files differ
diff --git a/test_get_data/__pycache__/login.cpython-39.pyc b/test_get_data/__pycache__/login.cpython-39.pyc
new file mode 100644
index 0000000..2f7041c
--- /dev/null
+++ b/test_get_data/__pycache__/login.cpython-39.pyc
Binary files differ
diff --git a/test_get_data/__pycache__/request.cpython-38.pyc b/test_get_data/__pycache__/request.cpython-38.pyc
new file mode 100644
index 0000000..7da1355
--- /dev/null
+++ b/test_get_data/__pycache__/request.cpython-38.pyc
Binary files differ
diff --git a/test_get_data/__pycache__/request.cpython-39.pyc b/test_get_data/__pycache__/request.cpython-39.pyc
new file mode 100644
index 0000000..36f8a2c
--- /dev/null
+++ b/test_get_data/__pycache__/request.cpython-39.pyc
Binary files differ
diff --git a/test_get_data/__pycache__/url_help.cpython-38.pyc b/test_get_data/__pycache__/url_help.cpython-38.pyc
new file mode 100644
index 0000000..ed2aa42
--- /dev/null
+++ b/test_get_data/__pycache__/url_help.cpython-38.pyc
Binary files differ
diff --git a/test_get_data/__pycache__/url_help.cpython-39.pyc b/test_get_data/__pycache__/url_help.cpython-39.pyc
new file mode 100644
index 0000000..8fb7aa6
--- /dev/null
+++ b/test_get_data/__pycache__/url_help.cpython-39.pyc
Binary files differ
diff --git a/test_get_data/database_connect.py b/test_get_data/database_connect.py
new file mode 100644
index 0000000..d1eef38
--- /dev/null
+++ b/test_get_data/database_connect.py
@@ -0,0 +1,86 @@
+import sys
+import os
+sys.path.append(os.path.dirname(__file__))
+# sys.path.append('E:\\ruanjian\\Python\\Lib\\site-packages')
+
+
+
+from sqlalchemy import create_engine
+class DataBase:
+    """ Remote database """
+    # con_read = None
+    # con_write = None
+    # ip = '114.215.109.124'
+    # user = 'fumeRemote'
+    # password = 'feiyu2023'
+    # port = 3306
+    # data_base_name = 'fume'
+
+
+    """ Local machine """
+    con_read = None
+    con_write = None
+    ip = 'localhost'
+    user = 'root'
+    password = '1234'
+    port = 3306
+    data_base_name = 'qianduan_sql'
+
+
+
+    """Connect to the database
+    """
+    def connect_remote_database_read(self):
+
+
+        if self.con_read == None or self.con_read.closed:
+            engine = create_engine(f"mysql+pymysql://{self.user}:{self.password}@{self.ip}:{self.port}/{self.data_base_name}?charset=utf8",pool_recycle=3600, pool_size=3, max_overflow=0)
+            self.con_read = engine.connect()
+        return self.con_read
+
+    def connect_remote_database_write(self):
+        """ Write """
+
+
+        if self.con_write == None or self.con_write.closed:
+            engine = create_engine(f"mysql+pymysql://{self.user}:{self.password}@{self.ip}:{self.port}/{self.data_base_name}?charset=utf8",pool_recycle=3600, pool_size=3, max_overflow=0)
+            self.con_write = engine.connect()
+        return self.con_write
+
+
+    # """ Connect to the local database
+    # """
+    # def connect_local_database_read(self):
+    #     """ Read data
+    #     """
+    #     if self.con_read == None or self.con_read.closed:
+    #         engine = create_engine(f"mysql+pymysql://{self.user}:{self.password}@{self.ip}:{self.port}/{self.data_base_name}?charset=utf8")
+    #         self.con_read = engine.connect()
+    #     return self.con_read
+
+    # def connect_local_database_write(self):
+    #     """ Write data
+    #     """
+    #     if self.con_write == None or self.con_write.closed:
+    #         engine = create_engine(f"mysql+pymysql://{self.user}:{self.password}@{self.ip}:{self.port}/{self.data_base_name}?charset=utf8")
+    #         self.con_write = engine.connect()
+    #     return self.con_write
+
+
+    def disconnect(self,area_type:str,option_type:str):
+        """Disconnect
+
+        Args:
+            area_type (str): where the database lives; local or remote
+            option_type (str): operation type; write or read
+        """
+
+        self.con_read.close()
+
+
+# Other files just import this object
+datebase_single_obj = DataBase()
+
+if __name__ == '__main__':
+    # print(datebase_single_obj.connect_remote_database_read())
+    pass
\ No newline at end of file
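Note: database_connect exposes a module-level singleton, so every consumer shares the same cached connections. A typical consumer would look like the sketch below (the query is hypothetical; note that disconnect() currently closes con_read regardless of its arguments):

    import pandas as pd
    from test_get_data.database_connect import datebase_single_obj

    con = datebase_single_obj.connect_remote_database_read()   # reuses the cached connection
    df = pd.read_sql('SELECT DI_Code, DI_Name FROM ea_t_device_info', con=con)
    datebase_single_obj.disconnect('remote', 'read')           # closes the read connection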
diff --git a/test_get_data/get_data.py b/test_get_data/get_data.py
new file mode 100644
index 0000000..ebffb82
--- /dev/null
+++ b/test_get_data/get_data.py
@@ -0,0 +1,92 @@
+from test_get_data.request import request_get
+from test_get_data.url_help import Url
+from test_get_data.login import *
+
+from bs4 import BeautifulSoup as bs
+import re   # regular expressions
+import time
+
+import sys
+sys.path.append('../../')
+import src.core_modules.remove_duplicates_methods as rdm
+
+now_date = time.strftime("%Y-%m-%d", time.localtime())   # current date; default end of the URL date range
+now_date1 = time.strftime("%Y-%m", time.localtime())
+month_begin=now_date1+'-01'   # start of the current month
+
+list_temp=[]   # temporary list, global
+
+
+
+#------------------------------------------------------------------------------------------------------------- special URLs
+def get_OnePage_teshu(url,count=1):   # grab one page of data and put it into list_data; url is the page address to visit
+    global list_temp   # use the global variable
+
+    list_temp.clear()   # empty the temporary list
+
+    r = request_get(url).text
+    soup = bs(r,'html.parser')
+
+    list=[]   # list holding the result
+
+    # find all tr tags
+    rows = soup.find_all('tr')
+
+    # extract the data from the table
+    result = []
+    for row in rows:
+        data = []
+        cols = row.find_all('td')
+        for col in cols:
+            if col.find('div'):
+                # if the td contains a div, extract the div's content separately
+                div_content = col.find('div').text.strip()
+                # data.append(col.find('td').text.strip())
+                # text of the element itself; search only the tag's direct children
+                td_content = ''.join(col.find_all(text=True, recursive=False)).strip()
+                data.append(td_content)
+                data.append(div_content)
+            else:
+                # if the td contains no div, take the td's content directly
+                td_content = col.text.strip()
+                data.append(td_content)
+        del (data[-2:])
+        del (data[2])
+        result.append(data)
+    # drop the header row
+    del (result[0])
+    # print the extracted data
+    print(result)
+
+
+
+    # for tag in tags:   # each tag is one row
+    #     element = tag.text          # all text inside the <tr> tag
+    #     element = element.strip()   # strip leading/trailing whitespace
+    #     list1 = element.split();    # split the string into a list on whitespace
+
+    #     # del (list1[-2:])   # the last two elements are not needed; drop them
+    #     # print('Dropped the last two special elements')
+    #     # print(list1)
+    #     list.append(list1)
+    # print(list)
+
+    # list_data=[]
+    # for i in list:
+    #     list_data.append(merge(i))   # merge the trailing date fields into "Y-M-D H:M"
+    # del list_data[0]   # drop the text header row
+    # count=count-1      # the header row was dropped, so the total row count decreases by one
+    # #print(lt_isates_list(list_data)[:]   # copy all data into the temporary list list_temp, duplicates removed
+    # list_temp=list_data[:]
+    # return count
+
+if __name__ == '__main__':
+    # log in
+    login_fume_web()
+    u = Url()
+    urls = u.concatenate_url_with_condition('杨记齐齐哈尔烤肉','2023-10-01','2023-10-31',1)
+    for item in urls:
+        get_OnePage_teshu(item)
+
+
+
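Note: the td/div split above relies on find_all(text=True, recursive=False) to collect only the td's own text nodes, leaving the nested div's text to be read separately (newer bs4 spells the text parameter `string`). A minimal illustration on a static snippet:

    from bs4 import BeautifulSoup

    html = '<td> 12.5 <div>exceeded</div></td>'
    td = BeautifulSoup(html, 'html.parser').td
    own_text = ''.join(td.find_all(text=True, recursive=False)).strip()
    print(own_text)               # '12.5'  (the div text is not included)
    print(td.div.text.strip())    # 'exceeded'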
diff --git a/test_get_data/login.py b/test_get_data/login.py
new file mode 100644
index 0000000..0380423
--- /dev/null
+++ b/test_get_data/login.py
@@ -0,0 +1,64 @@
+from test_get_data.request import request_post,request_get
+import time
+import json
+import base64
+
+import src.user.account_photo as account
+
+def get_time():
+    # millisecond timestamp, 13 digits
+    now_time = str(int(time.time()*1000))
+    return now_time
+
+def get_photo_url(url):
+    return url + get_time()
+
+
+
+def base64_api(img):
+    # returns the account and password
+    uname,pwd = account.back_account_password()
+    with open(img, 'rb') as f:
+        base64_data = base64.b64encode(f.read())
+        b64 = base64_data.decode()
+    data = {"username": uname, "password": pwd, "typeid": 2, "image": b64}
+    result = json.loads(request_post("http://api.ttshitu.com/predict", data))
+    if result['success']:
+        return result["data"]["result"]
+    else:
+        # !!!!! Note: error cases such as "no workers available" are returned here; add handling so the script does not get stuck, and retry the recognition
+        return result["message"]
+    return ""
+
+
+
+def login_fume_web():
+    # request the captcha URL
+    # build the timestamp
+    # concatenate the URL
+    # recognize the captcha
+    url_photo = get_photo_url('http://xhhb.senzly.cn/servlet/Vcode_new.serv?t=')
+    response = request_get(url_photo)   # the image is binary data
+    image_data = response.content
+    with open('Vcode.jpg',mode='wb') as f:
+        f.write(image_data)
+    # captcha result
+    v_code_result = base64_api('Vcode.jpg')
+
+
+    play_load = {
+        "account": "9SUBjEeNy7nFMzk123",
+        "password": "6SUBIyusanb170e13a221a4cb58c66876006488504",
+        "vcode": v_code_result
+    }
+
+    url_jump = 'http://xhhb.senzly.cn/cusLogin.php'
+    request_post(url_jump,play_load)
+    print('Login succeeded')
+
+    # return session
+    # personal verification
+
+
+if __name__ == '__main__':
+    login_fume_web()
\ No newline at end of file
diff --git a/test_get_data/request.py b/test_get_data/request.py
new file mode 100644
index 0000000..c654340
--- /dev/null
+++ b/test_get_data/request.py
@@ -0,0 +1,37 @@
+import requests
+import urllib3
+
+class MyRequest:
+    def __init__(self):
+        urllib3.disable_warnings()
+        self.session = requests.session()
+        self.session.headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
+        }
+
+    def get(self,url:str):
+        r = self.session.get(url, verify=False)
+        if r.status_code != 200:
+            return False
+        return r
+
+    def post(self, url: str, params: dict):
+        """POST request
+
+        Args:
+            url (str): target URL
+            params (dict): request parameters
+
+        Returns:
+            _type_: response body
+        """
+        r = self.session.post(url, data=params, verify=False)
+        if r.status_code != 200:
+            return False
+        return r.text
+
+
+
+_my_request = MyRequest()
+request_post = _my_request.post
+request_get = _my_request.get
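Note: MyRequest wraps a single requests.Session, so the cookies set by the cusLogin.php POST persist for every later request_get. A short usage sketch (the wrapper returns False on non-200 responses):

    from test_get_data.request import request_get
    from test_get_data.login import login_fume_web

    login_fume_web()   # captcha recognition + login POST; session cookies are stored
    page = request_get('http://xhhb.senzly.cn/sys/yyRealTimeValue_list.jsp?key1=&pagesize=100')
    if page is not False:
        print(page.text[:200])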
diff --git a/test_get_data/url_help.py b/test_get_data/url_help.py
new file mode 100644
index 0000000..6bc1c54
--- /dev/null
+++ b/test_get_data/url_help.py
@@ -0,0 +1,36 @@
+import urllib.parse
+
+class Url:
+
+    def concatenate_url_with_condition(self,shop_name,date_begin,date_end,page_num)->list:
+        """Build the complete URLs"""
+        # encode the name
+        base_url = self.encoding_shop_name(shop_name)
+        # add the date condition
+        url_with_date = self.url_add_date(base_url,date_begin,date_end)
+        # add the page condition
+        sub_urls_wait_for_request = self.list_add_page(url_with_date,page_num)
+        return sub_urls_wait_for_request
+
+
+
+    def url_add_date(self,url,date_begin,date_end):   # url, Y-M-D, e.g. 2023-05-03
+        url_date=url+'&key5='+date_begin+'&key6='+date_end
+        return url_date
+
+    def list_add_page(self,url,page_num):   # i in the URL is the page; page_num is the number of pages to crawl; append the page parameter to the URL
+        urls = [url+'&page'+'={}'.format(str(i)) for i in range(1,page_num+1)]
+        return urls
+
+    def encoding_shop_name(self,shop_name):
+        encoded_shop_name = urllib.parse.quote(urllib.parse.quote(shop_name))
+
+        return 'http://xhhb.senzly.cn/sys/yyRealTimeValue_list.jsp?key1=&shop='+encoded_shop_name+'&pagesize=100'
+
+
+
+if __name__ == '__main__':
+    u = Url()
+    urls = u.concatenate_url_with_condition('食其家','2023-10-01','2023-10-31',5)
+    for item in urls:
+        print(item)
\ No newline at end of file
--
Gitblit v1.9.3