From 9fdbf60165db0400c2e8e6be2dc6e88138ac719a Mon Sep 17 00:00:00 2001
From: zmc <zmc_li@foxmail.com>
Date: 星期五, 22 十二月 2023 13:28:11 +0800
Subject: [PATCH] Merge branch 'master' of ssh://114.215.109.124:29418/fume-manage-python

---
 core_modules/remove_duplicates_methods.py                         |   66 +++
 core_modules/__pycache__/remove_duplicates_methods.cpython-39.pyc |    0 
 core_modules/__pycache__/remove_duplicates_methods.cpython-38.pyc |    0 
 main.py                                                           |  898 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 964 insertions(+), 0 deletions(-)

diff --git a/core_modules/__pycache__/remove_duplicates_methods.cpython-38.pyc b/core_modules/__pycache__/remove_duplicates_methods.cpython-38.pyc
new file mode 100644
index 0000000..b77dd82
--- /dev/null
+++ b/core_modules/__pycache__/remove_duplicates_methods.cpython-38.pyc
Binary files differ
diff --git a/core_modules/__pycache__/remove_duplicates_methods.cpython-39.pyc b/core_modules/__pycache__/remove_duplicates_methods.cpython-39.pyc
new file mode 100644
index 0000000..b3d6f81
--- /dev/null
+++ b/core_modules/__pycache__/remove_duplicates_methods.cpython-39.pyc
Binary files differ
diff --git a/core_modules/remove_duplicates_methods.py b/core_modules/remove_duplicates_methods.py
new file mode 100644
index 0000000..b153d4d
--- /dev/null
+++ b/core_modules/remove_duplicates_methods.py
@@ -0,0 +1,66 @@
def remove_duplicates_dev_info (origin_data) :
    """De-duplicate rows destined for the device-info table.

    Two rows count as duplicates when their slices ``[1:4]`` compare equal;
    only the first occurrence of each key is kept, in original order.

    Args:
        origin_data (list): raw scraped rows (each row is a list; the
            elements in positions 1..3 are strings, so they are hashable
            as a tuple).

    Returns:
        list: the rows with duplicate-keyed entries removed.
    """
    unique_rows = []
    seen_keys = set()   # O(1) membership test instead of rescanning the result list every iteration
    for row in origin_data:
        key = tuple(row[1:4])
        if key not in seen_keys:
            seen_keys.add(key)
            unique_rows.append(row)
    return unique_rows
+
+
+
+
+
+
+
+
+
+
def remove_given_data_dev_info (wait_for_remove_list, sub_data ) :
    """Drop every row whose slice ``[1:4]`` equals *sub_data*.

    Args:
        wait_for_remove_list (list): original rows, each itself a list.
        sub_data (list): the three-element key identifying rows to delete.

    Returns:
        list: a new list containing only the non-matching rows.
    """
    return [row for row in wait_for_remove_list if row[1:4] != sub_data]
+
+
+
+
+
+
+
+
+
+# list_test1=['32','琛℃櫤杩滅鎶�锛堟繁鍦筹級鏈夐檺鍏徃', '棣ㄨ繙缇庨灏忛晣锛堝搱灏肩編椋熷箍鍦猴級','hengzhiyuan_64480047078091','']
+# list_test2=['f','琛℃櫤杩滅鎶�锛堟繁鍦筹級鏈夐檺鍏徃', '棣ㄨ繙缇庨灏忛晣锛堝搱灏肩編椋熷箍鍦猴級','hengzhiyuan_64480047078091','']
+# list_test3=['gf','琛℃櫤杩滅鎶�锛堟繁鍦筹級鏈夐檺鍏徃', '棣ㄨ繙缇庨灏忛晣锛堝搱灏肩編椋熷箍鍦猴級','hengzhiyuan_64480047078091','']
+# list_test4=['ds','琛℃櫤杩滅鎶�锛堟繁鍦筹級鏈夐檺鍏徃', '棣ㄨ繙缇庨灏忛晣锛堝搱灏肩編椋熷箍鍦猴級','hengzhiyuan_64480047078091','']
+# list_test5=['a','琛℃櫤杩滅鎶�锛堟繁鍦筹級鏈夐檺鍏徃', '棣ㄨ繙缇庨灏忛晣锛堝搱灏肩編椋熷箍鍦猴級','hengzhiyuan_64480047078091','']
+# list_test6=['df','琛℃櫤杩滅鎶�锛堟繁鍦筹級鏈夐檺鍏徃', '棣ㄨ繙缇庨灏忛晣锛堝搱灏肩編椋熷箍鍦猴級','hengzhiyuan_64480047078091','']
+
+# list_all=[]
+# list_all.append(list_test1)
+# list_all.append(list_test2)
+# list_all.append(list_test3)
+# list_all.append(list_test4)
+# list_all.append(list_test5)
+# list_all.append(list_test6)
+
+
+# print(remove_duplicates_dev_info(list_all))
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..93fc643
--- /dev/null
+++ b/main.py
@@ -0,0 +1,898 @@
+#sum 澶氶〉 鍏ュ簱鎴愬姛 鐖彇鏂囦欢涓墍鏈夌殑搴楅摵  缃戦〉瀹屾暣琛ㄧ 鍘婚櫎閲嶅鏁版嵁 閬囧埌绌洪〉闈細璺冲埌涓嬩竴瀹跺簵閾� 銆傞亣鍒版煇瀹跺簵閾烘棤鏁版嵁锛岃烦杩囧幓涓嬩竴瀹�
+#鐖幓鏌愬搴楅摵鎸囧畾鐨勯〉鏁帮紙涓�椤佃褰曟暟榛樿澶у皬涓�100鏉★級锛屾瘮濡傜埇鍙�12椤碉紝鍒欑埇鍙�12椤靛悗灏嗙粨鏋滀竴娆℃�у啓鍏ユ暟鎹簱
+#鐖幓鏁翠釜椤甸潰琛ㄧ粨鏋勶紝鍐嶅垎鍒啓鍏�4寮犺〃涓紙鐖彇鐨勬暟鎹瓨鍏ヤ袱寮犺〃涓紝杩樻湁瓒呮爣琛� 寮傚父琛級
+#缃戦〉涓婂瓧娈靛叡14涓紝瀛樺叆鏁版嵁搴撴槸15涓紙搴忓彿+14锛�
+import requests
+from bs4 import BeautifulSoup as bs  
+from aip import AipOcr  #鐧惧害鏂囧瓧璇嗗埆
+import re              #姝e垯琛ㄨ揪寮�
+from pymysql import *  # 杩炴帴mysql鏁版嵁搴�
+import pandas as pd
+from sqlalchemy import create_engine
+import urllib.parse                   #url鍙岄噸缂栫爜
+import time
+import uuid
+from datetime import datetime, timedelta
+
+import sys
+sys.path.append('D:\\z\workplace\\VsCode\\show')
+import core_modules.remove_duplicates_methods as rdm
+
+
+now_date = time.strftime("%Y-%m-%d", time.localtime())    #鑾峰彇褰撳墠骞存湀鏃�  #url缂栫爜骞存湀鏃ュ紑濮嬮粯璁ゆ椂闂�
+now_date1 = time.strftime("%Y-%m", time.localtime())  
+month_begin=now_date1+'-01'                 #璁剧疆褰撳墠鏈堜唤鐨勫紑濮�
+
+list_temp=[]  #涓存椂鍒楄〃  鍏ㄥ眬鍙橀噺
+
+
+
def remove_Duplicates_list(list):
    """Return *list* without duplicate rows, keeping first occurrences.

    Side effect: for every duplicate found, prints a notice and decrements
    the global counter ``already_spider_datanum``.
    """
    global already_spider_datanum
    unique_rows = []
    for row in list:
        if row in unique_rows:
            print("鍙戠幇閲嶅")
            already_spider_datanum -= 1
        else:
            unique_rows.append(row)
    return unique_rows
+
def merge(list):
    """Fuse the last six elements of *list* into three 'date time' strings.

    Pops three (date, time) pairs off the tail, joins each with a single
    space, and re-appends them in their original relative order.
    Mutates and returns the same list object.
    """
    joined = []
    for _ in range(3):
        time_part = str(list.pop(-1))
        date_part = str(list.pop(-1))
        joined.append(date_part + ' ' + time_part)
    list.extend(reversed(joined))   # restore the three pairs' original ordering
    return list
+
def list_url(url,page_num):
    """Return the per-page URLs for *url*: ``&page=i`` for i in 1..page_num."""
    return [f"{url}&page={i}" for i in range(1, page_num + 1)]
+
+
def get_OnePage(url,count):
    """Scrape one result page and stash its rows in the global ``list_temp``.

    Fetches *url* with the cookie from the global ``ck``, splits every
    <tr>'s text on whitespace, drops the trailing two cells, inserts an
    empty placeholder at index 3, fuses the trailing date/time pairs, and
    discards the header row. Returns *count* plus the number of data rows.
    """
    global ck
    global list_temp

    list_temp.clear()   # drop the previous page's rows
    headers = {
        # NOTE: the cookie has to be captured by hand
        "Cookie":ck,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
    }
    html = requests.get(url=url, headers=headers, verify=False).text
    soup = bs(html, 'html.parser')

    rows = []
    for tag in soup.find_all("tr"):          # one <tr> per record
        count += 1
        fields = tag.text.strip().split()    # whitespace-separated cell texts
        del fields[-2:]                      # trailing two cells are unused
        fields.insert(3, '')                 # placeholder column kept for schema alignment
        rows.append(fields)

    merged = [merge(r) for r in rows]        # fuse trailing date/time pairs
    del merged[0]                            # first row is the textual header
    count -= 1                               # header no longer counted
    list_temp = merged[:]
    return count
+
+
+
def get_MorePages(url,page_num):
    """Scrape *page_num* pages for one shop and return all collected rows.

    Stops early when a page comes back empty (nothing further for this
    shop). Side effects: sleeps ``sleeptime`` seconds between requests and
    adds the row total to the global ``already_spider_datanum``.
    """
    global sleeptime
    global already_spider_datanum
    count_all = 0
    list_all = []
    for page, page_url in enumerate(list_url(url, page_num), start=1):
        count_all += get_OnePage(page_url, 0)
        if not list_temp:            # empty page -> stop, later pages are empty too
            print('鍚庨潰椤垫暟涓虹┖锛岀埇鍘讳笅涓�涓簵閾�')
            break
        list_all.extend(list_temp)
        print("鐖彇浜嗙",page,"椤�")
        print("\n")
        time.sleep(sleeptime)        # throttle between requests

    for row in list_all:
        print(row)
    print("鎬昏鏁颁负:",count_all)
    already_spider_datanum += count_all
    return list_all
+
+
def url_more():
    """Build the listing URL (pagesize=100) for every shop named in the file.

    Reads shop names from a fixed local path, double URL-encodes each name
    (the site expects the name percent-encoded twice), and records the shop
    count in the global ``shopnum``. Returns the list of URLs.
    """
    global shopnum
    shopnames = []
    with open("D:\\z\\workplace\\shopname.txt",encoding='utf-8') as file:
        for line in file:
            shopnames.append(line.strip())
    shopnum = len(shopnames)
    encoded = [urllib.parse.quote(urllib.parse.quote(name)) for name in shopnames]
    return ['http://xhhb.senzly.cn/sys/yyRealTimeValue_list.jsp?key1=&shop=' + shop + '&pagesize=100'
            for shop in encoded]
+
+#鏍规嵁寮�濮嬪拰缁撴潫鏃ユ湡鏉ユ嫾鎺rl
def url_add_time(url,date_begin=month_begin,date_end=now_date):
    """Append the begin/end date filters (key5/key6) to *url*, print and return it."""
    url_date = f"{url}&key5={date_begin}&key6={date_end}"
    print(url_date)
    return url_date
+
+#------------------------------------------------------------------------------------------------------------瓒呮爣娌圭儫鏁版嵁鍐欏叆寮傚父琛ㄤ腑
+#涓ゆ椂闂存槸鍚︾浉宸�10鍒嗛挓  鏄垯杩斿洖TRUE 鍚﹀垯杩斿洖FALSE
def is_time_difference_equals_10_mins(datestr1, datestr2):
    """True when the two 'YYYY-MM-DD HH:MM' stamps are exactly 10 minutes apart (either direction)."""
    fmt = "%Y-%m-%d %H:%M"
    delta = datetime.strptime(datestr2, fmt) - datetime.strptime(datestr1, fmt)
    return abs(delta) == timedelta(minutes=10)
+
+
+#姣忛殧鍗佸垎閽熶竴娆′负姝e父銆� 鎵惧嚭瓒呰繃10鍒嗛挓鐨勯棿鏂偣
def find_break_point(list):
    """Locate the indices where consecutive records are NOT 10 minutes apart.

    *list* holds over-limit records whose element [2] is the timestamp.
    Returns the (printed) list of break indices; index j means the gap is
    between records j-1 and j.
    """
    break_point = [idx for idx in range(1, len(list))
                   if not is_time_difference_equals_10_mins(list[idx - 1][2], list[idx][2])]
    print('闂存柇鐐逛负锛�')
    print(break_point)
    return break_point
+    
+
+    
+#鏍规嵁闂存柇鐐瑰皢鍒楄〃鍒嗗壊鎴愬嚑涓瓙鍒楄〃锛岀敱result杩斿洖
def point_write(list,b_point):
    """Split *list* at the ascending break indices in *b_point*.

    Returns the resulting sub-lists; the tail after the last break is
    always included (so an empty *b_point* yields one whole-list slice).
    """
    result = []
    start = 0
    for cut in b_point:
        result.append(list[start:cut])
        start = cut
    result.append(list[start:])
    return result
+
+
+#灏嗚澶囨晠闅滀俊鎭啓鍏bnormal_data寮傚父琛ㄤ腑 
def abnormal_write_to_SQL(list,con):
    """Append the anomaly rows in *list* to the ``abnormal_data`` table via *con*."""
    frame = pd.DataFrame(list,columns=['dev_id','exception','exception_type','region','begin_time','end_time'])
    print("\n\n")
    print(frame)
    # append-mode write; the table is created on first use if missing
    frame.to_sql(name="abnormal_data", con=con, if_exists="append",index=False,index_label=False)
+    
+    
+
def exception(list,con):
    """Group over-limit records into contiguous time spans and log each span.

    *list* rows carry the device code in [0] and the timestamp in [2],
    newest record first. Each contiguous (10-minute-spaced) span becomes
    one abnormal row [code, type, '0', district, span begin, span end],
    written via abnormal_write_to_SQL.
    """
    break_point = find_break_point(list)
    split_list = point_write(list, break_point)   # contiguous spans, newest record first
    print('\n')
    abnormal = []
    for span in split_list:
        record = [
            span[0][0],        # device code
            '鏁版嵁寮傚父',
            '0',
            '寰愭眹鍖�',
            span[-1][2],       # oldest record in the span -> begin time
            span[0][2],        # newest record in the span -> end time
        ]
        abnormal.append(record)
        print(abnormal)

    print('瓒呮爣寮傚父鏃堕棿娈垫暟鎹负锛�')
    for row in abnormal:
        print(row)
    abnormal_write_to_SQL(abnormal,con)
    print("瓒呮爣娌圭儫鏁版嵁寮傚父琛ㄥ啓鍏ュ畬鎴�!")
+
+#------------------------------------------------------------------------------------------------------------璁惧鏁呴殰鏁版嵁鍐欏叆寮傚父琛ㄤ腑
+#涓ゆ椂闂存槸鍚︾浉宸�30鍒嗛挓  鏄垯杩斿洖TRUE 鍚﹀垯杩斿洖FALSE
def is_time_difference_equals_30_mins(datestr1, datestr2):
    """True when *datestr2* is MORE than 30 minutes after *datestr1*.

    NOTE: despite the name, the comparison is strictly greater-than, not
    equality — callers depend on "> 30 min" meaning an outage gap.
    """
    fmt = "%Y-%m-%d %H:%M"
    gap = datetime.strptime(datestr2, fmt) - datetime.strptime(datestr1, fmt)
    return gap > timedelta(minutes=30)
+
+#鎵惧嚭璁惧鏁呴殰鐨勪俊鎭紝骞跺皢姝や俊鎭啓鍏ュ紓甯歌〃涓�
def is_minutes_exceed_30(list,con) :
    """Detect device outages: gaps of more than 30 minutes between records.

    *list* holds all scraped records for one shop, element [11] being the
    record timestamp and the rows ordered newest-first. For each gap
    strictly greater than 30 minutes a fault row is built; the gap's end
    is the newer timestamp minus 10 minutes (one sampling interval).
    The rows are then classified and written by
    not_Key_period_exceed_30_minutes.
    """
    device_failure = []
    newer_time = list[0][11]
    print('寮�濮嬫椂闂达細', newer_time)
    for record in list[1:]:
        # strictly > 30 minutes counts as an outage; exactly 30 does not
        if is_time_difference_equals_30_mins(record[11], newer_time):
            gap_end = datetime.strptime(newer_time, "%Y-%m-%d %H:%M") - timedelta(minutes = 10)
            print('鐩稿噺鍚庣粨鏋滐細', str(gap_end))
            print('鐩稿噺鍚庣被鍨嬶細', type(str(gap_end)))
            # [:16] trims seconds so the format matches 'YYYY-MM-DD HH:MM'
            device_failure.append([record[2], '璁惧鏁呴殰', '1', '寰愭眹鍖�', record[11], str(gap_end)[:16]])
        newer_time = record[11]
    print('璁惧鏁呴殰鐨勬暟鎹负锛�')
    for row in device_failure :
        print(row)
    not_Key_period_exceed_30_minutes(device_failure,con)
    print('渚涚數寮傚父/鎺夌嚎淇℃伅鍐欏叆寮傚父琛ㄥ畬鎴�!')
+#-----------------------------------------------------------------------------------------------------------渚涚數寮傚父鏁版嵁鍐欏叆寮傚父琛ㄤ腑
+#寮�濮嬪拰缁撴潫鏃堕棿閮藉浜庨潪閲嶇偣鏃舵鏃�,杩斿洖true
def is_time_not_between_key_period(begin_time,end_time) :
    """True when NEITHER endpoint falls strictly inside a key (busy) period.

    Arguments are 'YYYY-MM-DD HH:MM' strings; only the HH:MM portion is
    compared. The window bounds come from the module-level globals.
    """
    global Key_period_noon_begin,Key_period_noon_end,Key_period_night_begin,Key_period_night_end

    begin1 = datetime.strptime(begin_time[11:],"%H:%M")
    end1 = datetime.strptime(end_time[11:],"%H:%M")

    def _inside_key_window(t):
        # strictly inside either the noon or the night window
        return (Key_period_noon_begin < t < Key_period_noon_end) or \
               (Key_period_night_begin < t < Key_period_night_end)

    if _inside_key_window(begin1) or _inside_key_window(end1):
        print('澶勪簬閲嶇偣鏃舵')
        return False
    print('寮�濮嬫垨缁撴潫鏃堕棿鏃堕棿鍦ㄩ潪閲嶇偣鏃舵')
    return True
+
+#寮�濮嬪拰缁撴潫鏃堕棿閮藉浜庨噸鐐规椂娈垫椂,杩斿洖true
def is_time_between_key_period(begin_time,end_time) :
    """True when BOTH endpoints fall strictly inside the same key (busy) period.

    Arguments are 'YYYY-MM-DD HH:MM' strings; only the HH:MM portion is
    compared. The window bounds come from the module-level globals.
    """
    global Key_period_noon_begin,Key_period_noon_end,Key_period_night_begin,Key_period_night_end

    begin1 = datetime.strptime(begin_time[11:],"%H:%M")
    end1 = datetime.strptime(end_time[11:],"%H:%M")

    both_in_noon = (Key_period_noon_begin < begin1 < Key_period_noon_end) and \
                   (Key_period_noon_begin < end1 < Key_period_noon_end)
    both_in_night = (Key_period_night_begin < begin1 < Key_period_night_end) and \
                    (Key_period_night_begin < end1 < Key_period_night_end)

    if both_in_noon or both_in_night:
        print('寮�濮嬫垨缁撴潫鏃堕棿澶勪簬閲嶇偣鏃舵')
        return True
    print('澶勪簬闈為噸鐐规椂娈�')
    return False
+
+
+
def not_Key_period_exceed_30_minutes(list,con) :
    """Classify fault spans as power anomaly ('1') or offline ('2') and persist them.

    Spans with both endpoints outside the key periods get code '1'
    (suspected power-supply anomaly); spans entirely inside a key period
    get code '2' (offline); anything else is dropped. The classified rows
    are appended to the abnormal table via abnormal_write_to_SQL.
    """
    power_supply_abnormal = []
    for fault in list :
        begin, end = fault[4], fault[5]
        if is_time_not_between_key_period(begin, end) :
            code = '1'   # suspected power-supply anomaly
        elif is_time_between_key_period(begin, end) :
            code = '2'   # offline
        else :
            continue
        power_supply_abnormal.append([fault[0], '璁惧鏁呴殰', code, '寰愭眹鍖�', begin, end])
    print('渚涚數寮傚父鐨勬暟鎹负锛�')
    for row in power_supply_abnormal :
        print(row)

    abnormal_write_to_SQL(power_supply_abnormal,con)
    print('渚涚數寮傚父鐨勪俊鎭啓鍏ュ紓甯歌〃瀹屾垚!')
+
+
+
+#------------------------------------------------------------------------------------------------------------鍐欏叆瓒呮爣琛ㄤ腑
+
+#杩斿洖閲嶇粍鍚庣殑鍒楄〃
def refind_ex(list):
    """Reorder one scraped record into the exceeding-table column order.

    Picks, in order: [2] device code, [12] report time, [11] data time,
    [6] fan current, [7] purifier current, [4] inlet fume value,
    [5] outlet fume value. Prints and returns the new row.
    """
    temp = [list[2], list[12], list[11], list[6], list[7], list[4], list[5]]
    print(temp)
    return temp
+
+
+#灏嗗垪琛ㄥ啓鍏xceeding_st_data琛ㄤ腑 
def ex_write_to_SQL(list,con):
    """Append the over-limit rows in *list* to the ``exceeding_st_data`` table."""
    frame = pd.DataFrame(list,columns=['MV_Stat_Code','MV_Create_Time','MV_Data_Time','MV_Fan_Electricity','MV_Purifier_Electricity','MV_Fume_Concentration','MV_Fume_Concentration2'])
    print("\n\n")
    print(frame)
    # append-mode write; the table is created on first use if missing
    frame.to_sql(name="exceeding_st_data", con=con, if_exists="append",index=False,index_label=False)
    print("瓒呮爣琛ㄥ啓鍏ュ畬鎴�!")
+    
+
+# list涓烘煇搴楅摵鎸囧畾椤垫暟鐨勫叏閮ㄧ殑璁板綍 灏嗚秴鏍囨暟鎹啓鍏ヨ秴鏍囪〃
def isExceeding(list,con):
    """Find over-limit records (outlet fume value > 1) and persist them.

    Reshapes each over-limit record via refind_ex, then writes the batch
    both to the exceeding table and — grouped into time spans — to the
    abnormal table. Prints progress as it goes.
    """
    exceedingData = []
    for record in list:
        if float(record[5]) > 1:    # outlet fume concentration above the limit
            print("璇ユ潯鏁版嵁瓒呮爣")
            exceedingData.append(refind_ex(record))

    for row in exceedingData:
        print(row)

    if exceedingData:
        exception(exceedingData,con)        # time-span grouping -> abnormal_data
        ex_write_to_SQL(exceedingData,con)  # raw over-limit rows -> exceeding_st_data
    else:
        print('璇ュ簵閾烘棤瓒呮爣鏁版嵁')
+
+
+#------------------------------------------------------------------------------------------------------------鏁版嵁鍐欏叆璁惧淇℃伅琛�
def generate_short_uuid():
    """Return a 16-character base-62 string derived from a random UUID4.

    Each pair of hex digits of the UUID (a value in 0..255) is mapped to
    one character of the 62-symbol alphabet via modulo 0x3E, exactly as
    the original 62-entry lookup list did.

    Returns:
        str: a 16-character identifier over [a-z0-9A-Z].
    """
    # a-z, 0-9, A-Z — same order as the original 62-entry lookup list
    alphabet = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    hex_str = str(uuid.uuid4()).replace('-', '')
    chars = []
    for i in range(16):
        byte_val = int(hex_str[i * 2:i * 2 + 2], 16)   # two hex digits -> 0..255
        chars.append(alphabet[byte_val % 0x3E])        # 0x3E == 62
    return ''.join(chars)
+
+
+#杩斿洖閲嶇粍鍚庣殑鍒楄〃
def refind_ea(list):
    """Build one device-info row: [fresh short uuid, code [2], name [1], supplier [0], online flag 1]."""
    temp = [generate_short_uuid(), list[2], list[1], list[0], 1]
    print(temp)
    return temp
+
+#灏嗗垪琛ㄥ啓鍏ヨ澶囦俊鎭澶囦俊鎭痚a_t_dev琛ㄤ腑 
def ea_write_to_SQL(list,con):
    """Append the device-info rows in *list* to the ``ea_t_device_info`` table."""
    frame = pd.DataFrame(list,columns=['DI_GUID','DI_Code','DI_Name','DI_Supplier','DI_Online'])
    print("\n\n")
    print('鍐欏叆鏁版嵁琛� 锛孌ateFrame涓猴細',frame)
    # append-mode write; the table is created on first use if missing
    frame.to_sql(name="ea_t_device_info", con=con, if_exists="append",index=False,index_label=False)
    print("璁惧淇℃伅琛ㄥ啓鍏ュ畬鎴�!")
+
+
def dev_info_data_if_exisitd(list,con):
    """Write device rows to ea_t_device_info, skipping ones already stored.

    *list* holds reshaped device rows; the slice [1:4] identifies a
    device. Existing devices are read through the global ``con_read``
    connection, and only rows not already present are appended via
    ea_write_to_SQL.
    """
    global con_read
    existing = pd.read_sql('SELECT DI_Code,DI_Name,DI_Supplier FROM ea_t_device_info',con=con_read).values.tolist()
    print('******** 璁惧淇℃伅******')
    for row in existing:
        print(row)
    print('璁惧淇℃伅琛ㄨ褰曟潯鏁颁负锛�',len(existing))

    list1 = rdm.remove_duplicates_dev_info(list)   # de-duplicate within this batch first
    if existing:
        print('鍘婚櫎閲嶅涓�:')
        print(list1)
        # iterate a snapshot so removals don't disturb the loop
        for candidate in list1[:]:
            if candidate[1:4] in (row[:] for row in existing):
                list1 = rdm.remove_given_data_dev_info(list1, candidate[1:4])

        print('璁惧淇℃伅琛ㄤ腑鏈夋暟鎹椂锛屽幓閲嶅悗鐨刲ist涓猴細',list1)
        if list1:    # only write when something new is left
            ea_write_to_SQL(list1,con)
    else:
        print('璁惧琛ㄦ棤鏁版嵁锛屽鐞嗗悗寰呭啓鍏ョ殑璁惧淇℃伅涓猴細',list1)
        ea_write_to_SQL(list1,con)
+
+
+
+#灏嗗師濮嬫暟鎹浆鍖栨垚鏂扮殑鍒楄〃锛屽啀鍐欏叆璁惧淇℃伅璁惧淇℃伅琛ㄤ腑  /瀛樺叆
def ea_t_dev(list,con):
    """Reshape one shop's records into device rows and write the new ones.

    Each record becomes [uuid, code, name, supplier, 1] via refind_ea;
    rows already present in the device table are filtered out by
    dev_info_data_if_exisitd before writing.
    """
    staging = [refind_ea(record) for record in list]
    print('璁惧鏁版嵁杞寲鍚庯細')
    for row in staging:
        print(row)

    dev_info_data_if_exisitd(staging,con)
+    
+
+#----------------------------------鍐欏叆鍒嗛挓鏁版嵁琛�
+
+#杩斿洖閲嶇粍鍚庣殑鍒楄〃
def refind_fd(list):
    """Reorder one record into the minute-data column order (same layout as refind_ex).

    Picks [2] device code, [12] report time, [11] data time, [6] fan
    current, [7] purifier current, [4] inlet fume value, [5] outlet fume
    value. Prints and returns the new row.
    """
    temp = [list[2], list[12], list[11], list[6], list[7], list[4], list[5]]
    print(temp)
    return temp
+
+
+#灏嗗垪琛ㄥ啓鍏ュ垎閽熸暟鎹〃涓� 
def fd_write_to_SQL(list,con):
    """Append the minute-value rows in *list* to the ``fd_t_minutevalue`` table."""
    frame = pd.DataFrame(list,columns=['MV_Stat_Code','MV_Create_Time','MV_Data_Time','MV_Fan_Electricity','MV_Purifier_Electricity','MV_Fume_Concentration','MV_Fume_Concentration2'])
    print("鍐欏叆鍒嗘暟鏁版嵁琛�,DateFrame涓猴細")
    print(frame)
    # append-mode write; the table is created on first use if missing
    frame.to_sql(name="fd_t_minutevalue", con=con, if_exists="append",index=False,index_label=False)
    print("鍒嗛挓鏁版嵁琛ㄥ啓鍏ュ畬鎴�!")
+    
+#杞寲 鍐嶅啓鍏d_t_minbute琛ㄤ腑 
def fd_t_minbute(list,con):
    """Reshape one page of records via refind_fd and write them to the minute-data table."""
    staging = [refind_fd(record) for record in list]
    print('鍒嗛挓鏁版嵁杞寲鍚庯細')
    for row in staging:
        print(row)
    fd_write_to_SQL(staging,con)
+
+
+#--------------------------------------------------------------------------------------------------------------椋熷叾瀹�
def get_OnePage_teshu_shiqijia(url,count):
    """Scrape one page for the special shop whose cells split oddly.

    Two pairs of adjacent fields (cells 5&6 and 2&3) are glued back
    together before the trailing date/time pairs are merged and the rows
    stashed in the global ``list_temp``. Returns *count* plus the number
    of data rows (header excluded).
    """
    global ck
    global list_temp

    list_temp.clear()   # drop the previous page's rows
    headers = {
        # NOTE: the cookie has to be captured by hand
        "Cookie":ck,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
    }
    html = requests.get(url=url, headers=headers, verify=False).text
    soup = bs(html,'html.parser')

    rows = []
    for tag in soup.find_all("tr"):          # one <tr> per record
        count += 1
        fields = tag.text.strip().split()    # whitespace-separated cell texts
        del fields[-2:]                      # trailing two cells are unused
        print('鍒犻櫎鐗规畩鐨勫悗涓や釜')
        print(fields)

        joined = fields[4] + fields[5]       # cells 5 and 6 belong to one value
        print(joined)
        del fields[5]
        fields[4] = joined
        print("鍏冪礌鍚堝苟瀹屾垚")
        print(fields)

        fields[1] = fields[1] + fields[2]    # cells 2 and 3 also belong together
        del fields[2]

        rows.append(fields)
        print("鏈�缁堣鏁版嵁")
        print(fields)

    merged = [merge(r) for r in rows]        # fuse trailing date/time pairs
    del merged[0]                            # first row is the textual header
    count -= 1                               # header no longer counted
    list_temp = merged[:]
    return count
+
+
def get_MorePages_teshu_shiqijia(url,page_num):
    """Scrape *page_num* pages of the special shop layout and return all rows.

    Same contract as get_MorePages, but pages are parsed by
    get_OnePage_teshu_shiqijia. Stops early on an empty page; sleeps
    ``sleeptime`` seconds between requests and adds the row total to the
    global ``already_spider_datanum``.
    """
    global sleeptime
    global already_spider_datanum
    count_all = 0
    list_all = []
    for page, page_url in enumerate(list_url(url, page_num), start=1):
        count_all += get_OnePage_teshu_shiqijia(page_url, 0)
        if not list_temp:            # empty page -> stop, later pages are empty too
            print('鍚庨潰椤垫暟涓虹┖锛岀埇鍘讳笅涓�涓簵閾�')
            break
        list_all.extend(list_temp)
        print("鐖彇浜嗙",page,"椤�")
        print("\n")
        time.sleep(sleeptime)        # throttle between requests

    for row in list_all:
        print(row)
    print("鎬昏鏁颁负:",count_all)
    already_spider_datanum += count_all
    return list_all
+
+
+
+#-------------------------------------------------------------------------------------------------------------鐗规畩鐨剈rl
def get_OnePage_teshu(url,count):
    """Scrape one page for the 'special' shops (no placeholder column added).

    Like get_OnePage but without the empty column inserted at index 3.
    Rows are stashed in the global ``list_temp``; returns *count* plus the
    number of data rows (header excluded).
    """
    global ck
    global list_temp

    list_temp.clear()   # drop the previous page's rows
    headers = {
        # NOTE: the cookie has to be captured by hand
        "Cookie":ck,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
    }
    html = requests.get(url=url, headers=headers, verify=False).text
    soup = bs(html,'html.parser')

    rows = []
    for tag in soup.find_all("tr"):          # one <tr> per record
        count += 1
        fields = tag.text.strip().split()    # whitespace-separated cell texts
        del fields[-2:]                      # trailing two cells are unused
        print('鍒犻櫎鐗规畩鐨勫悗涓や釜')
        print(fields)
        rows.append(fields)

    merged = [merge(r) for r in rows]        # fuse trailing date/time pairs
    del merged[0]                            # first row is the textual header
    count -= 1                               # header no longer counted
    list_temp = merged[:]
    return count
+
+
def get_MorePages_teshu(url,page_num):
    """Scrape *page_num* pages of a 'special' shop and return all rows.

    Same contract as get_MorePages, but pages are parsed by
    get_OnePage_teshu. Stops early on an empty page; sleeps ``sleeptime``
    seconds between requests and adds the row total to the global
    ``already_spider_datanum``.
    """
    global sleeptime
    global already_spider_datanum
    count_all = 0
    list_all = []
    for page, page_url in enumerate(list_url(url, page_num), start=1):
        count_all += get_OnePage_teshu(page_url, 0)
        if not list_temp:            # empty page -> stop, later pages are empty too
            print('鍚庨潰椤垫暟涓虹┖锛岀埇鍘讳笅涓�涓簵閾�')
            break
        list_all.extend(list_temp)
        print("鐖彇浜嗙",page,"椤�")
        print("\n")
        time.sleep(sleeptime)        # throttle between requests

    for row in list_all:
        print(row)
    print("鎬昏鏁颁负:",count_all)
    already_spider_datanum += count_all
    return list_all
+
+
def spilt_url_teshu(con,page,date_begin=month_begin,date_end=now_date):
    """Handle the shops whose pages need special parsing, then return the rest.

    Scrapes each 'special' shop right here — de-duplicating and writing to
    all four tables — removes their URLs from the list, and returns the
    remaining (regular-layout) URLs for the caller to process.
    Side effect: increments the global ``already_spider_shopnum`` per shop.
    """
    global already_spider_shopnum
    urls = url_more()            # every shop's listing URL (pagesize=100)
    handled = []                 # special URLs, removed from *urls* at the end
    #'shop=%25E9%25A3%259F%25E5%2585%25B6%25E5%25AE%25B6'  the shop handled by the shiqijia parser below
    special_url=['shop=%25E4%25BA%25BA%25E7%2594%259F%25E4%25B8%2580%25E4%25B8%25B2','shop=%25E7%25BC%2598%25E5%25AE%25B6','shop=%25E4%25B8%25B0%25E8%258C%2582%25E7%2583%25A4%25E4%25B8%25B2','shop=%25E6%25B3%25B0%25E7%2585%258C%25E9%25B8%25A1','shop=%25E5%25B0%258F%25E9%2593%2581%25E5%2590%259B']

    for url in urls:
        # the 'shop=...' query fragment between the first and last '&'
        shop_param = url[url.find('&') + 1:url.rfind('&')]

        if shop_param in special_url:
            print('鍙戠幇鐗规畩鐨勶紒')
            already_spider_shopnum += 1
            handled.append(url)
            dated_url = url_add_time(url,date_begin,date_end)
            list_to_MySql = get_MorePages_teshu(dated_url,page)
            if len(list_to_MySql) == 0 :
                print('璇ュ搴楅摵鏃堕棿娈垫棤鏁版嵁锛屽凡璺宠繃')
                continue
            has_remove_duplicates = remove_Duplicates_list(list_to_MySql)
            is_minutes_exceed_30(has_remove_duplicates,con)   # device faults -> abnormal table
            isExceeding(has_remove_duplicates,con)            # over-limit rows -> exceeding table
            ea_t_dev(has_remove_duplicates,con)               # device-info table
            fd_t_minbute(has_remove_duplicates,con)           # minute-data table

            list_to_MySql.clear()

        if shop_param == 'shop=%25E9%25A3%259F%25E5%2585%25B6%25E5%25AE%25B6':
            print('鍙戠幇鐗规畩鐨勶紒')
            already_spider_shopnum += 1
            handled.append(url)
            dated_url = url_add_time(url,date_begin,date_end)
            list_to_MySql = get_MorePages_teshu_shiqijia(dated_url,page)
            if len(list_to_MySql) == 0 :
                print('璇ュ搴楅摵鏃堕棿娈垫棤鏁版嵁锛屽凡璺宠繃')
                continue
            has_remove_duplicates = remove_Duplicates_list(list_to_MySql)
            is_minutes_exceed_30(has_remove_duplicates,con)   # device faults -> abnormal table
            isExceeding(has_remove_duplicates,con)            # over-limit rows -> exceeding table
            ea_t_dev(has_remove_duplicates,con)               # device-info table
            fd_t_minbute(has_remove_duplicates,con)           # minute-data table

            list_to_MySql.clear()

    for special in handled:      # leave only the regular-layout shops
        urls.remove(special)
    print(len(urls))
    return urls
+
+#-------------------------------------------------------------------------------------------------------------
+
+
def spider_all(con, page, date_begin=month_begin, date_end=now_date):
    """Crawl every shop listed in the source file and persist its data.

    The "special" shop URLs are crawled and removed first (inside
    spilt_url_teshu); the remaining shops are crawled here.

    Args:
        con: open database connection handed to the table writers.
        page: number of pages to crawl per shop.
        date_begin: start of the date range appended to every URL.
        date_end: end of the date range appended to every URL.
    """
    global already_spider_shopnum

    # Pre-process: special shops are handled inside spilt_url_teshu and
    # stripped from the URL list it returns.
    plain_urls = spilt_url_teshu(con, page, date_begin, date_end)

    # Attach the date range to every remaining shop URL.
    dated_urls = [url_add_time(u, date_begin, date_end) for u in plain_urls]

    # Show the final URLs that are about to be crawled.
    for dated in dated_urls:
        print(dated)

    # Crawl each shop and write its rows into the database tables.
    for dated in dated_urls:
        page_rows = get_MorePages(dated, page)   # crawl `page` pages of one shop
        already_spider_shopnum += 1              # one more shop visited
        if not page_rows:
            print('璇ュ搴楅摵鏃堕棿娈垫棤鏁版嵁锛屽凡璺宠繃')
            continue
        deduped = remove_Duplicates_list(page_rows)   # drop duplicate rows for this shop
        is_minutes_exceed_30(deduped, con)   # device-fault rows  -> exception table
        isExceeding(deduped, con)            # over-limit rows    -> exceeding table
        ea_t_dev(deduped, con)               # device information -> device table
        fd_t_minbute(deduped, con)           # per-minute rows    -> minute table

        page_rows.clear()
+
def back_cookie(path="D:\\z\\workplace\\cookie.txt"):
    """Load the saved login cookie from *path* into the global ``ck``.

    The original implementation hard-coded the file location; it is now a
    defaulted parameter (backward compatible) so other environments and tests
    can supply their own cookie file.

    Args:
        path: cookie file location; defaults to the original hard-coded file.

    Returns:
        str: the cookie text that was read (also stored in the global ``ck``).

    Raises:
        OSError: if the cookie file cannot be opened.
    """
    global ck
    # NOTE(review): no explicit encoding — relies on the platform default,
    # as the original did; confirm the cookie file's encoding before changing.
    with open(path, 'r') as fp:
        ck = fp.read()
    return ck
+
+
def write_Sql(list, con):
    """Append the crawled rows to the ``ed_data`` database table.

    The table must already exist; rows are appended (``if_exists="append"``).

    Args:
        list: crawled rows; each row supplies the 14 columns below in order.
            (NOTE(review): the parameter shadows the builtin ``list`` — kept
            for interface compatibility with existing keyword callers.)
        con: open database connection accepted by ``DataFrame.to_sql``.
    """
    columns = [
        'provider', 'shop_name', 'equipment_number', 'equipment_name',
        'smoke_push_density', 'smoke_pop_density', 'wind_turbine', 'purifier',
        'level', 'alarm_required', 'alarm_triggered', 'attribution_time',
        'reporting_time', 'data_time',
    ]
    frame = pd.DataFrame(list, columns=columns)
    print("\n\n")
    print(frame)
    # Write without the DataFrame index; the target table is pre-created.
    frame.to_sql(name="ed_data", con=con, if_exists="append", index=False, index_label=False)
    print("鍐欏叆瀹屾垚!")
+
# Module-level crawler state and configuration.
ck=""     # cached login cookie (filled in by back_cookie)
shopnum=0   # total number of shops listed in the source file
already_spider_shopnum=0   # number of shops crawled so far
already_spider_datanum=0   # number of data rows crawled so far
sleeptime=4  # crawl interval in seconds -- presumably used by the fetch helpers; not referenced in this view, confirm

Key_period_noon_begin = datetime.strptime('10:00',"%H:%M")    # noon key period start (10:00)
Key_period_noon_end = datetime.strptime('14:00',"%H:%M")      # noon key period end (14:00)


Key_period_night_begin = datetime.strptime('17:00',"%H:%M")   # evening key period start (17:00)
Key_period_night_end = datetime.strptime('21:00',"%H:%M")     # evening key period end (21:00)
+
def pass_login():
    """Run a full crawl for a fixed date range and report statistics.

    Opens its own MySQL connection, loads the cookie from disk, crawls every
    shop via spider_all(), then closes both this connection and the
    module-level ``con_read`` before printing timing/volume statistics.

    Fix: the connections are now closed in a ``finally`` block so they are
    released even if the crawl raises part-way through (the original leaked
    both connections on error).
    """
    global con_read
    # NOTE(review): credentials are hard-coded and duplicated at module
    # level -- consider loading them from configuration instead.
    engine = create_engine("mysql+mysqlconnector://root:1234@localhost:3306/qianduan_sql?charset=utf8")
    con = engine.connect()
    try:
        back_cookie()   # load the saved cookie from file

        # Crawl every shop and time the whole run.
        start_time = time.time()
        spider_all(con, 55, '2023-06-01', '2023-06-30')
        end_time = time.time()
    finally:
        # Release the database connections even when the crawl fails.
        con_read.close()
        con.close()
    print("鍐欏叆瀹屾垚!")
    print("璁剧疆鐖彇鐨勬椂闂撮棿闅斾负",sleeptime,"绉�")
    print("鍏辨湁",shopnum,"瀹�","宸茬埇鍙�",already_spider_shopnum,"瀹�")
    print("鍏辩埇鍙�",already_spider_datanum,"鏉¤褰�")
    print("鍏辫�楁椂:{:.2f}绉�".format(end_time-start_time))
+
# Module entry point: open a dedicated connection (used elsewhere to read
# the device-info table) and start the crawl immediately on import/run.
engine = create_engine("mysql+mysqlconnector://root:1234@localhost:3306/qianduan_sql?charset=utf8")
# dedicated connection for reading the device-info table; closed by pass_login()
con_read = engine.connect()
pass_login()

--
Gitblit v1.9.3