1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
U
ϤIegã@sHddlZddlmZddlZddlTddlZddlm    Z    ddl
Z ddl Z ddl Z ddlmZmZddlZej d¡ddlmmZe  de  ¡¡Ze  de  ¡¡Zed    Zgad
d „Zd d „Zdd„Zdd„Z dd„Z!dd„Z"eefdd„Z#dd„Z$dd„Z%dd„Z&dd„Z'd d!„Z(d"d#„Z)d$d%„Z*d&d'„Z+d(d)„Z,d*d+„Z-d,d-„Z.d.d/„Z/d0d1„Z0d2d3„Z1d4d5„Z2d6d7„Z3d8d9„Z4d:d;„Z5d<d=„Z6d>d?„Z7d@dA„Z8dBdC„Z9dDdE„Z:dFdG„Z;dHdI„Z<eefdJdK„Z=eefdLdM„Z>dNdO„Z?e @¡a@gaAdaBdaCdaDdPaEe FdQdR¡aGe FdSdR¡aHe FdTdR¡aIe FdUdR¡aJe    dVƒZKeK L¡aMgaNdWdX„ZOdS)YéN)Ú BeautifulSoup)Ú*)Ú create_engine)ÚdatetimeÚ    timedeltaz../../z%Y-%m-%dz%Y-%mz-01cCs6g}|D](}||kr | |¡qtdƒtdaq|S)Nu 发现重复é)ÚappendÚprintÚalready_spider_datanum)ÚlistZ
list_storeÚitem©r ú5d:\z\workplace\VsCode\pyvenv\venv_web\src\Crawling.pyÚremove_Duplicates_list s 
rc
Csšt| d¡ƒ}t| d¡ƒ}|d|}t| d¡ƒ}t| d¡ƒ}|d|}t| d¡ƒ}t| d¡ƒ}|d|}    | |    ¡| |¡| |¡|S)Néÿÿÿÿú )ÚstrÚpopr)
r Zdate_1Zdate_2Údate1Zdate_3Zdate_4Údate2Zdate_5Zdate_6Zdate3r r rÚmerge,s   
 
 
rcs ‡fdd„td|dƒDƒ}|S)Ncs"g|]}ˆdd t|ƒ¡‘qS)z&pagez={})Úformatr)Ú.0Úi©Úurlr rÚ
<listcomp>@szlist_url.<locals>.<listcomp>r)Úrange)rÚpage_numÚurlsr rrÚlist_url?sr c Cs²t ¡tj|ddj}t|dƒ}g}| d¡}|D]B}|d}|j}| ¡}| ¡}|dd…=|     dd¡| 
|¡q4g}    |D]}
|     
t |
ƒ¡q€|    d    =|d}|    dd…a|S)
NF©Zverifyú html.parserÚtrréþÿÿÿéÚr) Ú    list_tempÚclearÚsessionÚgetÚtextÚbsÚfind_allÚstripÚsplitÚinsertrr© rÚcountÚrÚsoupr ÚtagsÚtagÚelementÚlist1Ú    list_datarr r rÚ get_OnePageDs(
 
 
   r:c    Cs¢t||ƒ}d}g}d}|D]^}d}|t||ƒ}ttƒdkrHtdƒqz| t¡td|dƒ|d}tdƒt t¡q|D] }t|ƒq~td|ƒt    |7a    |S©Nrru*后面页数为空,爬去下一个店铺u 爬取了第u页Ú
u 总行数为:)
r r:Úlenr'r    ÚextendÚtimeÚsleepÚ    sleeptimer
©    rrrZ    count_allZlist_allÚpagerr2Újr r rÚ get_MorePagesos(
 
 
 
rEc    Cs~tdd…}td|ƒt|ƒag}d}|D]*}| tj tj ||¡¡¡|d}q*g}|D]}d|d}| |¡q^|S)Nz    url_more:rrz>http://xhhb.senzly.cn/sys/yyRealTimeValue_list.jsp?key1=&shop=z &pagesize=100)Úwebshopsr    r=ÚshopnumrÚurllibÚparseÚquote)Z    shopnamesZshopname_encodingrÚnamerZshoprr r rÚurl_moreŒs 
 
  rLcCs |d|d|}t|ƒ|S)Nz&key5=z&key6=)r    )rÚ
date_beginÚdate_endZurl_dater r rÚ url_add_time¨srOcCs<t |d¡}t |d¡}||}|tddkp:|tddkS)Nú%Y-%m-%d %H:%Mé
©Úminutesiöÿÿÿ©rÚstrptimer©Zdatestr1Zdatestr2rrZ    time_diffr r rÚ!is_time_difference_equals_10_mins¯s  rWcCsfd}d}g}|dd…D]8}t||d|dƒdkr@| |¡|d}|d}qtdƒt|ƒ|S)NrréFu间断点为:)rWrr    )r rrDÚ break_pointr r r rÚfind_break_point¸s
 
rZcCs>g}d}|D]}| |||…¡|}q | ||d…¡|S)Nr)r)r Zb_pointÚresultÚ
last_indexÚindexr r rÚ point_writeÊsr^cCsBtj|ddddddgd}tdƒt|ƒ|jd    |d
d d d dS) NZdev_idÚ    exceptionZexception_typeZregionÚ
begin_timeÚend_time©Úcolumnsú
 
Z abnormal_datarF©rKÚconZ    if_existsr]Z index_label©ÚpdZ    DataFramer    Zto_sql©r rfÚdatar r rÚabnormal_write_to_SQLÕsrkcCsÊt|ƒ}t||ƒ}tdƒg}|D]v}g}| |dd¡| d¡| d¡| d¡| |t|ƒdd¡| |dd¡| |¡t|ƒq"tdƒ|D] }t|ƒq¦t||ƒtd    ƒdS)
Nr<ru 数据异常Ú0õ    å¾æ±‡åŒºrrXu!超标异常时间段数据为:u(超标油烟数据异常表写入完成!)rZr^r    rr=rk)r rfrYZ
split_listZabnormalr ÚtemprDr r rr_âs&
 
 
 
 
 
 
 
r_cCs.t |d¡}t |d¡}||}|tddkS)NrPérRrTrVr r rÚ!is_time_difference_equals_30_minsÿs  rpcCsg}|dd}td|ƒ|dd…D]®}t|d|ƒrÌg}| |d¡| d¡| d¡| d¡| |d¡t |d    ¡td
d }td t|ƒƒtd tt|ƒƒƒ| t|ƒdd…¡| |¡|d}q&tdƒ|D] }t|ƒqât||ƒtdƒdS)Nré u开始时间:rrXõ 设备故障Ú1rmrPrQrRu相减后结果:u相减后类型:éu设备故障的数据为:u/供电异常/掉线信息写入异常表完成!)    r    rprrrUrrÚtypeÚ not_Key_period_exceed_30_minutes)r rfZdevice_failureZ    startTimer rnZ startTimeSubrr r rÚis_minutes_exceed_30s, 
 
 
 
 
 
 
 
rwcCs„t |dd…d¡}t |dd…d¡}|tkr8|tkpf|tkrH|tkpf|tkrX|tkpf|tkof|tkdkrxtdƒdStdƒdS)Nrqú%H:%MFu-开始或结束时间时间在非重点时段Tu处于重点时段©rrUÚKey_period_noon_beginÚKey_period_noon_endÚKey_period_night_beginÚKey_period_night_endr    ©r`raZbegin1Zend1r r rÚis_time_not_between_key_period s
DrcCs€t |dd…d¡}t |dd…d¡}|tkrH|tkrH|tkrH|tksh|tkrt|tkrt|tkrt|tkrttdƒdStdƒdS)Nrqrxu'开始或结束时间处于重点时段Tu处于非重点时段Fryr~r r rÚis_time_between_key_period5s
@r€cCsg}|D]Ö}t|d|dƒrvg}| |d¡| d¡| d¡| d¡| |d¡| |d¡| |¡qt|d|dƒrg}| |d¡| d¡| d¡| d¡| |d¡| |d¡| |¡qtdƒ|D] }t|ƒqìt||ƒtd    ƒdS)
NéérrrrsrmÚ2u供电异常的数据为:u+供电异常的信息写入异常表完成!)rrr€r    rk)r rfZpower_supply_abnormalr rnrr r rrvKs2
 
 
 
 
 
 
 
rvcCsrg}| |d¡| |d¡| |d¡| |d¡| |d¡| |d¡| |d¡t|ƒ|S©NrXé rqéérr‚©rr    ©r rnr r rÚ    refind_exmsrŠc    CsLtj|dddddddgd}td    ƒt|ƒ|jd
|d d d d tdƒdS)NÚ MV_Stat_CodeÚMV_Create_TimeÚ MV_Data_TimeÚMV_Fan_ElectricityÚMV_Purifier_ElectricityÚMV_Fume_ConcentrationÚMV_Fume_Concentration2rbrdZexceeding_st_datarFreu超标表写入完成!rgrir r rÚex_write_to_SQL|s
r’cCstg}|D]*}t|dƒdkrtdƒ| t|ƒ¡q|D] }t|ƒq8t|ƒdkrht||ƒt||ƒntdƒdS)Nr‚ru该条数据超标ru该店铺无超标数据)Úfloatr    rrŠr=r_r’)r rfZ exceedingDatar rr r rÚ isExceedingŠs
 
 r”c>Csæddddddddd    d
d d d ddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>g>}g}tt ¡ƒ d?d@¡}tdAdBƒD]8}||dC|dCdC…}t|dBƒ}| ||dD¡q¢d@ |¡S)ENÚaÚbÚcÚdÚeÚfÚgÚhrrDÚkÚlÚmÚnÚoÚpÚqr3ÚsÚtÚuÚvÚwÚxÚyÚzrlrsrƒÚ3Ú4Ú5Ú6Ú7Ú8Ú9ÚAÚBÚCÚDÚEÚFÚGÚHÚIÚJÚKÚLÚMÚNÚOÚPÚQÚRÚSÚTÚUÚVÚWÚXÚYÚZú-r&rrtrXé>)rÚuuidÚuuid4ÚreplacerÚintrÚjoin)ZarrayOfr ZuirZa1r©r r rÚgenerate_short_uuid¡sŒÂ@
rÔcCsPg}| tƒ¡| |d¡| |d¡| |d¡| d¡t|ƒ|S)NrXrr)rrÔr    r‰r r rÚ    refind_eaìs 
rÕcCsJtj|dddddgd}tdƒtd|ƒ|jd    |d
d d d td ƒdS)NZDI_GUIDZDI_CodeZDI_NameZ DI_SupplierZ    DI_Onlinerbrdu"写入数据表 ï¼ŒDateFrame为:Zea_t_device_inforFreu设备信息表写入完成!rgrir r rÚea_write_to_SQL÷s
 
rÖcCsätjdtd}|j ¡}tdƒ|D] }t|ƒq$tdt|ƒƒt |¡}t|ƒdkrÌ|dd…}tdƒt|ƒ|D]2}|dd…d    d
„|Dƒkrvt     ||dd…¡}qvtd |ƒt|ƒdkràt
||ƒntd |ƒt
||ƒdS) Nz8SELECT DI_Code,DI_Name,DI_Supplier FROM ea_t_device_info)rfu******** è®¾å¤‡ä¿¡æ¯******u!设备信息表记录条数为:ru去除重复为:rrcss|]}|dd…VqdS)Nr )rr©r r rÚ    <genexpr>sz+dev_info_data_if_exisitd.<locals>.<genexpr>u7设备信息表中有数据时,去重后的list为:u<设备表无数据,处理后待写入的设备信息为:) rhÚread_sqlÚcon_readÚvaluesÚtolistr    r=ÚrdmZremove_duplicates_dev_infoZremove_given_data_dev_inforÖ)r rfÚdfÚresrr8rnr r r rÚdev_info_data_if_exisitds&
 
 
 
 
rßcCsDg}|D]}| t|ƒ¡qtdƒ|D] }t|ƒq(t||ƒdS)Nu设备数据转化后:)rrÕr    rß©r rfZstagingr rr r rÚea_t_dev&s
rácCsrg}| |d¡| |d¡| |d¡| |d¡| |d¡| |d¡| |d¡t|ƒ|Sr„rˆr‰r r rÚ    refind_fd6srâc    CsLtj|dddddddgd}td    ƒt|ƒ|jd
|d d d d tdƒdS)Nr‹rŒrrŽrrr‘rbu%写入分数数据表,DateFrame为:Zfd_t_minutevaluerFreu分钟数据表写入完成!rgrir r rÚfd_write_to_SQLEs
rãcCsDg}|D]}| t|ƒ¡qtdƒ|D] }t|ƒq(t||ƒdS)Nu分钟数据转化后:)rrâr    rãràr r rÚ fd_t_minbutePs
räc Cst ¡tj|ddj}t|dƒ}g}| d¡}|D]ª}|d}|j}| ¡}| ¡}|dd…=t    dƒt    |ƒ|d|d    }    t    |    ƒ|d    =|    |d<t    d
ƒt    |ƒ|d|d }
|d =|
|d<| 
|¡t    d ƒt    |ƒq4g} |D]} |  
t | ƒ¡qè| d =|d}| dd…a|S)NFr!r"r#rr$õ删除特殊的后两个rr‚u元素合并完成rXu最终行数据r© r'r(r)r*r+r,r-r.r/r    rr) rr2r3r4r r5r6r7r8Z    str_temp1Z    str_temp2r9rr r rÚget_OnePage_teshu_shiqijia\s@
 
 
 
 
 rçc    Cs¢t||ƒ}d}g}d}|D]^}d}|t||ƒ}ttƒdkrHtdƒqz| t¡td|dƒ|d}tdƒt t¡q|D] }t|ƒq~td|ƒt    |7a    |Sr;)
r rçr=r'r    r>r?r@rAr
rBr r rÚget_MorePages_teshu_shiqijia‘s(
 
 
 
rèc Cs¶t ¡tj|ddj}t|dƒ}g}| d¡}|D]F}|d}|j}| ¡}| ¡}|dd…=t    dƒt    |ƒ| 
|¡q4g}    |D]}
|     
t |
ƒ¡q„|    d=|d}|    dd…a|S)    NFr!r"r#rr$rårrær1r r rÚget_OnePage_teshu­s*
 
 
  réc    Cs¢t||ƒ}d}g}d}|D]^}d}|t||ƒ}ttƒdkrHtdƒqz| t¡td|dƒ|d}tdƒt t¡q|D] }t|ƒq~td|ƒt    |7a    |Sr;)
r rér=r'r    r>r?r@rAr
rBr r rÚget_MorePages_teshuÕs(
 
 
 
rêcCsZtƒ}g}dddddg}|D]}| d¡d}| d¡}    |||    …|kr´tdƒtd7a| |¡t|||ƒ}
t|
|ƒ} t| ƒd    krtd
ƒqt    | ƒ} | D]} t
 | ¡qœ|   ¡|||    …d krtdƒtd7a| |¡t|||ƒ}
t |
|ƒ} t| ƒd    kr td
ƒqt    | ƒ} | D]} t
 | ¡q|   ¡q|D]}|  |¡q8tt|ƒƒ|S) NzAshop=%25E4%25BA%25BA%25E7%2594%259F%25E4%25B8%2580%25E4%25B8%25B2z#shop=%25E7%25BC%2598%25E5%25AE%25B6zAshop=%25E4%25B8%25B0%25E8%258C%2582%25E7%2583%25A4%25E4%25B8%25B2z2shop=%25E6%25B3%25B0%25E7%2585%258C%25E9%25B8%25A1z2shop=%25E5%25B0%258F%25E9%2593%2581%25E5%2590%259Bú&ru发现特殊的!rõ*该家店铺时间段无数据,已跳过z2shop=%25E9%25A3%259F%25E5%2585%25B6%25E5%25AE%25B6)rLÚfindÚrfindr    Úalready_spider_shopnumrrOrêr=rÚall_datar(rèÚremove)rfrCrMrNrZ    teshu_urlZ special_urlrÚbeginÚendZ    url_teshuÚ list_to_MySqlÚhas_remove_duplicatesr r¥r r rÚspilt_url_teshuïsH
 
 
 
 
 
 
 röc Csšg}t||||ƒ}|D]}| t|||ƒ¡q|D] }t|ƒq2|D]P}t||ƒ}    td7at|    ƒdkrptdƒqDt|    ƒ}
|
D]} t | ¡q||         ¡qDdS)Nrrrì)
rörrOr    rErïr=rrðr() rfrCrMrNZurl_allrrrrDrôrõr r r rÚ
spider_all1s 
 
  r÷cCsZtj|ddddddddd    d
d d d dgd}tdƒt|ƒ|jd|ddddtdƒdS)NÚproviderZ    shop_nameZequipment_numberZequipment_nameZsmoke_push_densityZsmoke_pop_densityZ wind_turbineZpurifierÚlevelZalarm_requiredZalarm_triggeredZattribution_timeZreporting_timeZ    data_timerbrdZed_datarFreu 写入完成!rgrir r rÚ    write_SqlUs
*rúéz10:00rxz14:00z17:00z21:00úKmysql+pymysql://fumeRemote:feiyu2023@114.215.109.124:3306/fume?charset=utf8c
Cs:t ¡dadada|a|dd…atd|›d|›ƒtd|ƒtdƒ}|     ¡}t
 
¡}t |d||ƒtD]}|  d¡qlt
 
¡}t  ¡| ¡tdƒtdtd    ƒtd
td d td ƒtd tdƒtd ||¡ƒg}    |      d¡|      d
ttƒdttƒd ¡|      d ttƒd¡|      d ||¡¡|    tfS)Nru!获取数据的时间区间为:rÍu传入的店铺名称为:rüédu获取数据完成!u设置获取的时间间隔为u秒u共有uå®¶u    å·²èŽ·å–u    å…±èŽ·å–u    æ¡è®°å½•u共耗时:{:.2f}秒u家,已获取)rðr(rGrïr
r)rFr    rÚconnectr?r÷rrÙÚcloserArr)
ZsesÚ    beginTimeÚendTimeZshopsÚenginerfÚ
start_timer rar[r r rÚ
pass_loginws8  
 
"r)PZrequestsZbs4rr,ÚreZpymysqlÚpandasrhÚ
sqlalchemyrÚ urllib.parserHr?rÏrrÚsysÚpathrZ*src.core_modules.remove_duplicates_methodsZ core_modulesZremove_duplicates_methodsrÜÚstrftimeÚ    localtimeZnow_dateZ    now_date1Z month_beginr'rrr r:rErLrOrWrZr^rkr_rprwrr€rvrŠr’r”rÔrÕrÖrßrárârãrärçrèrérêrör÷rúr)rFrGrïr
rArUrzr{r|r}rrþrÙrðrr r r rÚ<module>s‚    +      "K 
%  5(B$