1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
U
ÊÔVe»‘ã@sHddlZddlmZddlZddlTddlZddlm    Z    ddl
Z ddl Z ddl Z ddlmZmZddlZej d¡ddlmmZe  de  ¡¡Ze  de  ¡¡Zed    Zgad
d „Zd d „Zdd„Zdd„Z dd„Z!dd„Z"eefdd„Z#dd„Z$dd„Z%dd„Z&dd„Z'd d!„Z(d"d#„Z)d$d%„Z*d&d'„Z+d(d)„Z,d*d+„Z-d,d-„Z.d.d/„Z/d0d1„Z0d2d3„Z1d4d5„Z2d6d7„Z3d8d9„Z4d:d;„Z5d<d=„Z6d>d?„Z7d@dA„Z8dBdC„Z9dDdE„Z:dFdG„Z;dHdI„Z<eefdJdK„Z=eefdLdM„Z>dNdO„Z?e @¡a@gaAdaBdaCdaDdPaEe FdQdR¡aGe FdSdR¡aHe FdTdR¡aIe FdUdR¡aJe    dVƒZKeK L¡aMgaNdWdX„ZOdS)YéN)Ú BeautifulSoup)Ú*)Ú create_engine)ÚdatetimeÚ    timedeltaz../../z%Y-%m-%dz%Y-%mz-01cCs6g}|D](}||kr | |¡qtdƒtdaq|S)Nu 发现重复é)ÚappendÚprintÚalready_spider_datanum)ÚlistZ
list_storeÚitem©r ú7d:\z\workplace\VsCode\pyvenv\venv_web\src\Crawling_1.pyÚremove_Duplicates_lists 
rc
Csšt| d¡ƒ}t| d¡ƒ}|d|}t| d¡ƒ}t| d¡ƒ}|d|}t| d¡ƒ}t| d¡ƒ}|d|}    | |    ¡| |¡| |¡|S)Néÿÿÿÿú )ÚstrÚpopr)
r Zdate_1Zdate_2Údate1Zdate_3Zdate_4Údate2Zdate_5Zdate_6Zdate3r r rÚmerge*s   
 
 
rcs ‡fdd„td|dƒDƒ}|S)Ncs"g|]}ˆdd t|ƒ¡‘qS)z&pagez={})Úformatr)Ú.0Úi©Úurlr rÚ
<listcomp>>szlist_url.<locals>.<listcomp>r)Úrange)rÚpage_numÚurlsr rrÚlist_url=sr c Cst ¡tj|ddj}t|dƒ}| d¡}g}|D]š}|d}g}| d¡}|D]`}    |     d¡rž|     d¡j ¡}
d     |    jd    dd
¡ ¡} | 
| ¡| 
|
¡qR|    j ¡} | 
| ¡qR|d d…=|d =| 
|¡q4t d |ƒ|d=|d}|dd…at dƒt dƒt tƒt dƒ|S)NF©Zverifyú html.parserÚtrrÚtdÚdivÚT)ÚtextÚ    recursiveéþÿÿÿéu一页的结果为:rz
----------u'获取到的数据并且处理后为:) Ú    list_tempÚclearÚsessionÚgetr'ÚbsÚfind_allÚfindÚstripÚjoinrr    ) rÚcountÚrÚsoupÚrowsÚresultÚrowÚdataÚcolsÚcolZ div_contentZ
td_contentr r rÚ get_OnePageBs:
 
 
 
 
 
 
 
 r=c    Cs¢t||ƒ}d}g}d}|D]^}d}|t||ƒ}ttƒdkrHtdƒqz| t¡td|dƒ|d}tdƒt t¡q|D] }t|ƒq~td|ƒt    |7a    |S©Nrru*后面页数为空,爬去下一个店铺u 爬取了第u页Ú
u 总行数为:)
r r=Úlenr+r    ÚextendÚtimeÚsleepÚ    sleeptimer
©    rrrZ    count_allZlist_allÚpagerr4Újr r rÚ get_MorePagesys(
 
 
 
rHc    Cs~tdd…}td|ƒt|ƒag}d}|D]*}| tj tj ||¡¡¡|d}q*g}|D]}d|d}| |¡q^|S)Nz    url_more:rrz>http://xhhb.senzly.cn/sys/yyRealTimeValue_list.jsp?key1=&shop=z &pagesize=100)Úwebshopsr    r@ÚshopnumrÚurllibÚparseÚquote)Z    shopnamesZshopname_encodingrÚnamerZshoprr r rÚurl_more•s 
 
  rOcCs |d|d|}t|ƒ|S)Nz&key5=z&key6=)r    )rÚ
date_beginÚdate_endZurl_dater r rÚ url_add_time«srRcCs<t |d¡}t |d¡}||}|tddkp:|tddkS)Nú%Y-%m-%d %H:%Mé
©Úminutesiöÿÿÿ©rÚstrptimer©Zdatestr1Zdatestr2rrZ    time_diffr r rÚ!is_time_difference_equals_10_mins²s  rZcCsfd}d}g}|dd…D]8}t||d|dƒdkr@| |¡|d}|d}qtdƒt|ƒ|S)Nrrr*Fu间断点为:)rZrr    )r rrGÚ break_pointr r r rÚfind_break_point»s
 
r\cCs>g}d}|D]}| |||…¡|}q | ||d…¡|S)Nr)r)r Zb_pointr8Ú
last_indexÚindexr r rÚ point_writeÍsr_cCsBtj|ddddddgd}tdƒt|ƒ|jd    |d
d d d dS) NZdev_idÚ    exceptionZexception_typeZregionÚ
begin_timeÚend_time©Úcolumnsú
 
Z abnormal_datarF©rNÚconZ    if_existsr^Z index_label©ÚpdZ    DataFramer    Zto_sql©r rgr:r r rÚabnormal_write_to_SQLØsrkcCsÊt|ƒ}t||ƒ}tdƒg}|D]v}g}| |dd¡| d¡| d¡| d¡| |t|ƒdd¡| |dd¡| |¡t|ƒq"tdƒ|D] }t|ƒq¦t||ƒtd    ƒdS)
Nr?ru 数据异常Ú0õ    å¾æ±‡åŒºrr*u!超标异常时间段数据为:u(超标油烟数据异常表写入完成!)r\r_r    rr@rk)r rgr[Z
split_listZabnormalr ÚtemprGr r rr`âs&
 
 
 
 
 
 
 
r`cCs.t |d¡}t |d¡}||}|tddkS)NrSérUrWrYr r rÚ!is_time_difference_equals_30_minsÿs  rpcCsg}|dd}td|ƒ|dd…D]®}t|d|ƒrÌg}| |d¡| d¡| d¡| d¡| |d¡t |d    ¡td
d }td t|ƒƒtd tt|ƒƒƒ| t|ƒdd…¡| |¡|d}q&tdƒ|D] }t|ƒqât||ƒtdƒdS)Nré u开始时间:rr*õ 设备故障Ú1rmrSrTrUu相减后结果:u相减后类型:éu设备故障的数据为:u/供电异常/掉线信息写入异常表完成!)    r    rprrrXrrÚtypeÚ not_Key_period_exceed_30_minutes)r rgZdevice_failureZ    startTimer rnZ startTimeSubrr r rÚis_minutes_exceed_30s, 
 
 
 
 
 
 
 
rwcCs„t |dd…d¡}t |dd…d¡}|tkr8|tkpf|tkrH|tkpf|tkrX|tkpf|tkof|tkdkrxtdƒdStdƒdS)Nrqú%H:%MFu-开始或结束时间时间在非重点时段Tu处于重点时段©rrXÚKey_period_noon_beginÚKey_period_noon_endÚKey_period_night_beginÚKey_period_night_endr    ©rarbZbegin1Zend1r r rÚis_time_not_between_key_period s
DrcCs€t |dd…d¡}t |dd…d¡}|tkrH|tkrH|tkrH|tksh|tkrt|tkrt|tkrt|tkrttdƒdStdƒdS)Nrqrxu'开始或结束时间处于重点时段Tu处于非重点时段Fryr~r r rÚis_time_between_key_period5s
@r€cCsg}|D]Ö}t|d|dƒrvg}| |d¡| d¡| d¡| d¡| |d¡| |d¡| |¡qt|d|dƒrg}| |d¡| d¡| d¡| d¡| |d¡| |d¡| |¡qtdƒ|D] }t|ƒqìt||ƒtd    ƒdS)
NéérrrrsrmÚ2u供电异常的数据为:u+供电异常的信息写入异常表完成!)rrr€r    rk)r rgZpower_supply_abnormalr rnrr r rrvKs2
 
 
 
 
 
 
 
rvcCsrg}| |d¡| |d¡| |d¡| |d¡| |d¡| |d¡| |d¡t|ƒ|S©Nr*é rqéérr‚©rr    ©r rnr r rÚ    refind_exmsrŠc    CsLtj|dddddddgd}td    ƒt|ƒ|jd
|d d d d tdƒdS)NÚ MV_Stat_CodeÚMV_Create_TimeÚ MV_Data_TimeÚMV_Fan_ElectricityÚMV_Purifier_ElectricityÚMV_Fume_ConcentrationÚMV_Fume_Concentration2rcreZexceeding_st_datarFrfu超标表写入完成!rhrjr r rÚex_write_to_SQL|s
r’cCstg}|D]*}t|dƒdkrtdƒ| t|ƒ¡q|D] }t|ƒq8t|ƒdkrht||ƒt||ƒntdƒdS)Nr‚ru该条数据超标ru该店铺无超标数据)Úfloatr    rrŠr@r`r’)r rgZ exceedingDatar rr r rÚ isExceedingŠs
 
 r”c>Csæddddddddd    d
d d d ddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>g>}g}tt ¡ƒ d?d@¡}tdAdBƒD]8}||dC|dCdC…}t|dBƒ}| ||dD¡q¢d@ |¡S)ENÚaÚbÚcÚdÚeÚfÚgÚhrrGÚkÚlÚmÚnÚoÚpÚqr5ÚsÚtÚuÚvÚwÚxÚyÚzrlrsrƒÚ3Ú4Ú5Ú6Ú7Ú8Ú9ÚAÚBÚCÚDÚEÚFÚGÚHÚIÚJÚKÚLÚMÚNÚOÚPÚQÚRÚSÚTÚUÚVÚWÚXÚYÚZú-r&rrtr*é>)rÚuuidÚuuid4ÚreplacerÚintrr3)ZarrayOfr ZuirZa1r©r r rÚgenerate_short_uuid¡sŒÂ@
rÓcCsPg}| tƒ¡| |d¡| |d¡| |d¡| d¡t|ƒ|S)Nr*rr)rrÓr    r‰r r rÚ    refind_eaìs 
rÔcCsJtj|dddddgd}tdƒtd|ƒ|jd    |d
d d d td ƒdS)NZDI_GUIDZDI_CodeZDI_NameZ DI_SupplierZ    DI_Onlinercreu"写入数据表 ï¼ŒDateFrame为:Zea_t_device_inforFrfu设备信息表写入完成!rhrjr r rÚea_write_to_SQL÷s
 
rÕcCsätjdtd}|j ¡}tdƒ|D] }t|ƒq$tdt|ƒƒt |¡}t|ƒdkrÌ|dd…}tdƒt|ƒ|D]2}|dd…d    d
„|Dƒkrvt     ||dd…¡}qvtd |ƒt|ƒdkràt
||ƒntd |ƒt
||ƒdS) Nz8SELECT DI_Code,DI_Name,DI_Supplier FROM ea_t_device_info)rgu******** è®¾å¤‡ä¿¡æ¯******u!设备信息表记录条数为:ru去除重复为:rrcss|]}|dd…VqdS)Nr )rr©r r rÚ    <genexpr>sz+dev_info_data_if_exisitd.<locals>.<genexpr>u7设备信息表中有数据时,去重后的list为:u<设备表无数据,处理后待写入的设备信息为:) riÚread_sqlÚcon_readÚvaluesÚtolistr    r@ÚrdmZremove_duplicates_dev_infoZremove_given_data_dev_inforÕ)r rgÚdfÚresrÚlist1rnr r r rÚdev_info_data_if_exisitds&
 
 
 
 
rßcCsDg}|D]}| t|ƒ¡qtdƒ|D] }t|ƒq(t||ƒdS)Nu设备数据转化后:)rrÔr    rß©r rgZstagingr rr r rÚea_t_dev&s
rácCsrg}| |d¡| |d¡| |d¡| |d¡| |d¡| |d¡| |d¡t|ƒ|Sr„rˆr‰r r rÚ    refind_fd6srâc    CsLtj|dddddddgd}td    ƒt|ƒ|jd
|d d d d tdƒdS)Nr‹rŒrrŽrrr‘rcu%写入分数数据表,DateFrame为:Zfd_t_minutevaluerFrfu分钟数据表写入完成!rhrjr r rÚfd_write_to_SQLEs
rãcCsDg}|D]}| t|ƒ¡qtdƒ|D] }t|ƒq(t||ƒdS)Nu分钟数据转化后:)rrâr    rãràr r rÚ fd_t_minbutePs
räc Cst ¡tj|ddj}t|dƒ}g}| d¡}|D]ª}|d}|j}| ¡}| ¡}|dd…=t    dƒt    |ƒ|d|d    }    t    |    ƒ|d    =|    |d<t    d
ƒt    |ƒ|d|d }
|d =|
|d<| 
|¡t    d ƒt    |ƒq4g} |D]} |  
t | ƒ¡qè| d =|d}| dd…a|S)NFr!r"r#rr)õ删除特殊的后两个rr‚u元素合并完成r*u最终行数据r© r+r,r-r.r'r/r0r2Úsplitr    rr) rr4r5r6r ÚtagsÚtagÚelementrÞZ    str_temp1Z    str_temp2Ú    list_datarr r rÚget_OnePage_teshu_shiqijia\s@
 
 
 
 
 rìc    Cs¢t||ƒ}d}g}d}|D]^}d}|t||ƒ}ttƒdkrHtdƒqz| t¡td|dƒ|d}tdƒt t¡q|D] }t|ƒq~td|ƒt    |7a    |Sr>)
r rìr@r+r    rArBrCrDr
rEr r rÚget_MorePages_teshu_shiqijia‘s(
 
 
 
ríc Cs¶t ¡tj|ddj}t|dƒ}g}| d¡}|D]F}|d}|j}| ¡}| ¡}|dd…=t    dƒt    |ƒ| 
|¡q4g}    |D]}
|     
t |
ƒ¡q„|    d=|d}|    dd…a|S)    NFr!r"r#rr)rårræ) rr4r5r6r rèrérêrÞrërr r rÚget_OnePage_teshu­s*
 
 
  rîc    Cs¢t||ƒ}d}g}d}|D]^}d}|t||ƒ}ttƒdkrHtdƒqz| t¡td|dƒ|d}tdƒt t¡q|D] }t|ƒq~td|ƒt    |7a    |Sr>)
r rîr@r+r    rArBrCrDr
rEr r rÚget_MorePages_teshuÕs(
 
 
 
rïcCsZtƒ}g}dddddg}|D]}| d¡d}| d¡}    |||    …|kr´tdƒtd7a| |¡t|||ƒ}
t|
|ƒ} t| ƒd    krtd
ƒqt    | ƒ} | D]} t
 | ¡qœ|   ¡|||    …d krtdƒtd7a| |¡t|||ƒ}
t |
|ƒ} t| ƒd    kr td
ƒqt    | ƒ} | D]} t
 | ¡q|   ¡q|D]}|  |¡q8tt|ƒƒ|S) NzAshop=%25E4%25BA%25BA%25E7%2594%259F%25E4%25B8%2580%25E4%25B8%25B2z#shop=%25E7%25BC%2598%25E5%25AE%25B6zAshop=%25E4%25B8%25B0%25E8%258C%2582%25E7%2583%25A4%25E4%25B8%25B2z2shop=%25E6%25B3%25B0%25E7%2585%258C%25E9%25B8%25A1z2shop=%25E5%25B0%258F%25E9%2593%2581%25E5%2590%259Bú&ru发现特殊的!rõ*该家店铺时间段无数据,已跳过z2shop=%25E9%25A3%259F%25E5%2585%25B6%25E5%25AE%25B6)rOr1Úrfindr    Úalready_spider_shopnumrrRrïr@rÚall_datar,ríÚremove)rgrFrPrQrZ    teshu_urlZ special_urlrÚbeginÚendZ    url_teshuÚ list_to_MySqlÚhas_remove_duplicatesr r¥r r rÚspilt_url_teshuïsH
 
 
 
 
 
 
 rúc Cs’g}tƒ}|D]}| t|||ƒ¡q|D] }t|ƒq*|D]P}t||ƒ}    td7at|    ƒdkrhtdƒq<t|    ƒ}
|
D]} t | ¡qt|         ¡q<dS)Nrrrñ)
rOrrRr    rHrór@rrôr,) rgrFrPrQZurl_allrrrrGrørùr r r rÚ
spider_all$s 
 
  rûcCsZtj|ddddddddd    d
d d d dgd}tdƒt|ƒ|jd|ddddtdƒdS)NÚproviderZ    shop_nameZequipment_numberZequipment_nameZsmoke_push_densityZsmoke_pop_densityZ wind_turbineZpurifierÚlevelZalarm_requiredZalarm_triggeredZattribution_timeZreporting_timeZ    data_timercreZed_datarFrfu 写入完成!rhrjr r rÚ    write_Sql@s
*rþéz10:00rxz14:00z17:00z21:00úKmysql+pymysql://fumeRemote:feiyu2023@114.215.109.124:3306/fume?charset=utf8c
Cs:t ¡dadada|a|dd…atd|›d|›ƒtd|ƒtdƒ}|     ¡}t
 
¡}t |d||ƒtD]}|  d¡qlt
 
¡}t  ¡| ¡tdƒtdtd    ƒtd
td d td ƒtd tdƒtd ||¡ƒg}    |      d¡|      d
ttƒdttƒd ¡|      d ttƒd¡|      d ||¡¡|    tfS)Nru!获取数据的时间区间为:rÍu传入的店铺名称为:rédu获取数据完成!u设置获取的时间间隔为u秒u共有uå®¶u    å·²èŽ·å–u    å…±èŽ·å–u    æ¡è®°å½•u共耗时:{:.2f}秒u家,已获取)rôr,rJrór
r-rIr    rÚconnectrBrûrrØÚcloserDrr)
ZsesÚ    beginTimeÚendTimeZshopsÚenginergÚ
start_timer rbr8r r rÚ
pass_login`s8  
 
"r)PZrequestsZbs4rr/ÚreZpymysqlÚpandasriÚ
sqlalchemyrÚ urllib.parserKrBrÏrrÚsysÚpathrZ*src.core_modules.remove_duplicates_methodsZ core_modulesZremove_duplicates_methodsrÛÚstrftimeÚ    localtimeZnow_dateZ    now_date1Z month_beginr+rrr r=rHrOrRrZr\r_rkr`rprwrr€rvrŠr’r”rÓrÔrÕrßrárârãrärìrírîrïrúrûrþr-rIrJrór
rDrXrzr{r|r}rrrØrôrr r r rÚ<module>s‚    7     
"K 
%  5(5