from utils.log_utils import LogUtils
|
from utils.date_utils import DateUtils
|
from decorator.singleton import singleton
|
|
|
@singleton
class DataStatistics:
    """Grouping, counting and de-duplication helpers for lists of records.

    Records are always read with ``item[key]``, so any subscriptable,
    mapping-like record works.
    """

    def __init__(self) -> None:
        """Create the helper; no instance state is set up here."""

    def data_grouping(self, data_list, group_key):
        """Group the records of ``data_list`` by ``item[group_key]``.

        Args:
            data_list: Iterable of subscriptable records.
            group_key: Key looked up on every record to pick its group.

        Returns:
            A dict mapping each distinct ``item[group_key]`` value to the
            list of records carrying that value. Groups appear in
            first-seen order and records keep their input order.
        """
        groups = {}
        for item in data_list:
            # setdefault creates the bucket on first sight of a group value.
            groups.setdefault(item[group_key], []).append(item)
        return groups

    def count_by_type(self, data_list, type_key):
        """Log the total record count followed by the count of each type.

        The records are grouped with ``data_grouping`` and one message is
        sent to ``LogUtils.info``. Nothing is returned.

        Args:
            data_list: Iterable of subscriptable records.
            type_key: Key whose value identifies the type of a record.
        """
        groups = self.data_grouping(data_list, type_key)
        sizes = {name: len(items) for name, items in groups.items()}
        total = sum(sizes.values())
        details = ''.join(f', {name}: {size}' for name, size in sizes.items())
        LogUtils.info(f'获取数据总条数为:{total}' + details)

    def data_deduplication(self, data_map: dict, ref_map: dict, key: str, reverse=False):
        """Drop, per group, the leading records that are not past a reference.

        Each group's records are sorted ascending by ``item[key]``. When
        ``ref_map`` has an entry for the group, records are skipped from the
        front until the first one whose ``DateUtils.str_to_time(item[key])``
        is greater than that entry; that record and everything after it are
        kept. Groups without an entry in ``ref_map`` are kept in full, and
        groups left empty are omitted from the result. The number of kept
        records is reported through ``LogUtils.info``.

        Args:
            data_map: Dict mapping a group key to its list of records.
            ref_map: Dict mapping a group key to the reference value that
                the converted ``item[key]`` is compared against.
            key: Record key used both for sorting and for the comparison.
            reverse: Accepted for backward compatibility; this method does
                not read it.

        Returns:
            A dict mapping each group key to its non-empty list of remaining
            records, sorted ascending by ``item[key]``.
        """
        remaining = {}
        count = 0
        for group, records in data_map.items():
            # NOTE(review): sorting uses the raw item[key] values while the
            # cut-off below compares converted values - this presumably
            # relies on both orderings agreeing; confirm against the data.
            ordered = sorted(records, key=lambda record: record[key])

            # Index of the first record to keep; 0 keeps the whole group.
            start = 0
            if group in ref_map:
                threshold = ref_map[group]
                for record in ordered:
                    if DateUtils.str_to_time(record[key]) > threshold:
                        break
                    start += 1

            kept = ordered[start:]
            if kept:
                remaining[group] = kept
                count += len(kept)

        LogUtils.info(f'内存中去重完成! 可写入数据数位: {count}')
        return remaining
|