from utils.log_utils import LogUtils
from utils.date_utils import DateUtils
from decorator.singleton import singleton


@singleton
class DataStatistics(object):
    """Helpers for grouping, counting and de-duplicating lists of records.

    Records are accessed by subscript (``item[key]``), so they are expected
    to be dict-like objects.
    """

    def __init__(self) -> None:
        pass

    def data_grouping(self, data_list, group_key):
        """Group the items of ``data_list`` by the value stored under ``group_key``.

        Args:
            data_list: Iterable of subscriptable records.
            group_key: Key used to read the grouping value from each record.

        Returns:
            A plain dict mapping each distinct ``item[group_key]`` value to the
            list of records carrying that value, in their original order.

        Raises:
            KeyError: If a dict record does not contain ``group_key``.
        """
        groups = {}
        for item in data_list:
            # setdefault creates the bucket on first sight of a group value.
            groups.setdefault(item[group_key], []).append(item)
        return groups

    def count_by_type(self, data_list, type_key):
        """Log the total number of records and the number of records per type.

        The records are grouped with ``data_grouping`` and a single summary
        line is written through ``LogUtils.info``. Nothing is returned.

        Args:
            data_list: Iterable of subscriptable records.
            type_key: Key used to read the type value from each record.
        """
        groups = self.data_grouping(data_list, type_key)
        sizes = {name: len(items) for name, items in groups.items()}
        total = sum(sizes.values())
        # One ", <type>: <count>" fragment per group, in grouping order.
        breakdown = ''.join(f', {name}: {size}' for name, size in sizes.items())
        LogUtils.info(f'获取数据总条数为:{total}' + breakdown)

    def data_deduplication(self, data_map: dict, ref_map: dict, key: str, reverse=False):
        """Drop, per group, the leading records that are not newer than a reference value.

        Each group's records are sorted ascending by ``record[key]``. When the
        group also appears in ``ref_map``, records are skipped from the front
        until the first one whose ``DateUtils.str_to_time(record[key])`` is
        greater than the reference value; that record and everything after it
        are kept. Groups missing from ``ref_map`` keep all their records.
        Groups left with no records are omitted from the result. The number
        of kept records is logged through ``LogUtils.info``.

        Args:
            data_map: Dict mapping a group name to its list of records.
            ref_map: Dict mapping a group name to the reference value that
                parsed ``key`` values are compared against.
            key: Record key holding the value to sort by and to parse.
            reverse: Currently unused; kept so existing callers keep working.

        Returns:
            A dict mapping each group name to its non-empty list of kept
            records, sorted ascending by ``record[key]``.
        """
        kept_by_group = {}
        count = 0
        for group, records in data_map.items():
            # Sort on the raw stored value. NOTE(review): the scan below
            # assumes this order agrees with the order of the values returned
            # by DateUtils.str_to_time - confirm against the stored format.
            ordered = sorted(records, key=lambda record: record[key])

            # Index of the first record that is newer than the reference.
            start = 0
            if group in ref_map:
                threshold = ref_map[group]
                for record in ordered:
                    if DateUtils.str_to_time(record[key]) > threshold:
                        break
                    start += 1

            remaining = ordered[start:]
            if remaining:
                kept_by_group[group] = remaining
                count += len(remaining)

        LogUtils.info(f'内存中去重完成! 可写入数据数位: {count}')
        return kept_by_group