本文介绍如何分析文件变化记录、得出统计信息,并画出文件大小随时间变化的散点图。
1. 统计信息
文件变化记录格式如下:
2022-05-30 13:58:37,201761000037,reader_writer_pv.c,769
2022-05-30 13:58:37,201821121027,reader_writer_pv.c,769
2022-05-30 13:58:37,202021121001,reader_writer_pv.c,2148
...
分析文件变化记录,得到每位学生的文件变化次数、总变化字节数和速度(速度 = 总变化字节数 / 文件变化次数),核心代码如下:
def calculate_seconds_size_speed(filename):
    """Summarise per-user file-change activity from a change-record CSV.

    Each non-empty row is expected to look like
    ``2022-05-30 13:58:37,201761000037,reader_writer_pv.c,769``: the second
    column is the username and the last column is the file size in bytes.

    For every user the function counts the records that follow the user's
    first record, sums the absolute size difference between consecutive
    records, and divides the two to get a "speed" (bytes per record).

    Args:
        filename: Path of the UTF-8 encoded CSV file to read.

    Returns:
        An ``OrderedDict`` mapping username to ``[count, accumulated_size,
        speed]``, ordered by speed from highest to lowest. Users with a
        single record get ``[0, 0, 0]``.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-empty row has fewer than two columns.
        ValueError: If the last column of a row is not an integer.
    """
    # Step 1: accumulate the record count and the absolute size deltas.
    # username -> [count, accumulated_size, previous_size]
    d_user_seconds_size = {}
    # newline='' lets the csv module do its own line-ending handling.
    with open(filename, 'r', encoding='utf-8', newline='') as f:
        for row in csv.reader(f, delimiter=','):
            if not row:
                # csv.reader yields [] for blank lines; there is nothing to
                # account for, and indexing it would raise IndexError.
                continue
            username = row[1]
            size = int(row[-1])
            stats = d_user_seconds_size.get(username)
            if stats is None:
                # The first record only establishes the baseline size.
                d_user_seconds_size[username] = [0, 0, size]
            else:
                stats[0] += 1
                stats[1] += abs(size - stats[2])
                stats[2] = size

    # Step 2: calculate the speed and sort by it, fastest first.
    d_user_seconds_size_speed = {}
    for username, (count, accumulated, _previous) in d_user_seconds_size.items():
        # A user with a single record has no deltas; avoid dividing by zero.
        speed = accumulated / count if count else 0
        d_user_seconds_size_speed[username] = [count, accumulated, speed]

    return OrderedDict(
        sorted(
            d_user_seconds_size_speed.items(),
            key=lambda item: item[1][-1],
            reverse=True,
        )
    )
2. 画散点图
批量画出每位学生文件大小随时间变化的散点图,核心代码如下:
def draw_file_changes(input_file, output_dir, d_user_name_info, d_user_seconds_size_speed):
    """Save one file-size-over-time plot per user found in *input_file*.

    Each non-empty row of the CSV is expected to look like
    ``2022-05-30 13:58:37,201761000037,reader_writer_pv.c,769``: timestamp,
    username, file name, file size in bytes.

    Args:
        input_file: Path of the UTF-8 encoded change-record CSV.
        output_dir: Directory the ``<user_info>.jpg`` images are written to.
        d_user_name_info: Mapping from account name (``'u<username>'``, or
            ``'qiankun'`` for user ``201761000037``) to the label used both
            as the legend entry and as the output file stem.
        d_user_seconds_size_speed: Mapping from username to
            ``(count, accumulated_size, speed)``, shown in the plot title.

    Raises:
        KeyError: If a user from the CSV is missing from either mapping.
        ValueError: If a timestamp or size cannot be parsed.
    """
    # Group the (timestamp, size) samples by user, keeping file order.
    d_user_time_size = {}
    # newline='' lets the csv module do its own line-ending handling.
    with open(input_file, 'r', encoding='utf-8', newline='') as f:
        for row in csv.reader(f, delimiter=','):
            if not row:
                # csv.reader yields [] for blank lines; skip them instead of
                # failing on row[1].
                continue
            username = row[1]
            size = int(row[-1])
            dt = datetime.datetime.strptime(row[0], '%Y-%m-%d %H:%M:%S')
            d_user_time_size.setdefault(username, []).append((dt, size))

    # Every plot shares the same fixed x-axis window (hard-coded to
    # 2022-05-30 13:50-15:40), so build the limits once.
    start_dt = datetime.datetime(2022, 5, 30, 13, 50)
    end_dt = datetime.datetime(2022, 5, 30, 15, 40)

    for username, samples in d_user_time_size.items():
        # The label stored for the account doubles as the output file name.
        if username == '201761000037':
            username_os = 'qiankun'
        else:
            username_os = 'u{}'.format(username)
        user_info = d_user_name_info[username_os]
        output_filename = '{}/{}.jpg'.format(output_dir, user_info)

        fig, ax = plt.subplots()
        try:
            times, sizes = zip(*samples)
            plt.plot(times, sizes, 'r-x', linewidth=0.5, label=user_info)

            count, total_size, speed = d_user_seconds_size_speed[username]
            title = '文件变化次数={} 累计文件大小={} 速度={}'.format(
                count, total_size, round(speed, 2))
            plt.title(title)
            plt.xlabel('时间')
            plt.ylabel('文件大小(单位:字节)')
            plt.grid(True)
            plt.legend(loc='best')
            plt.xlim(start_dt, end_dt)

            # Show only hour:minute on the time axis.
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))

            fig.tight_layout()
            plt.savefig(output_filename)
        finally:
            # pyplot keeps every figure alive until it is closed; without
            # this, one figure per user accumulates in memory.
            plt.close(fig)
完整的代码见file_changes_analysis.py。