Commit 590f1944 authored by lkx33

修改了数据结构,提高日志查询效率

parent 36c2dda9
# Chat33Pro压测日志解析脚本 # Chat33Pro压测日志解析脚本
### 依赖的python库 ### 依赖的python库
安装方式:pip install name
* matplotlib * matplotlib
* numpy * numpy
* pandas * pandas
...@@ -8,4 +9,4 @@ ...@@ -8,4 +9,4 @@
### 使用说明 ### 使用说明
* 将要解析的日志文件夹放在log文件夹下,路径示例:../log/log(n=100,m=10) * 将要解析的日志文件夹放在log文件夹下,路径示例:../log/log(n=100,m=10)
* 在config.py中设置文件夹名称'log(n=100,m=10)' * 在config.py中设置文件夹名称'log(n=100,m=10)'
* 在chart.py文件中执行运行对应的图表方法 * 在chart.py文件中执行对应的图表方法
\ No newline at end of file \ No newline at end of file
...@@ -13,35 +13,13 @@ from reader import initData ...@@ -13,35 +13,13 @@ from reader import initData
# 读取msgReply # 读取msgReply
def formatData(sortType=0): def sortData(sortType='msgSendTime'):
# 先获得初始化数据,sortType是要根据第几个元素排序返回数据 # 先获得初始化数据,sortType是要根据哪个元素排序返回数据
# 0为logId,1为发送时间,2为reply时间,3为接收时间,4reply延迟,5接收延迟 data = initData()
logData = [] data.sort_values(by=sortType, inplace=True)
source = initData()
for i in source:
# 格式化时间
# time1 = datetime.datetime.strptime(
# sendTime, "%Y-%m-%dT%H:%M:%S.%f%z")
# time2 = datetime.datetime.strptime(
# replyTime, "%Y-%m-%dT%H:%M:%S.%f%z")
# time3 = datetime.datetime.strptime(
# receiveTime, "%Y-%m-%dT%H:%M:%S.%f%z")
# 转换为时间戳
# stamp1 = time.mktime(time1.timetuple())*1000+time1.microsecond/1000
# stamp2 = time.mktime(time2.timetuple())*1000+time2.microsecond/1000
# stamp3 = time.mktime(time3.timetuple())*1000+time3.microsecond/1000
# 计算延迟
replyLag = i['msgSendReplyTime']-i['msgSendTime']
msgLag = i['msgReceiveTime']-i['msgSendTime']
logData.append([i['logId'], i['msgSendTime'],
i['msgSendReplyTime'], i['msgReceiveTime'], replyLag, msgLag])
# print(sorted(logData, key=lambda x: x[3])) # print(sorted(logData, key=lambda x: x[3]))
logData.sort(key=lambda x: x[sortType]) # data.sort(key=lambda x: x[sortType])
return logData return data
# def timeReport(sortType=1): # def timeReport(sortType=1):
...@@ -51,26 +29,17 @@ def formatData(sortType=0): ...@@ -51,26 +29,17 @@ def formatData(sortType=0):
# return time # return time
def lagReport(sortType=5): def lagReport(sortType='msgLag'):
# 读取msg收到的时间 # 读取msg收到的时间
data = formatData(sortType) data = initData()
lagData = [] data.sort_values(by=sortType, inplace=True)
lagData = data[sortType].tolist()
count = 0 count = 0
line90 = 0 line90 = 0
line95 = 0 line95 = 0
line99 = 0 line99 = 0
for i in data:
lagData.append(i[sortType])
# 格式化时间
# dateArray1 = datetime.datetime.fromtimestamp(i[1]/1000)
# dateArray2 = datetime.datetime.fromtimestamp(i[2]/1000)
# dateArray3 = datetime.datetime.fromtimestamp(i[3]/1000)
# time1 = dateArray1.strftime("%Y-%m-%d %H:%M:%S.%f")
# time2 = dateArray1.strftime("%Y-%m-%d %H:%M:%S.%f")
# print('logID:', i[0], '发送时间:', dateArray1, 'reply时间:', dateArray2,
# '接收时间:', dateArray3, 'reply耗时(ms):', i[4], '接收耗时(ms):', i[5])
count = len(lagData) count = len(lagData)
line90 = math.floor(0.9*count) line90 = math.floor(0.9*count)
line95 = math.floor(0.95*count) line95 = math.floor(0.95*count)
...@@ -92,14 +61,13 @@ def lagReport(sortType=5): ...@@ -92,14 +61,13 @@ def lagReport(sortType=5):
def perSecondReport(): def perSecondReport():
# 统计每秒发送的消息数量 # 统计每秒发送的消息数量
secondCount = [] secondCount = []
source = initData() source = initData()['msgSendTime'].tolist()
for i in source: for i in source:
i['msgSendTime'] //= 1000 i //= 1000
secondCount.append(i['msgSendTime']) secondCount.append(i)
data = list(zip(*np.unique(secondCount, return_counts=True))) data = list(zip(*np.unique(secondCount, return_counts=True)))
# print(data) # print(data)
return data return data
# def errorReport(): # def errorReport():
# 读取错误 # 读取错误
...@@ -5,7 +5,7 @@ import pandas as pd ...@@ -5,7 +5,7 @@ import pandas as pd
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
from matplotlib import ticker as tk from matplotlib import ticker as tk
from matplotlib import dates as dt from matplotlib import dates as dt
from analyzer import formatData, lagReport, perSecondReport from analyzer import sortData, lagReport, perSecondReport
def chartByPercent(): def chartByPercent():
...@@ -14,8 +14,8 @@ def chartByPercent(): ...@@ -14,8 +14,8 @@ def chartByPercent():
plt.xlabel("Percentage") plt.xlabel("Percentage")
plt.ylabel("Lag") plt.ylabel("Lag")
y1 = lagReport(4) y1 = lagReport('replyLag')
y2 = lagReport(5) y2 = lagReport('msgLag')
x = np.arange(1/len(y1), 1+1/len(y1), 1/len(y1)) x = np.arange(1/len(y1), 1+1/len(y1), 1/len(y1))
...@@ -46,11 +46,11 @@ def chartByTime(): ...@@ -46,11 +46,11 @@ def chartByTime():
plt.xlabel("SendTime") plt.xlabel("SendTime")
plt.ylabel("Lag") plt.ylabel("Lag")
logData = formatData(1) # 获取日志,按照发送时间排序 logData = sortData('msgSendTime') # 获取日志,按照发送时间排序
time = [i[1] for i in logData] # 获取时间 time = logData['msgSendTime'].tolist() # 获取时间
y1 = [i[4] for i in logData] # 获取reply y1 = logData['replyLag'].tolist() # 获取reply
y2 = [i[5] for i in logData] # 获取receive y2 = logData['msgLag'].tolist() # 获取receive
# x = [datetime.datetime.fromtimestamp( # x = [datetime.datetime.fromtimestamp(
# t/1000).strftime("%Y-%m-%d %H:%M:%S.%f") for t in time] # t/1000).strftime("%Y-%m-%d %H:%M:%S.%f") for t in time]
...@@ -73,6 +73,7 @@ def chartByTime(): ...@@ -73,6 +73,7 @@ def chartByTime():
def chartByPerSecond(): def chartByPerSecond():
# 绘制图表(统计每秒的消息数)
plt.title("SendMessage per Second") plt.title("SendMessage per Second")
plt.xlabel("SendTime") plt.xlabel("SendTime")
plt.ylabel("Times") plt.ylabel("Times")
...@@ -94,6 +95,6 @@ def chartByPerSecond(): ...@@ -94,6 +95,6 @@ def chartByPerSecond():
plt.show() plt.show()
chartByPercent() # chartByPercent()
# chartByTime() chartByTime()
# chartByPerSecond() # chartByPerSecond()
import os import os
import json
import config import config
import pandas as pd
def readFile(folderName='', keyword='msg'): def readFile(folderName='', keyword='msg'):
# 传入文件夹名称、关键词(默认为msg),返回该文件夹下相关文件的log # 传入文件夹名称、关键词(默认为msg),返回该文件夹下相关文件的log
...@@ -13,28 +16,27 @@ def readFile(folderName='', keyword='msg'): ...@@ -13,28 +16,27 @@ def readFile(folderName='', keyword='msg'):
data = [] data = []
with open(os.path.join(f'./log/{folderName}/', i)) as f: with open(os.path.join(f'./log/{folderName}/', i)) as f:
for line in f: for line in f:
line = line.strip('\n') # line = line.strip('\n')
data.append(line) data.append(json.loads(line))
log.extend(data) log.extend(data)
return log return log
def initData(): def initData():
# 初始化数据 # 初始化数据
data = []
log = readFile(config.GLOBAL_FOLDER) log = readFile(config.GLOBAL_FOLDER)
sendLog = [eval(x) for x in log if 'sendMsg' in x] sendLog = [x for x in log if 'sendMsg' in x['message']]
replyLog = [eval(x) for x in log if 'sendReplyMsg' in x] replyLog = [x for x in log if 'sendReplyMsg' in x['message']]
receiveLog = [eval(x) for x in log if 'receivedMsg' in x] receiveLog = [x for x in log if 'receivedMsg' in x['message']]
print(sendLog[0]) print(sendLog[0])
print(replyLog[0]) print(replyLog[0])
print(receiveLog[0]) print(receiveLog[0])
# sd = sorted(sendLog, key=lambda x: x['msgSendTime'], reverse=False) setSD = set([x['msgId'] for x in sendLog])
rp = sorted(replyLog, key=lambda x: x['logId'], reverse=False) setRC = set([x['msgId'] for x in receiveLog])
rc = sorted(receiveLog, key=lambda x: x['logId'], reverse=False) print('数据丢失%s条' % len(setSD - setRC))
# 找出sendLog和receiveLog中发送时间不一致的记录 # 找出sendLog和receiveLog中发送时间不一致的记录
# for i in sd: # for i in sd:
...@@ -46,15 +48,17 @@ def initData(): ...@@ -46,15 +48,17 @@ def initData():
# else: # else:
# print(i) # print(i)
# 这段代码写的好nm蠢,先凑合用 df1 = pd.DataFrame(sendLog, columns=('msgSendTime', 'msgId'))
for i in rc: df2 = pd.DataFrame(replyLog, columns=('logId', 'msgSendReplyTime'))
for index, j in enumerate(rp): df3 = pd.DataFrame(receiveLog, columns=(
if i['logId'] == j['logId']: 'logId', 'msgReceiveTime', 'msgId'))
data.append( df4 = pd.merge(df1, df3, on='msgId')
{'logId': i['logId'], 'msgSendTime': i['msgSendTime'], 'msgSendReplyTime': j['msgSendReplyTime'], 'msgReceiveTime': i['msgReceiveTime']}) data = pd.merge(df2, df4, on='logId')
del rp[index]
break data = data[['logId', 'msgId', 'msgSendTime', 'msgSendReplyTime', 'msgReceiveTime']]
else: data["replyLag"] = data["msgSendReplyTime"] - data["msgSendTime"]
print('重复/未找到的logId:', i['logId'], i['msgReceiveTime']) data["msgLag"] = data["msgReceiveTime"] - data["msgSendTime"]
return data return data
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment