# Standard library
import os
import re
import sys
from io import BytesIO

# Third party
import certifi
import matplotlib.pyplot as plt
import pycurl
import xdg
from bs4 import BeautifulSoup

## Parse the saved Zulip page and collect (date, job id) pairs in `benches`.
# NB: since Coq was made the default language on Zulip the tables are full of
# <span> tags, so we really don't want to keep them -- BeautifulSoup's .text
# strips every tag and leaves only the visible text.
# (`str` holds the raw page text, set earlier in the file; keep the stripped
# version under a new name instead of rebinding the builtin `str`)
text = BeautifulSoup(str, "html.parser").text

# NB: the line with the date starts with a space
# (in the original there is the avatar broken image)
datere = re.compile(r' Bench bot \((.*)\):')

for line in text.splitlines():
    match = datere.match(line)
    if match:
        # Next post: remember its date for the job lines that follow.
        curdate = match[1]
    else:
        # `jobre` (defined earlier in the file) captures the GitLab job id.
        match = jobre.match(line)
        if match:
            benches += [(curdate, match[1])]
## Download the raw log of every bench job, caching them under `cachedir`.
m = pycurl.CurlMulti()
m.setopt(pycurl.M_MAX_HOST_CONNECTIONS, 8)

files = []
for _, job in benches:
    fname = cachedir / (job + '.log')
    # `cachedir` is a pathlib.Path (see the `/` operator), so use Path.exists
    # instead of os.path.exists.  Only fetch logs we don't already have.
    if not fname.exists():
        f = open(fname, 'xb')  # x: will error if already exists
        files += [f]
        c = pycurl.Curl()
        c.setopt(c.URL, 'https://gitlab.com/coq/coq/-/jobs/' + job + '/raw')
        c.setopt(c.WRITEFUNCTION, f.write)
        c.setopt(c.CAINFO, certifi.where())
        c.setopt(c.FOLLOWLOCATION, True)
        m.add_handle(c)

# Standard pycurl multi loop: select until all transfers are done.
num_handles = len(files)
while num_handles:
    print(f'Downloading {num_handles} log files.')
    ret = m.select(5.0)
    if ret == -1:
        # select() timed out / was interrupted; just retry.
        continue
    while 1:
        ret, num_handles = m.perform()
        if ret != pycurl.E_CALL_MULTI_PERFORM:
            break

# Make sure everything hits the disk before the parsing phase reads it back.
for f in files:
    f.flush()
    f.close()
## parse job logs
print('Parsing job logs.')

# captures package name and OLD time
# the numerals are to avoid matching the table header
packre = re.compile('│[ ]+([^ ]+) │[ ]+[^ ]+[ ]+([0-9][^ ]*)[ ]+([^ ]+)')

parsed = []
for date, job, in benches:
    cur = {}
    fname = cachedir / (job + '.log')
    # The logs were downloaded as raw bytes; be explicit about the decoding
    # instead of depending on the locale's default encoding.
    with open(fname, encoding='utf-8') as f:
        for l in f:
            match = packre.match(l)
            if match:
                # the table appears multiple times in the log so this may be
                # overriding the value; that is OK (it's not worth bothering
                # to find the last table)
                cur[match[1]] = (match[2], match[3])
    if cur:
        parsed += [(date, job, cur)]
def filter_to(packname, data=None):
    """Collect the per-job timing series of one package.

    Parameters:
        packname: package name exactly as it appears in the bench tables.
        data: iterable of (date, job, packs) triples, where packs maps
            package name -> (time, change) strings; defaults to the
            module-level `parsed` list built above.

    Returns a 4-tuple of parallel lists (dates, jobs, times, changes),
    one entry per job whose table mentions `packname`; times and changes
    are converted to float.
    """
    if data is None:
        data = parsed
    dates = []
    jobs = []
    times = []
    changes = []
    for date, job, packs in data:
        if packname in packs:
            dates += [date]
            jobs += [job]
            time, change = packs[packname]
            times += [float(time)]
            changes += [float(change)]
    return dates, jobs, times, changes
def plot(packname):
    """Plot the bench `change` series of one package against its job ids.

    Alternatively you can use dates for the x axis, times for the y axis.
    With x = jobs, if you find an interesting point, hover the mouse on it:
    pyplot will say somewhere in the window "x=..., y=...".  Now if you
    don't want to read and type the whole 10 digit number, export as svg
    and search for the last 3 digits followed by a space; it should be
    easy to find a xml comment with the job number eg "<!-- 2100592270 -->".
    """
    _dates, jobs, _times, changes = filter_to(packname)
    plt.plot(jobs, changes)
    plt.ylabel(packname)
    plt.show()
def list_packs(data=None):
    """Count, for each package, in how many parsed jobs it was benched.

    Parameters:
        data: iterable of (date, job, packs) triples; defaults to the
            module-level `parsed` list built above.

    Returns a dict mapping package name -> occurrence count.
    """
    if data is None:
        data = parsed
    known = {}
    for _, _, packs in data:
        for pack in packs:
            # dict.get replaces the membership-test-then-branch counter.
            known[pack] = known.get(pack, 0) + 1
    return known
# Entry point: plot the package named on the command line.
# Guarded so that importing this module for interactive exploration
# does not immediately pop a plot window.
if __name__ == '__main__':
    plot(sys.argv[1])