This work was done in collaboration with Alessandro Broggio, William J. Torres Bobadilla, Andrea Ferroglia, Manoj Kumar Mandal, Pierpaolo Mastrolia, Jonathan Ronca, and Max Zoller.
import datetime
import re
import sys
import tarfile

import pymule
import tabulate
Let us begin by defining a few regular expressions
# Patterns applied to the text of a worker log.
# Start and finish lines open with "[Xxx <timestamp>]"; the capitalised
# three-letter token (presumably the weekday) is skipped and the rest of the
# bracket is captured.
re_start = re.compile(r'\[[A-Z][a-z][a-z] (.*)\] Runner start as')
re_finish = re.compile(r'\[[A-Z][a-z][a-z] (.*)\] Runner .* finishes')
# Cancellation notice carrying an ISO-style timestamp (years 20xx).
re_cancel = re.compile(r'CANCELLED AT (20\d\d-\d\d-\d\dT\d\d:\d\d:\d\d)')
# Host names of the two families this notebook knows about.
re_hostname = re.compile(r'(merlin-c-\d*|lcth\d*)')
We now need a parser for dates :/
# Timestamp layouts accepted by parser(); the year 2022 is part of every
# pattern.
# Month-first layout: "<Mon> <day> <hh>:<mm>:<ss> <TZ> 2022".
re_dat_en = re.compile(r'([A-Z][a-z][a-z]) *(\d*) (\d\d):(\d\d):(\d\d) ([A-Z]*) 2022')
# Day-first layout: "<day> <Mon> <hh>:<mm>:<ss> <TZ> 2022".
re_dat_de = re.compile(r' *(\d*) ([A-Z][a-z][a-z]) (\d\d):(\d\d):(\d\d) ([A-Z]*) 2022')
# ISO-style layout: "2022-<mm>-<dd>T<hh>:<mm>:<ss>".
re_dat_is = re.compile(r'2022-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)')


def parser(date):
    """Convert a timestamp string from a worker log into a datetime.

    The three layouts described by ``re_dat_en``, ``re_dat_de`` and
    ``re_dat_is`` are tried in that order, each anchored at the start of
    ``date``.  The time-zone token of the first two layouts is matched but
    ignored, so the result is a naive ``datetime.datetime`` in the year 2022.

    Raises:
        ValueError: if ``date`` matches none of the layouts, or if it uses a
            month abbreviation missing from the lookup table below.
    """
    # Only the abbreviations listed here are known; 'Okt' is an alternative
    # spelling that maps to October.
    months = {
        'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Okt': 10
    }
    if ma := re_dat_en.match(date):
        mo, d, h, m, s, _ = ma.groups()
        named_month = True
    elif ma := re_dat_de.match(date):
        d, mo, h, m, s, _ = ma.groups()
        named_month = True
    elif ma := re_dat_is.match(date):
        mo, d, h, m, s = ma.groups()
        named_month = False
    else:
        # Previously this fell through and failed on an unbound variable.
        raise ValueError(f'unrecognised date format: {date!r}')

    if named_month:
        if mo not in months:
            raise ValueError(f'unknown month {mo!r} in date {date!r}')
        mo = months[mo]
    else:
        mo = int(mo)
    return datetime.datetime(2022, mo, int(d), int(h), int(m), int(s))
# Host table read from info.txt.  Only lines consisting of exactly four
# space-separated fields are used; they are read as
# "<hostname> <comma-separated features> <ignored> <ncpu>" and stored as
# hostname -> (*features, ncpu).
with open('info.txt') as fp:
    info = {
        row[0]: (*row[1].split(','), int(row[3]))
        for row in (line.split(' ') for line in fp.read().splitlines())
        if len(row) == 4
    }
The node lcth21 (which ran the OpenLoops test) is added manually
# Hand-written entry in the same layout as the parsed ones:
# (memory feature, CPU model, number of CPUs).
info['lcth21'] = ('mem_31gb', 'xeon-e5-2670', 16)
We now have the following feature matrix
# Distinct (memory feature, CPU model, number of CPUs) tuples over all hosts.
features = {*info.values()}
Let us load the data and parse the files
# Archive that get_runtime() iterates over; it is opened once here and shared.
# NOTE(review): the handle is never closed explicitly.
tf = tarfile.open('em2em_muone_paper/workers.tar.bz2')
def get_runtime(re_pats, fallback_end=datetime.datetime(2022, 11, 23, 14, 3)):
    """Sum the runtime of the selected worker logs per node type.

    Walks over the regular files of the module-level archive ``tf`` and keeps
    those whose name is matched (from its start, via ``re.match``) by at
    least one entry of ``re_pats``.  For each kept log the runtime is the
    span between the runner's start line and, in order of preference, its
    finish line, its cancellation notice, or ``fallback_end``.

    Args:
        re_pats: iterable of patterns (compiled or plain strings) tested
            against the archive member names.
        fallback_end: end time assumed for logs that contain neither a
            finish line nor a cancellation notice.

    Returns:
        dict mapping every tuple in the module-level ``features`` set to the
        accumulated runtime in seconds (0 when no log ran on that node type).

    Raises:
        ValueError: if a selected log has no start line or no recognisable
            host name.
        KeyError: if the host name found in a log is missing from ``info``.
    """
    runtime_by_feature = dict.fromkeys(features, 0)
    for ti in tf:
        if not ti.isreg():
            continue
        if not any(re.match(pat, ti.name) for pat in re_pats):
            continue

        # Close the member's file object as soon as its text has been read.
        with tf.extractfile(ti) as fp:
            txt = fp.read().decode()

        started = re_start.search(txt)
        if started is None:
            raise ValueError(f'{ti.name}: no runner start line found')
        start = parser(started.group(1))

        if m := re_finish.search(txt):
            end = parser(m.group(1))
        elif m := re_cancel.search(txt):
            end = parser(m.group(1))
        else:
            end = fallback_end

        host = re_hostname.search(txt)
        if host is None:
            raise ValueError(f'{ti.name}: no known host name found')

        runtime = end - start
        runtime_by_feature[info[host.group(1)]] += runtime.total_seconds()
    return runtime_by_feature
To calculate the carbon footprint we need to calculate the energy required. This can be done by \begin{align} E = \Bigg( {\tt TDP} + 0.3725\,\frac{\rm W}{\rm GB}\times \frac{\tt mem}{\# {\rm cpu}} \Bigg) \times {\rm PUE} \times t \end{align} We can get the TDP (thermal design power) for the two CPUs we've used from the specs on Intel's website.
# Power per CPU in watts, keyed by CPU model: the first number of each pair
# divided by the second (presumably package TDP and core count).
TDP = {
    model: watts / cores
    for model, (watts, cores) in {
        'xeon-gold-6152': (140, 22),
        'xeon-e5-2670': (115, 8),
    }.items()
}
For the PUE (power usage effectiveness, accounting for overhead from e.g. air conditioning) we take 1.67. Switzerland has a carbon intensity of $11.82\,{\rm gCO}_2{\rm e}/{\rm kWh}$.
def visualise(runtime_by_feature, pue=1.67, carbon_intensity=11.82):
    """Turn per-node-type runtimes into time, energy and CO2e figures.

    Args:
        runtime_by_feature: mapping ``(mem, cpu, ncpu) -> seconds`` where
            ``mem`` looks like ``'mem_<N>gb'`` and ``cpu`` is a key of the
            module-level ``TDP`` table.  Entries with a runtime <= 0 are
            skipped.
        pue: factor multiplied onto the energy of every entry.
        carbon_intensity: gCO2e per kWh used to convert energy to emissions.

    Returns:
        A list ``[runtime in years, energy in kWh, emissions in kgCO2e,
        html]`` where ``html`` is a ``<br>``-joined string with one
        green-algorithms calculator link per contributing node type.  The
        links always select Switzerland and do not carry ``pue`` or
        ``carbon_intensity``.
    """
    en = 0
    t = 0
    urls = []
    for (mem, cpu, ncpu), v in runtime_by_feature.items():
        if v <= 0:
            continue
        # Raw string: '\d' is not a valid escape in a normal string literal.
        mem = int(re.findall(r'mem_(\d*)gb', mem)[0])
        # W * s -> kWh, with the memory share scaled by the number of CPUs.
        en += (TDP[cpu] + 0.3725 * mem / ncpu) * v * pue / 3600 / 1e3
        t += v
        # The calculator multiplies by numberCPUs itself, so the link gets
        # the runtime (in minutes) divided by ncpu.
        m = v / 60. / ncpu
        url = (
            f"http://calculator.green-algorithms.org//?runTime_hour={int(m//60)}&runTime_min={int(m%60)}"
            f"&locationContinent=Europe&locationCountry=Switzerland&locationRegion=CH&coreType=CPU"
            f"&numberCPUs={ncpu}&CPUmodel=other&tdpCPU={TDP[cpu]}&memory={mem}&platformType=localServer"
        )
        urls.append(f'<a href="{url}">green-algorithms for {cpu}</a>')
    return [
        t / 3600 / 24. / 365.25,
        en,
        en * carbon_intensity / 1e3,
        "<br>".join(urls),
    ]
Let us calculate the full runtime,
# '.*' matches every member name, so this covers all regular files in the
# archive.
full = visualise(get_runtime([re.compile('.*')]))
for a single observable w/o split
# Member-name patterns selecting the logs of the no-split configuration.
# Raw strings keep the regex backslashes (\d, \.) from being read as invalid
# string escape sequences; the pattern text itself is unchanged.
nosplit = visualise(get_runtime([
    re.compile(r'photonic-/worker_\d*_[\.\d]*_em2em[RF][RF](MIXDz?|[EM]+)\d*_muone160-_0_\d*'),
    re.compile(r'photonic-/worker_\d*_[\.\d]*_em2em[RF][EM].*_muone160-_0_\d*'),
    re.compile(r'photonic-/worker_\d*_[\.\d]*_em2em0.*_muone160-_0_\d*'),
    re.compile(r'(hadronic|leptonic)-/worker.*_2_\d*'),
]))
for a single observable w/ split
# Member-name patterns selecting the logs of the split configuration.
# Raw strings keep the regex backslashes (\d, \.) from being read as invalid
# string escape sequences; the pattern text itself is unchanged.
yessplit = visualise(get_runtime([
    re.compile(r'photonic-/worker_\d*_[\.\d]*_em2em[RF][RF](\d+z?|[EM]+\d*)_muone160-_0_\d*'),
    re.compile(r'photonic-/worker_\d*_[\.\d]*_em2em[RF][EM].*_muone160-_0_\d*'),
    re.compile(r'photonic-/worker_\d*_[\.\d]*_em2em0.*_muone160-_0_\d*'),
    re.compile(r'(hadronic|leptonic)-/worker.*_2_\d*'),
]))
# One row per configuration; the last element of each visualise() result is
# the HTML link list, which only the HTML table shows.
_rows = [
    ["No split"] + nosplit,
    ["Split"] + yessplit,
    ["Total"] + full,
]
_headers = [
    "configuration", "Run time (CPU yr)", "Energy (kWh)", "kgCO2e",
    "Green-algorithms",
]

# The name `matplotlib` is never imported in this file, so it is looked up
# among the already-loaded modules instead of being referenced directly.
# The plain-text table is printed unless a backend whose name contains 'nb'
# is active; with matplotlib not loaded at all it is always printed.
_mpl = sys.modules.get('matplotlib')
if _mpl is None or 'nb' not in _mpl.get_backend():
    print(tabulate.tabulate(
        [row[:-1] for row in _rows],
        headers=_headers[:-1],
    ))

# Left as a bare expression so that a notebook displays it as the cell's
# output; when run as a script the value is discarded.
tabulate.tabulate(_rows, headers=_headers, tablefmt="unsafehtml")