Compare commits

...

8 commits

2 changed files with 49 additions and 23 deletions

View file

@ -23,3 +23,9 @@ Requirements:
Created file detailed_power.2335001.hawk-pbs5.100000-1000004.csv Created file detailed_power.2335001.hawk-pbs5.100000-1000004.csv
Created file detailed_power.7678069.hawk-pbs5.2432345-423423.csv Created file detailed_power.7678069.hawk-pbs5.2432345-423423.csv
``` ```
Querying Hawk-AI nodes requires the option `--hawk-ai`.
The time resolution can be set with the option `--interval`.
Note that small intervals will lead to data gaps. In particular, not all nodes will report power values for a given timestamp. Multiple values within an interval are averaged arithmetically. This might make it difficult to use the output of this script to calculate the total energy used by a job.

View file

@ -25,41 +25,61 @@ def parse_jobid(s):
import re import re
hawkpbs = r'.hawk-pbs5' hawkpbs = r'.hawk-pbs5'
jobid = re.sub(hawkpbs, '', s) jobid = re.sub(hawkpbs, '', s)
if not jobid.isdigit(): not_allowed = r'[^0-9\[\]]' # Jobid can be more complex than just digits, eg 2444420[201]
if re.search(not_allowed, jobid):
raise argparse.ArgumentTypeError(f'invalid job ID "{s}"') raise argparse.ArgumentTypeError(f'invalid job ID "{s}"')
return jobid return jobid
class Power: class Power:
def __init__(self, nodes, interval=-1): def __init__(self, nodes):
self.nodes = nodes self.nodes = nodes
self.epochs = OrderedDict() self.epochs = OrderedDict()
self.first_ts = None self.first_ts = None
self.last_ts = None self.last_ts = None
self.interval = interval self.warnings = ""
@classmethod @classmethod
def from_list(cls, data): def from_list(cls, data):
"""Assumes data is a list of tuples (timestamp, node, value)""" """
idx_ts = 0; idx_node = 1; idx_value = 2 Returns a Power instance from a list of tuples (timestamp, node, value).
nodes = list(set([line[idx_node] for line in data]))
cls = Power(nodes) Assumptions:
- data is sorted by timestamp ascending
- for each timestamp, there is the same set of nodes and in the same order
"""
idx_ts = 0; idx_node = 1; idx_value = 2
nodes = list(OrderedDict.fromkeys([line[idx_node] for line in data])) # preserves order of nodes
power = Power(nodes)
# for now ignore order to nodes
values = {} values = {}
for l in data: for l in data:
ts = l[idx_ts] ts = l[idx_ts]
if ts not in values: if ts not in values:
values[ts] = [] values[ts] = []
# node = l[1] value = l[idx_value]
power = l[idx_value] values[ts].append(value)
values[ts].append(power)
epochs = sorted(values.keys()) epochs = values.keys()
for epoch in epochs: for epoch in epochs:
cls.insert_epoch(epoch, values[epoch]) power.insert_epoch(epoch, values[epoch])
return cls # check implicit assumptions: 1) ts/epochs are sorted
e = list(epochs)
k = list(values.keys())
if not e == k:
power.warnings += "# Warning: Unexpected unsorted timestamps.\n"
# check implicit assumptions: 2) each line has #nodes values
nnodes = len(nodes)
for epoch in epochs:
actual = len(values[epoch])
if actual != nnodes:
power.warnings += "# Warning: Unexpected number of nodes ({actual}/{expected})\n".format(actual=actual, expected=nnodes)
break
return power
@classmethod @classmethod
def from_db(cls, db, jobid, interval, hawk_ai): def from_db(cls, db, jobid, interval, hawk_ai):
@ -68,7 +88,6 @@ class Power:
raise RuntimeError raise RuntimeError
power = cls.from_list(all_list) power = cls.from_list(all_list)
power.set_interval(interval*1000) # milliseconds
return power return power
@ -79,6 +98,7 @@ class Power:
print("Error: cowardly refusing to overwrite file ", fname) print("Error: cowardly refusing to overwrite file ", fname)
return None return None
header += self.warnings
try: try:
with open(fname, "w+") as f: with open(fname, "w+") as f:
f.write(header + self.header()) f.write(header + self.header())
@ -89,9 +109,6 @@ class Power:
return fname return fname
def set_interval(self, interval):
self.interval = interval
def insert_epoch(self, ts, values): def insert_epoch(self, ts, values):
self.epochs[ts] = values self.epochs[ts] = values
if not self.first_ts: if not self.first_ts:
@ -101,9 +118,10 @@ class Power:
def header(self): def header(self):
hd = "# all timestamp have unit miliseconds since unix epoch\n" hd = "# all timestamp have unit miliseconds since unix epoch\n"
hd += "# all power values have unit Watt\n" hd += "# all power values have unit Watt\n"
hd += "timestamp,delta_t,head_node_power,avg_node_power,median_node_power,min_node_power,max_node_power,std_dev_node_power" hd += "timestamp,RESERVED,head_node_power,avg_node_power,median_node_power,min_node_power,max_node_power,std_dev_node_power"
# add node names here instead # add node names here instead
hd += ",NO_NODE_NAMES_YET\n" hd += "," + ",".join(self.nodes)
hd += "\n"
return hd return hd
def body(self): def body(self):
@ -113,7 +131,7 @@ class Power:
return _body return _body
def summarize_time(self, ts): def summarize_time(self, ts):
return ts, self.interval return ts, ""
@staticmethod @staticmethod
def summarize_values(val): def summarize_values(val):
@ -127,8 +145,8 @@ class Power:
def summarize_epoch(self, epoch): def summarize_epoch(self, epoch):
ts, values = epoch ts, values = epoch
return self.summarize_time(ts) \ return self.summarize_time(ts) \
+ self.summarize_values(values) + self.summarize_values(values) \
# + values + tuple(values)
@staticmethod @staticmethod
def pretty_print(args): def pretty_print(args):
@ -232,6 +250,8 @@ class App:
fn = power.to_file(jobid, header) fn = power.to_file(jobid, header)
if fn: if fn:
print('Created file {fn}'.format(fn=fn)) print('Created file {fn}'.format(fn=fn))
if power.warnings:
print(power.warnings)
if __name__ == "__main__": if __name__ == "__main__":