Update README

Improve parsing of job IDs
Save warnings in output
2024-02-15 11:18:11 +01:00 · 2024-02-15 11:06:54 +01:00 · 2024-02-15 10:29:45 +01:00 · 2024-02-13 14:34:08 +01:00 · 2024-02-12 17:37:40 +01:00 · 2024-02-12 17:37:40 +01:00
2 changed files with 49 additions and 23 deletions
--- a/monitoring/db/README.md
+++ b/monitoring/db/README.md
@ -23,3 +23,9 @@ Requirements:
 Created file detailed_power.2335001.hawk-pbs5.100000-1000004.csv
 Created file detailed_power.7678069.hawk-pbs5.2432345-423423.csv
 ```
+
+Querying Hawk-AI nodes requires the option `--hawk-ai`.
+
+The time resolution can be set with the option `--interval`.
+
+Note that small intervals will lead to data gaps. In particular, not all nodes will report power values for a given time stamp. Multiple values within an interval are averaged arithmetically. This might make it difficult to use the output of this script to calculate the total energy used by a job.
--- a/monitoring/db/scripts/get_detailed_power.py
+++ b/monitoring/db/scripts/get_detailed_power.py
@ -25,41 +25,61 @@ def parse_jobid(s):
 	import re
 	hawkpbs = r'.hawk-pbs5'
 	jobid = re.sub(hawkpbs, '', s)
-	if not jobid.isdigit():
+	not_allowed = r'[^0-9\[\]]' # Jobid can be more complex than just digits, eg 2444420[201]
+	if re.search(not_allowed, jobid): 
 		raise argparse.ArgumentTypeError(f'invalid job ID "{s}"')
 	return jobid


 class Power:
-	def __init__(self, nodes, interval=-1):
+	def __init__(self, nodes):
 		self.nodes = nodes
 		self.epochs = OrderedDict()
 		self.first_ts = None
 		self.last_ts = None
-		self.interval = interval
+		self.warnings = ""

 	@classmethod
 	def from_list(cls, data):
-		"""Assumes data is a list of tuples (timestamp, node, value)"""
-		idx_ts = 0; idx_node = 1; idx_value = 2
-		nodes = list(set([line[idx_node] for line in data]))
-		cls = Power(nodes)
+		"""
+		Returns a Power instance from a list of tuples (timestamp, node, value).
+
+		Assumptions:
+		 - data is sorted by timestamp ascending
+		 - for each timestamp, there is the same set of nodes and in the same order
+		"""
+
+		idx_ts = 0; idx_node = 1; idx_value = 2
+		nodes = list(OrderedDict.fromkeys([line[idx_node] for line in data]))   # preserves order of nodes
+		power = Power(nodes)

-		# for now ignore order to nodes
 		values = {}
 		for l in data:
 			ts = l[idx_ts]
 			if ts not in values:
 				values[ts] = []
-			# node = l[1]
-			power = l[idx_value]
-			values[ts].append(power)
+			value = l[idx_value]
+			values[ts].append(value)

-		epochs = sorted(values.keys())
+		epochs = values.keys()
 		for epoch in epochs:
-			cls.insert_epoch(epoch, values[epoch])
+			power.insert_epoch(epoch, values[epoch])

-		return cls
+		# check implicit assumptions: 1) ts/epochs are sorted
+		e = list(epochs)
+		k = list(values.keys())
+		if not e == k:
+			power.warnings += "# Warning: Unexpected unsorted timestamps.\n"
+
+		# check implicit assumptions: 2) each line has #nodes values
+		nnodes = len(nodes)
+		for epoch in epochs:
+			actual = len(values[epoch])
+			if actual != nnodes:
+				power.warnings += "# Warning: Unexpected number of nodes ({actual}/{expected})\n".format(actual=actual, expected=nnodes)
+				break
+		
+		return power

 	@classmethod
 	def from_db(cls, db, jobid, interval, hawk_ai):
@ -68,7 +88,6 @@ class Power:
 			raise RuntimeError

 		power = cls.from_list(all_list)
-		power.set_interval(interval*1000)    # milliseconds

 		return power 
 
@ -79,6 +98,7 @@ class Power:
 			print("Error: cowardly refusing to overwrite file ", fname)
 			return None

+		header += self.warnings
 		try:
 			with open(fname, "w+") as f:
 				f.write(header + self.header())
@ -89,9 +109,6 @@ class Power:

 		return fname
 	
-	def set_interval(self, interval):
-		self.interval = interval
-	
 	def insert_epoch(self, ts, values):
 		self.epochs[ts] = values
 		if not self.first_ts:
@ -101,9 +118,10 @@ class Power:
 	def header(self):
 		hd  = "# all timestamp have unit miliseconds since unix epoch\n"
 		hd += "# all power values have unit Watt\n"
-		hd += "timestamp,delta_t,head_node_power,avg_node_power,median_node_power,min_node_power,max_node_power,std_dev_node_power"
+		hd += "timestamp,RESERVED,head_node_power,avg_node_power,median_node_power,min_node_power,max_node_power,std_dev_node_power"
 		# add node names here instead
-		hd += ",NO_NODE_NAMES_YET\n"
+		hd += "," + ",".join(self.nodes)
+		hd += "\n"
 		return hd

 	def body(self):
@ -113,7 +131,7 @@ class Power:
 		return _body
 	
 	def summarize_time(self, ts):
-		return ts, self.interval
+		return ts, ""

 	@staticmethod
 	def summarize_values(val):
@ -127,8 +145,8 @@ class Power:
 	def summarize_epoch(self, epoch):
 		ts, values = epoch
 		return self.summarize_time(ts) \
-				+ self.summarize_values(values)
-				# + values
+				+ self.summarize_values(values) \
+				+ tuple(values)
 	
 	@staticmethod
 	def pretty_print(args):
@ -232,6 +250,8 @@ class App:
 			fn = power.to_file(jobid, header)
 			if fn:
 				print('Created file {fn}'.format(fn=fn))
+			if power.warnings:
+				print(power.warnings)


 if __name__ == "__main__":
Author	SHA1	Message	Date
Jose Gracia	f8b0f86f22	Update README	2024-02-15 11:18:11 +01:00
Jose Gracia	11f334d2b4	Improve parsing of job IDs	2024-02-15 11:06:54 +01:00
Jose Gracia	a5435e65dc	Save warnings in output	2024-02-15 10:29:45 +01:00
Jose Gracia	1f7dbd80b3	Refactor variable name	2024-02-13 14:34:08 +01:00
Jose Gracia	9c0bfd79f9	Add nodes to output	2024-02-12 17:37:40 +01:00
Jose Gracia	87a0f17b28	Preserve order of nodes	2024-02-12 17:37:40 +01:00
Jose Gracia	f58a6d62ab	State and check assumptions in Power.from_list()	2024-02-12 17:37:40 +01:00
Jose Gracia	fa1f981f89	Power class knows nothing about actual delta_t in outputs	2024-02-12 16:29:25 +01:00