From 1e09e934009a8712d99aba027b3db4b1dd277e3c Mon Sep 17 00:00:00 2001
From: Jose Gracia
Date: Wed, 7 Feb 2024 15:54:13 +0100
Subject: [PATCH] Add query and warnings for hawk-ai jobs

---
Review notes (this section is commentary, not part of the commit message):
- db_to_pf(): the added signature spelled its parameter `intrval` while the
  body passes `interval` to build_query(), which raises NameError on every
  call. The parameter is now spelled `interval`.
- Line breaks and indentation of this patch were restored from the hunk
  headers; if context-line whitespace does not match the target file
  exactly, apply with `--ignore-whitespace`.

 monitoring/db/scripts/get_detailed_power.py | 39 +++++++++++++++------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/monitoring/db/scripts/get_detailed_power.py b/monitoring/db/scripts/get_detailed_power.py
index f9ee851..883d77f 100755
--- a/monitoring/db/scripts/get_detailed_power.py
+++ b/monitoring/db/scripts/get_detailed_power.py
@@ -13,6 +13,8 @@ def parse_arguments(args):
                         help='Show database querries, etc.')
     parser.add_argument('-t', '--interval', action='store', type=float, default=5.0,
                         help="Interval between power values in seconds")
+    parser.add_argument('--hawk-ai', action='store_true',
+                        help="Job did run on Hawk-AI")
     parser.add_argument('jobid', type=parse_jobid, nargs='+',
                         help='Job ID such as "2260215" or "2260215.hawk-pbs5"')
 
@@ -60,8 +62,8 @@ class Power:
         return cls
 
     @classmethod
-    def from_db(cls, db, jobid, interval):
-        all_list = db.db_to_list(jobid, interval)
+    def from_db(cls, db, jobid, interval, hawk_ai):
+        all_list = db.db_to_list(jobid, interval, hawk_ai)
         if not all_list:
             raise RuntimeError
 
@@ -150,6 +152,18 @@ select * from get_job_data(
     array['time','node'] -- sort by time first than node (ascending)
 )
 as t(time bigint, name varchar, avg double precision);
+"""
+
+    QUERY_STRING_HAWK_AI = """
+-- For description of get_ai_job_data(), see https://kb.hlrs.de/monitoring/index.php/TimescaleDB_-_Query_Guidelines#Function:_get_job_data_and_get_ai_job_data
+select * from get_ai_job_data(
+    '{jobid}.hawk-pbs5',
+    'telegraf_ipmi_power_meter', -- power data source
+    '{interval} seconds',
+    array['avg'], -- aggregation: average across samples in bucket
+    array['time','node'] -- sort by time first than node (ascending)
+)
+as t(time bigint, name varchar, avg double precision);
 """
 
     def __init__(self, verbose):
@@ -167,17 +181,20 @@ as t(time bigint, name varchar, avg double precision);
         return self.connection.close()
 
     @classmethod
-    def build_query(cls, jobid, interval):
+    def build_query(cls, jobid, interval, hawk_ai):
         import sqlalchemy as db
-        query_string = cls.QUERY_STRING_HAWK
+        if hawk_ai:
+            query_string = cls.QUERY_STRING_HAWK_AI
+        else:
+            query_string = cls.QUERY_STRING_HAWK
         return db.text(query_string.format(jobid=jobid, interval=interval))
 
-    def db_to_list(self, jobid, interval):
-        query = self.build_query(jobid, interval)
+    def db_to_list(self, jobid, interval, hawk_ai):
+        query = self.build_query(jobid, interval, hawk_ai)
         return self.connection.execute(query).fetchall()
 
-    def db_to_pf(self, jobid, inerval):
-        query = self.build_query(jobid, interval)
+    def db_to_pf(self, jobid, interval, hawk_ai):
+        query = self.build_query(jobid, interval, hawk_ai)
         return pd.read_sql(query, con=self.connection)
 
 
@@ -189,8 +206,10 @@ class App:
     @staticmethod
     def warnings(config):
         warn = ""
-        if config.interval < 5:
+        if not config.hawk_ai and config.interval < 5:
             warn += '# Warning: interval<5 is very small and may lead to data gaps.'
+        if config.hawk_ai and config.interval < 60:
+            warn += '# Warning: interval<60 is very small for Hawk-AI nodes and may lead to data gaps.'
         return warn
 
     def run_all(self):
@@ -205,7 +224,7 @@ class App:
 
         for jobid in self.config.jobid:
             try:
-                power = Power.from_db(self.db, jobid, self.config.interval)
+                power = Power.from_db(self.db, jobid, self.config.interval, self.config.hawk_ai)
             except RuntimeError:
                 print('No data found for job ID "{}"'.format(jobid))
                 continue