diff --git a/var/spack/repos/builtin/packages/axom/package.py b/var/spack/repos/builtin/packages/axom/package.py
index df30667dde..8cba91e7a8 100644
--- a/var/spack/repos/builtin/packages/axom/package.py
+++ b/var/spack/repos/builtin/packages/axom/package.py
@@ -45,6 +45,7 @@ class Axom(CachedCMakePackage, CudaPackage, ROCmPackage):
 
     version("main", branch="main")
     version("develop", branch="develop")
+    version("0.9.0", tag="v0.9.0", commit="5f531595d941d16fa3b8583bfc347a845d9feb6d")
     version("0.8.1", tag="v0.8.1", commit="0da8a5b1be596887158ac2fcd321524ba5259e15")
     version("0.8.0", tag="v0.8.0", commit="71fab3262eb7e1aa44a04c21d072b77f06362f7b")
     version("0.7.0", tag="v0.7.0", commit="ea5158191181c137117ae37959879bdc8b107f35")
@@ -84,6 +85,13 @@ class Axom(CachedCMakePackage, CudaPackage, ROCmPackage):
     variant("mpi", default=True, description="Build MPI support")
     variant("openmp", default=True, description="Turn on OpenMP support.")
 
+    variant(
+        "profiling",
+        default=False,
+        when="@develop",
+        description="Build with hooks for Adiak/Caliper performance analysis",
+    )
+
     variant("mfem", default=False, description="Build with mfem")
     variant("hdf5", default=True, description="Build with hdf5")
     variant("lua", default=True, description="Build with Lua")
@@ -104,7 +112,8 @@ class Axom(CachedCMakePackage, CudaPackage, ROCmPackage):
     depends_on("cmake@3.21:", type="build", when="+rocm")
 
     depends_on("blt", type="build")
-    depends_on("blt@0.5.1:0.5.3", type="build", when="@0.6.1:")
+    depends_on("blt@0.5.1:0.5.3", type="build", when="@0.6.1:0.8")
+    depends_on("blt@0.6.2:", type="build", when="@0.9:")
 
     depends_on("mpi", when="+mpi")
 
@@ -122,29 +131,49 @@ class Axom(CachedCMakePackage, CudaPackage, ROCmPackage):
     depends_on("scr~fortran", when="+scr~fortran")
 
     with when("+umpire"):
-        depends_on("umpire@2022.03.0:2023.06", when="@0.7.0:")
+        depends_on("umpire")
+        depends_on("umpire@2024.02.0:", when="@0.9:")
+        depends_on("umpire@2022.03.0:2023.06", when="@0.7.0:0.8")
         depends_on("umpire@6.0.0", when="@0.6.0")
         depends_on("umpire@5:5.0.1", when="@:0.5.0")
-        depends_on("umpire +openmp", when="+openmp")
+        depends_on("umpire+openmp", when="+openmp")
 
     with when("+raja"):
-        depends_on("raja@2022.03.0:2023.06", when="@0.7.0:")
+        depends_on("raja")
+        depends_on("raja@2024.02.0:", when="@0.9:")
+        depends_on("raja@2022.03.0:2023.06", when="@0.7.0:0.8")
         depends_on("raja@0.14.0", when="@0.6.0")
         depends_on("raja@:0.13.0", when="@:0.5.0")
         depends_on("raja~openmp", when="~openmp")
         depends_on("raja+openmp", when="+openmp")
 
+    with when("+profiling"):
+        depends_on("adiak")
+        depends_on("caliper+adiak~papi")
+
+        depends_on("caliper+cuda", when="+cuda")
+        depends_on("caliper~cuda", when="~cuda")
+
+        depends_on("caliper+rocm", when="+rocm")
+        depends_on("caliper~rocm", when="~rocm")
+
+        for dep in ["adiak", "caliper"]:
+            depends_on(f"{dep}+mpi", when="+mpi")
+            depends_on(f"{dep}~mpi", when="~mpi")
+            depends_on(f"{dep}+shared", when="+shared")
+            depends_on(f"{dep}~shared", when="~shared")
+
     for val in CudaPackage.cuda_arch_values:
-        raja_cuda = "raja +cuda cuda_arch={0}".format(val)
-        umpire_cuda = "umpire +cuda cuda_arch={0}".format(val)
-        depends_on(raja_cuda, when="+{0}".format(raja_cuda))
-        depends_on(umpire_cuda, when="+{0}".format(umpire_cuda))
+        ext_cuda_dep = f"+cuda cuda_arch={val}"
+        depends_on(f"raja {ext_cuda_dep}", when=f"+raja {ext_cuda_dep}")
+        depends_on(f"umpire {ext_cuda_dep}", when=f"+umpire {ext_cuda_dep}")
+        depends_on(f"caliper {ext_cuda_dep}", when=f"+profiling {ext_cuda_dep}")
 
     for val in ROCmPackage.amdgpu_targets:
-        raja_rocm = "raja +rocm amdgpu_target={0}".format(val)
-        umpire_rocm = "umpire +rocm amdgpu_target={0}".format(val)
-        depends_on(raja_rocm, when="+{0}".format(raja_rocm))
-        depends_on(umpire_rocm, when="+{0}".format(umpire_rocm))
+        ext_rocm_dep = f"+rocm amdgpu_target={val}"
+        depends_on(f"raja {ext_rocm_dep}", when=f"+raja {ext_rocm_dep}")
+        depends_on(f"umpire {ext_rocm_dep}", when=f"+umpire {ext_rocm_dep}")
+        depends_on(f"caliper {ext_rocm_dep}", when=f"+profiling {ext_rocm_dep}")
 
     depends_on("rocprim", when="+rocm")
 
@@ -156,15 +185,19 @@ class Axom(CachedCMakePackage, CudaPackage, ROCmPackage):
     depends_on("python", when="+python")
 
     # Devtools
-    depends_on("cppcheck", when="+devtools")
-    depends_on("doxygen", when="+devtools")
-    depends_on("graphviz", when="+devtools")
-    depends_on("python", when="+devtools")
-    depends_on("py-sphinx", when="+devtools")
-    depends_on("py-shroud", when="+devtools")
-    depends_on("py-jsonschema", when="+devtools")
-    depends_on("llvm+clang@10.0.0", when="+devtools", type="build")
+    with when("+devtools"):
+        depends_on("cppcheck")
+        depends_on("doxygen")
+        depends_on("graphviz")
+        depends_on("python")
+        depends_on("py-sphinx")
+        depends_on("py-shroud")
+        depends_on("py-jsonschema")
+        depends_on("llvm+clang@10.0.0", type="build")
 
+    # -----------------------------------------------------------------------
+    # Conflicts
+    # -----------------------------------------------------------------------
     # Hard requirement after Axom 0.6.1
     conflicts("~cpp14", when="@0.6.2:")
 
@@ -244,11 +277,9 @@ def initconfig_compiler_entries(self):
         if "+cpp14" in spec and spec.satisfies("@:0.6.1"):
             entries.append(cmake_cache_string("BLT_CXX_STD", "c++14", ""))
 
-        # Add optimization flag workaround for Debug builds with
-        # cray compiler or newer HIP
+        # Add optimization flag workaround for Debug builds with cray compiler or newer HIP
         if "+rocm" in spec:
-            if spec.satisfies("%cce") or spec.satisfies("%clang@16"):
-                entries.append(cmake_cache_string("CMAKE_CXX_FLAGS_DEBUG", "-O1 -g -DNDEBUG"))
+            entries.append(cmake_cache_string("CMAKE_CXX_FLAGS_DEBUG", "-O1 -g -DNDEBUG"))
 
         return entries
 
@@ -260,28 +291,20 @@ def initconfig_hardware_entries(self):
             entries.append(cmake_cache_option("ENABLE_CUDA", True))
             entries.append(cmake_cache_option("CMAKE_CUDA_SEPARABLE_COMPILATION", True))
 
-            entries.append(cmake_cache_option("AXOM_ENABLE_ANNOTATIONS", True))
-
             # CUDA_FLAGS
-            cudaflags = "-restrict --expt-extended-lambda "
+            cudaflags = "${CMAKE_CUDA_FLAGS} -restrict --expt-extended-lambda "
 
             # Pass through any cxxflags to the host compiler via nvcc's Xcompiler flag
             host_cxx_flags = spec.compiler_flags["cxxflags"]
             cudaflags += " ".join(["-Xcompiler=%s " % flag for flag in host_cxx_flags])
 
-            if not spec.satisfies("cuda_arch=none"):
-                cuda_arch = spec.variants["cuda_arch"].value[0]
-                entries.append(cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", cuda_arch))
-            else:
-                entries.append("# cuda_arch could not be determined\n\n")
-
             if spec.satisfies("^blt@:0.5.1"):
                 # This is handled internally by BLT now
                 if "+cpp14" in spec:
                     cudaflags += " -std=c++14"
                 else:
                     cudaflags += " -std=c++11"
-            entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS", cudaflags))
+            entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS", cudaflags, force=True))
 
             entries.append("# nvcc does not like gtest's 'pthreads' flag\n")
             entries.append(cmake_cache_option("gtest_disable_pthreads", True))
@@ -469,6 +492,13 @@ def initconfig_package_entries(self):
             else:
                 entries.append("# %s not built\n" % dep.upper())
 
+        if "+profiling" in spec:
+            dep_dir = get_spec_path(spec, "adiak", path_replacements)
+            entries.append(cmake_cache_path("ADIAK_DIR", dep_dir))
+
+            dep_dir = get_spec_path(spec, "caliper", path_replacements)
+            entries.append(cmake_cache_path("CALIPER_DIR", dep_dir))
+
         if "+umpire" in spec and spec.satisfies("^camp"):
             dep_dir = get_spec_path(spec, "camp", path_replacements)
             entries.append(cmake_cache_path("CAMP_DIR", dep_dir))