diff --git a/scheds/rust/scx_p2dq/src/bpf/intf.h b/scheds/rust/scx_p2dq/src/bpf/intf.h index ee5b8b4e6..1e2a08e6d 100644 --- a/scheds/rust/scx_p2dq/src/bpf/intf.h +++ b/scheds/rust/scx_p2dq/src/bpf/intf.h @@ -88,6 +88,11 @@ enum stat_idx { P2DQ_STAT_EXEC_BALANCE, P2DQ_STAT_FORK_SAME_LLC, P2DQ_STAT_EXEC_SAME_LLC, + P2DQ_STAT_THERMAL_KICK, + P2DQ_STAT_THERMAL_AVOID, + P2DQ_STAT_EAS_LITTLE_SELECT, + P2DQ_STAT_EAS_BIG_SELECT, + P2DQ_STAT_EAS_FALLBACK, P2DQ_NR_STATS, }; diff --git a/scheds/rust/scx_p2dq/src/bpf/main.bpf.c b/scheds/rust/scx_p2dq/src/bpf/main.bpf.c index f0dc413d7..6ae00f704 100644 --- a/scheds/rust/scx_p2dq/src/bpf/main.bpf.c +++ b/scheds/rust/scx_p2dq/src/bpf/main.bpf.c @@ -137,6 +137,10 @@ const volatile struct { bool pelt_enabled; bool fork_balance; bool exec_balance; + bool enable_eas; + bool thermal_enabled; + u16 small_task_threshold; + u16 large_task_threshold; } p2dq_config = { .sched_mode = MODE_DEFAULT, .nr_dsqs_per_llc = 3, @@ -158,6 +162,10 @@ const volatile struct { .pelt_enabled = true, .fork_balance = true, .exec_balance = true, + .enable_eas = false, + .thermal_enabled = false, + .small_task_threshold = 256, + .large_task_threshold = 768, }; /* Latency priority and preemption configuration */ @@ -187,6 +195,10 @@ u64 cpu_node_ids[MAX_CPUS]; u64 big_core_ids[MAX_CPUS]; u64 dsq_time_slices[MAX_DSQS_PER_LLC]; +/* Energy and capacity per CPU for energy-aware scheduling */ +u16 cpu_energy_cost[MAX_CPUS]; // Energy cost coefficient (0-65535) +u16 cpu_capacity[MAX_CPUS]; // CPU capacity (0-1024) + /* DHQ per LLC pair for migration (MAX_LLCS / 2 DHQs) */ scx_dhq_t *llc_pair_dhqs[MAX_LLCS / 2]; /* Track number of LLCs per NUMA node for strand assignment */ @@ -332,6 +344,13 @@ static __always_inline u32 pelt_decay(u32 val, u32 periods) return val; } +/* Forward declarations for energy-aware scheduling helpers */ +static __always_inline u32 get_cpu_capacity(s32 cpu); +static __always_inline u32 get_cpu_energy_cost(s32 cpu); +static __always_inline u32 get_task_util(struct task_struct *p); +static __always_inline bool prefer_little_core(struct task_struct *p); +static __always_inline bool prefer_big_core(struct task_struct *p); + /* * Update task's PELT metrics based on runtime. * Called when task stops running or starts running (for decay). 
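The next hunk makes the PELT contribution capacity- and frequency-invariant by scaling wall-clock runtime with the CPU's capacity and current cpuperf value. A minimal standalone sketch of that scaling, with an illustrative helper name and example values (not part of the patch):

/* sketch_pelt_scale.c - illustrative only; not part of scx_p2dq */
#include <stdint.h>
#include <stdio.h>

/* Scale wall-clock milliseconds into "work done at max capacity, max freq". */
static uint64_t scale_invariant_ms(uint64_t wall_ms, uint64_t capacity, uint64_t freq)
{
	/* wall_ms * (capacity / 1024) * (freq / 1024) */
	return (wall_ms * capacity * freq) / (1024ULL * 1024ULL);
}

int main(void)
{
	/* 4 ms on a little core (capacity 512) running at half frequency (512) */
	printf("%llu\n", (unsigned long long)scale_invariant_ms(4, 512, 512));   /* 1 */
	/* 4 ms on a big core (capacity 1024) running at full frequency (1024) */
	printf("%llu\n", (unsigned long long)scale_invariant_ms(4, 1024, 1024)); /* 4 */
	return 0;
}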
@@ -339,11 +358,14 @@ static __always_inline u32 pelt_decay(u32 val, u32 periods) * @taskc: Task context to update * @now: Current timestamp in ns * @delta_ns: Runtime delta (0 for decay-only update) + * @task_cpu: CPU the task is running on */ -static __always_inline void update_task_pelt(task_ctx *taskc, u64 now, u64 delta_ns) +static __always_inline void update_task_pelt(task_ctx *taskc, u64 now, u64 delta_ns, s32 task_cpu) { u64 elapsed_ns, elapsed_ms; u32 periods, delta_ms; + u32 capacity, freq; + u64 scaled_delta_ms, scaled_period_contrib; if (!p2dq_config.pelt_enabled) return; @@ -378,13 +400,27 @@ static __always_inline void update_task_pelt(task_ctx *taskc, u64 now, u64 delta taskc->util_sum = pelt_decay(taskc->util_sum, periods); } + capacity = get_cpu_capacity(task_cpu); + freq = scx_bpf_cpuperf_cur(task_cpu); + if (freq == 0) + freq = SCX_CPUPERF_ONE; + + /* + * Scale period contribution by capacity and frequency + * This makes the PELT metric represent "work done at max CPU capacity at max freq" + * + * Formula: scaled_time = wall_time * (capacity / 1024) * (freq / 1024) + * = wall_time * capacity * freq / (1024 * 1024) + */ if (taskc->period_contrib > 0) { - taskc->util_sum += taskc->period_contrib; + scaled_period_contrib = (taskc->period_contrib * capacity * freq) / (1024ULL * 1024ULL); + taskc->util_sum += scaled_period_contrib; taskc->period_contrib = 0; } delta_ms = delta_ns / NSEC_PER_MSEC; - taskc->util_sum += delta_ms; + scaled_delta_ms = (delta_ms * capacity * freq) / (1024ULL * 1024ULL); + taskc->util_sum += scaled_delta_ms; if (unlikely(taskc->util_sum > PELT_SUM_MAX)) taskc->util_sum = PELT_SUM_MAX; @@ -692,6 +728,125 @@ static task_ctx *lookup_task_ctx(struct task_struct *p) return taskc; } +/* + * Get CPU capacity (0-1024) + * Used for frequency/capacity-invariant PELT and energy-aware scheduling + */ +static __always_inline u32 get_cpu_capacity(s32 cpu) +{ + if (cpu < 0 || cpu >= MAX_CPUS) + return 1024; + return cpu_capacity[cpu] ? cpu_capacity[cpu] : 1024; +} + +/* + * Get CPU energy cost (lower is more efficient) + */ +static __always_inline u32 get_cpu_energy_cost(s32 cpu) +{ + if (cpu < 0 || cpu >= MAX_CPUS) + return 65535; + return cpu_energy_cost[cpu] ? 
cpu_energy_cost[cpu] : 1024; +} + +/* + * Get task utilization from custom PELT + * Returns util_avg in range 0-1024 + * NOTE: Frequency and capacity invariant after modifications + */ +static __always_inline u32 get_task_util(struct task_struct *p) +{ + task_ctx *taskc; + + taskc = lookup_task_ctx(p); + if (!taskc) + return 0; + + return taskc->util_avg; +} + +/* + * Check if task should prefer little cores based on utilization + */ +static __always_inline bool prefer_little_core(struct task_struct *p) +{ + if (!p2dq_config.enable_eas || !topo_config.has_little_cores) + return false; + + u32 util = get_task_util(p); + return util < p2dq_config.small_task_threshold; +} + +/* + * Check if task should prefer big cores based on utilization + */ +static __always_inline bool prefer_big_core(struct task_struct *p) +{ + if (!p2dq_config.enable_eas || !topo_config.has_little_cores) + return false; + + u32 util = get_task_util(p); + return util > p2dq_config.large_task_threshold; +} + +/* + * Get effective CPU capacity accounting for thermal pressure AND frequency + * Returns capacity in range 0-1024 + */ +static __always_inline u32 get_effective_cpu_capacity(s32 cpu) +{ + struct cpu_ctx *cpuc; + u32 base_capacity, thermal_pressure; + u32 cur_freq; + u64 effective_capacity; + + if (cpu < 0 || cpu >= MAX_CPUS) + return 0; + + cpuc = lookup_cpu_ctx(cpu); + if (!cpuc) + return 0; + + base_capacity = get_cpu_capacity(cpu); + + thermal_pressure = cpuc->perf; + + cur_freq = scx_bpf_cpuperf_cur(cpu); + if (cur_freq == 0) + cur_freq = SCX_CPUPERF_ONE; + + /* + * Effective capacity = (base - thermal) * freq / 1024 + * Combines thermal throttling and frequency scaling + */ + if (thermal_pressure >= base_capacity) + return 0; /* Fully throttled */ + + effective_capacity = (u64)(base_capacity - thermal_pressure) * cur_freq / SCX_CPUPERF_ONE; + + return (u32)effective_capacity; +} + +/* + * Check if CPU is thermally throttled + * Returns true if pressure > 25% capacity loss + */ +static __always_inline bool is_cpu_throttled(s32 cpu) +{ + struct cpu_ctx *cpuc; + + if (cpu < 0 || cpu >= MAX_CPUS) + return false; + + cpuc = lookup_cpu_ctx(cpu); + if (!cpuc) + return false; + + /* Throttled if pressure > 256 (25% of 1024) */ + return cpuc->perf > 256; +} + + struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __type(key, u32); @@ -901,7 +1056,6 @@ static s32 pick_idle_affinitized_cpu(struct task_struct *p, task_ctx *taskc, } } - // Fallback to anywhere the task can run cpu = bpf_cpumask_any_distribute(p->cpus_ptr); found_cpu: @@ -1006,6 +1160,207 @@ u32 __attribute__((noinline)) find_least_loaded_llc_for_fork(u32 parent_llc_id) return best_id; } +/* + * Pick idle CPU from mask, avoiding thermally throttled CPUs. + * Simpler/faster than full energy-aware selection - used for MODE_PERF/MODE_EFFICIENCY. 
+ */ +static __always_inline s32 pick_idle_thermal_aware(struct bpf_cpumask *mask, + struct task_struct *p) +{ + s32 cpu, best_cpu = -1; + u32 best_capacity = 0; + + if (!mask) + return -1; + + /* First pass: try to find unthrottled idle CPU */ + bpf_for(cpu, 0, topo_config.nr_cpus) { + if (!bpf_cpumask_test_cpu(cpu, cast_mask(mask))) + continue; + if (!bpf_cpumask_test_cpu(cpu, p->cpus_ptr)) + continue; + if (is_cpu_throttled(cpu)) + continue; + if (scx_bpf_test_and_clear_cpu_idle(cpu)) + return cpu; + } + + /* Second pass: allow throttled CPUs, prefer least throttled */ + bpf_for(cpu, 0, topo_config.nr_cpus) { + u32 capacity; + + if (mask && !bpf_cpumask_test_cpu(cpu, cast_mask(mask))) + continue; + if (!bpf_cpumask_test_cpu(cpu, p->cpus_ptr)) + continue; + if (!scx_bpf_test_and_clear_cpu_idle(cpu)) + continue; + + capacity = get_effective_cpu_capacity(cpu); + if (capacity > best_capacity) { + best_capacity = capacity; + best_cpu = cpu; + } + } + + if (best_cpu >= 0) + stat_inc(P2DQ_STAT_THERMAL_AVOID); + + return best_cpu; +} + +/* + * Select best idle CPU from mask based on: + * - Not thermally throttled (priority 1) + * - High effective capacity (accounts for thermal + freq) + * - Low energy cost + * + * Returns CPU ID or -1 if no suitable CPU found + * Updates best_score with score of selected CPU (higher is better) + */ +static __always_inline s32 select_best_idle_cpu(struct task_struct *p, + struct bpf_cpumask *mask, + u32 *best_score) +{ + s32 cpu, best_cpu = -1; + u32 highest_score = 0; + + if (!mask || !best_score) + return -1; + + *best_score = 0; + + bpf_for(cpu, 0, topo_config.nr_cpus) { + u32 capacity, energy_cost, score; + + if (!bpf_cpumask_test_cpu(cpu, cast_mask(mask))) + continue; + if (!bpf_cpumask_test_cpu(cpu, p->cpus_ptr)) + continue; + if (!scx_bpf_test_and_clear_cpu_idle(cpu)) + continue; + + capacity = get_effective_cpu_capacity(cpu); + energy_cost = get_cpu_energy_cost(cpu); + + /* + * Score formula: prioritize capacity, penalize energy cost + * score = capacity * 10 - (energy_cost / 10) + * Higher score is better + * + * Throttled CPUs (capacity=0) get score=0 + */ + if (is_cpu_throttled(cpu)) { + score = 0; /* Heavily penalize throttled CPUs */ + } else { + /* Multiply capacity by 10 for more weight vs energy cost */ + score = (capacity * 10); + /* Subtract scaled energy cost (divide by 10 to reduce impact) */ + if (energy_cost < score) + score -= (energy_cost / 10); + else + score = 1; + } + + if (score > highest_score) { + highest_score = score; + best_cpu = cpu; + } + } + + *best_score = highest_score; + return best_cpu; +} + +/* + * Pick idle CPU using comprehensive energy-aware scheduling + * Tries preferred core type first, then fallback type + */ +static __always_inline s32 pick_idle_energy_aware(struct task_struct *p, + struct llc_ctx *llcx, + bool *is_idle) +{ + s32 cpu = -1; + u32 pref_score = 0, fallback_score = 0; + + if (!llcx || !is_idle) + return -1; + + /* Determine preferred and fallback cpumasks based on task utilization */ + struct bpf_cpumask *pref_mask = NULL; + struct bpf_cpumask *fallback_mask = NULL; + bool prefer_little = prefer_little_core(p); + bool prefer_big = prefer_big_core(p); + + if (prefer_little) { + pref_mask = llcx->little_cpumask; + fallback_mask = llcx->big_cpumask; + } else if (prefer_big) { + pref_mask = llcx->big_cpumask; + fallback_mask = llcx->little_cpumask; + } else { + /* No strong preference, try both and pick best score */ + s32 little_cpu = -1, big_cpu = -1; + u32 little_score = 0, big_score = 0; + + if 
(llcx->little_cpumask)
+			little_cpu = select_best_idle_cpu(p, llcx->little_cpumask, &little_score);
+		if (llcx->big_cpumask)
+			big_cpu = select_best_idle_cpu(p, llcx->big_cpumask, &big_score);
+
+		/* Pick whichever has better score */
+		if (little_cpu >= 0 && big_cpu >= 0) {
+			if (little_score >= big_score) {
+				*is_idle = true;
+				stat_inc(P2DQ_STAT_EAS_LITTLE_SELECT);
+				return little_cpu;
+			} else {
+				*is_idle = true;
+				stat_inc(P2DQ_STAT_EAS_BIG_SELECT);
+				return big_cpu;
+			}
+		} else if (little_cpu >= 0) {
+			*is_idle = true;
+			stat_inc(P2DQ_STAT_EAS_LITTLE_SELECT);
+			return little_cpu;
+		} else if (big_cpu >= 0) {
+			*is_idle = true;
+			stat_inc(P2DQ_STAT_EAS_BIG_SELECT);
+			return big_cpu;
+		}
+		return -1;
+	}
+
+	/* Try preferred core type first */
+	if (pref_mask) {
+		cpu = select_best_idle_cpu(p, pref_mask, &pref_score);
+		if (cpu >= 0) {
+			*is_idle = true;
+			if (prefer_little)
+				stat_inc(P2DQ_STAT_EAS_LITTLE_SELECT);
+			else
+				stat_inc(P2DQ_STAT_EAS_BIG_SELECT);
+			return cpu;
+		}
+	}
+
+	/* Fallback to opposite core type if preferred not available */
+	if (fallback_mask) {
+		cpu = select_best_idle_cpu(p, fallback_mask, &fallback_score);
+		if (cpu >= 0) {
+			*is_idle = true;
+			stat_inc(P2DQ_STAT_EAS_FALLBACK);
+			if (prefer_little)
+				stat_inc(P2DQ_STAT_EAS_BIG_SELECT);
+			else
+				stat_inc(P2DQ_STAT_EAS_LITTLE_SELECT);
+			return cpu;
+		}
+	}
+
+	return -1;
+}
+
 static s32 pick_idle_cpu(struct task_struct *p, task_ctx *taskc,
 			 s32 prev_cpu, u64 wake_flags, bool *is_idle)
 {
@@ -1141,39 +1496,66 @@ static s32 pick_idle_cpu(struct task_struct *p, task_ctx *taskc,
 		goto found_cpu;
 	}
 
+	/*
+	 * Energy-aware selection with comprehensive scoring
+	 * Uses effective capacity, energy cost, and thermal awareness
+	 */
+	if (p2dq_config.enable_eas && topo_config.has_little_cores) {
+		cpu = pick_idle_energy_aware(p, llcx, is_idle);
+		if (cpu >= 0)
+			goto found_cpu;
+	}
+
 	if (p2dq_config.sched_mode == MODE_PERF &&
 	    topo_config.has_little_cores &&
 	    llcx->big_cpumask) {
-		cpu = __pick_idle_cpu(llcx->big_cpumask,
-				      SCX_PICK_IDLE_CORE);
-		if (cpu >= 0) {
-			*is_idle = true;
-			goto found_cpu;
-		}
-		if (llcx->big_cpumask) {
-			cpu = __pick_idle_cpu(llcx->big_cpumask, 0);
+		/* Try thermal-aware selection first for big cores if thermal tracking enabled */
+		if (p2dq_config.thermal_enabled) {
+			cpu = pick_idle_thermal_aware(llcx->big_cpumask, p);
 			if (cpu >= 0) {
 				*is_idle = true;
 				goto found_cpu;
 			}
 		}
+		/* Fallback to non-thermal-aware if thermal disabled or no idle big cores */
+		if (llcx->big_cpumask &&
+		    (cpu = __pick_idle_cpu(llcx->big_cpumask,
+					   SCX_PICK_IDLE_CORE)) >= 0) {
+			*is_idle = true;
+			goto found_cpu;
+		}
+		if (llcx->big_cpumask &&
+		    (cpu = __pick_idle_cpu(llcx->big_cpumask,
+					   0)) >= 0) {
+			*is_idle = true;
+			goto found_cpu;
+		}
 	}
 
 	if (p2dq_config.sched_mode == MODE_EFFICIENCY &&
 	    topo_config.has_little_cores && llcx->little_cpumask) {
-		cpu = __pick_idle_cpu(llcx->little_cpumask, SCX_PICK_IDLE_CORE);
-		if (cpu >= 0) {
-			*is_idle = true;
-			goto found_cpu;
-		}
-		if (llcx->little_cpumask) {
-			cpu = __pick_idle_cpu(llcx->little_cpumask, 0);
+		/* Try thermal-aware selection first for little cores if thermal tracking enabled */
+		if (p2dq_config.thermal_enabled) {
+			cpu = pick_idle_thermal_aware(llcx->little_cpumask, p);
 			if (cpu >= 0) {
 				*is_idle = true;
 				goto found_cpu;
 			}
 		}
+		/* Fallback to non-thermal-aware if thermal disabled or no idle little cores */
+		if (llcx->little_cpumask &&
+		    (cpu = __pick_idle_cpu(llcx->little_cpumask,
+					   SCX_PICK_IDLE_CORE)) >= 0) {
+			*is_idle = true;
+			goto found_cpu;
+		}
+		if (llcx->little_cpumask &&
+		    (cpu = __pick_idle_cpu(llcx->little_cpumask,
+					   0)) >= 0) {
+			*is_idle = true;
+			goto found_cpu;
+		}
 	}
 
@@ -1912,7 +2294,7 @@ static int p2dq_running_impl(struct task_struct *p)
 
 	/* Decay PELT metrics when task starts running (0 delta for decay-only) */
 	if (p2dq_config.pelt_enabled)
-		update_task_pelt(taskc, now, 0);
+		update_task_pelt(taskc, now, 0, task_cpu);
 
 	return 0;
 }
@@ -1924,6 +2306,7 @@ void BPF_STRUCT_OPS(p2dq_stopping, struct task_struct *p, bool runnable)
 	struct cpu_ctx *cpuc;
 	u64 used, scaled_used, last_dsq_slice_ns;
 	u64 now = bpf_ktime_get_ns();
+	s32 task_cpu = scx_bpf_task_cpu(p);
 
 	if (unlikely(!(taskc = lookup_task_ctx(p)) ||
 		     !(llcx = lookup_llc_ctx(taskc->llc_id))))
@@ -1940,7 +2323,7 @@ void BPF_STRUCT_OPS(p2dq_stopping, struct task_struct *p, bool runnable)
 	// time. When a nice task was run we need to update the cpu_ctx so that
 	// tasks are no longer enqueued to the local DSQ.
 	if (task_ctx_test_flag(taskc, TASK_CTX_F_WAS_NICE) &&
-	    (cpuc = lookup_cpu_ctx(scx_bpf_task_cpu(p)))) {
+	    (cpuc = lookup_cpu_ctx(task_cpu))) {
 		cpu_ctx_clear_flag(cpuc, CPU_CTX_F_NICE_TASK);
 		task_ctx_clear_flag(taskc, TASK_CTX_F_WAS_NICE);
 	}
@@ -1958,7 +2341,7 @@ void BPF_STRUCT_OPS(p2dq_stopping, struct task_struct *p, bool runnable)
 
 	/* Update PELT metrics if enabled */
 	if (p2dq_config.pelt_enabled) {
-		update_task_pelt(taskc, now, used);
+		update_task_pelt(taskc, now, used, task_cpu);
 		aggregate_pelt_to_llc(llcx, taskc,
 				      task_ctx_test_flag(taskc, TASK_CTX_F_INTERACTIVE),
 				      !task_ctx_test_flag(taskc, TASK_CTX_F_ALL_CPUS));
@@ -3235,6 +3618,50 @@ void BPF_STRUCT_OPS(p2dq_exit, struct scx_exit_info *ei)
 	UEI_RECORD(uei, ei);
 }
+/*
+ * Thermal Pressure Tracking (requires CONFIG_SCHED_HW_PRESSURE=y)
+ *
+ * Thermal tracking lets the scheduler avoid thermally throttled CPUs.
+ * This program has autoload disabled by default and is conditionally
+ * enabled from userspace if the kernel supports the hw_pressure_update tracepoint.
+ *
+ * The '?' suffix makes this program optional - veristat and the verifier
+ * will skip it if the tracepoint doesn't exist in the kernel.
+ * + * Tracepoint: hw_pressure_update + * Fires when kernel detects thermal throttling on a CPU + * + * Arguments: + * cpu: CPU ID experiencing pressure + * hw_pressure: Pressure value (0 = no throttling, 1024 = max capacity lost) + * + * Note: This tracepoint only exists on ARM/ARM64 architectures + */ +#if defined(__aarch64__) || defined(__arm__) +__weak __hidden SEC("tp_btf/hw_pressure_update?") +int BPF_PROG(on_thermal_pressure, u32 cpu, u64 hw_pressure) +{ + struct cpu_ctx *cpuc; + + if (cpu >= MAX_CPUS) + return 0; + + cpuc = lookup_cpu_ctx(cpu); + if (!cpuc) + return 0; + + cpuc->perf = (u32)hw_pressure; + + if (hw_pressure > 512) { + scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE); + stat_inc(P2DQ_STAT_THERMAL_KICK); + } + + return 0; +} +#endif + + #if P2DQ_CREATE_STRUCT_OPS s32 BPF_STRUCT_OPS_SLEEPABLE(p2dq_init) { diff --git a/scheds/rust/scx_p2dq/src/bpf/types.h b/scheds/rust/scx_p2dq/src/bpf/types.h index ac0a9abd1..cd123df62 100644 --- a/scheds/rust/scx_p2dq/src/bpf/types.h +++ b/scheds/rust/scx_p2dq/src/bpf/types.h @@ -46,7 +46,7 @@ struct cpu_ctx { u64 slice_ns; u32 core_id; u32 dsq_index; - u32 perf; + u32 perf; /* Thermal pressure (0-1024, 0=no throttling, 1024=max capacity lost) */ u32 flags; /* Bitmask for interactive, is_big, nice_task */ u64 ran_for; u32 node_id; diff --git a/scheds/rust/scx_p2dq/src/energy.rs b/scheds/rust/scx_p2dq/src/energy.rs new file mode 100644 index 000000000..8c885e0a0 --- /dev/null +++ b/scheds/rust/scx_p2dq/src/energy.rs @@ -0,0 +1,192 @@ +use anyhow::Result; +use scx_utils::{EnergyModel as KernelEnergyModel, Topology}; +use std::collections::BTreeMap; +use tracing::info; + +/// Energy characteristics for a CPU type +#[derive(Debug, Clone)] +pub struct CpuEnergyProfile { + pub capacity: u32, // Relative performance (0-1024) + pub base_power_mw: u32, // Base power consumption (mW) + pub dynamic_power_mw: u32, // Dynamic power at 100% util (mW) + pub efficiency: f32, // Performance per watt +} + +impl CpuEnergyProfile { + /// Calculate energy cost coefficient for placement decisions + /// Returns cost in arbitrary units (higher = less efficient) + pub fn energy_cost(&self) -> u32 { + // Cost = power / capacity (mW per unit of performance) + // Scale to integer for BPF + let total_power = self.base_power_mw + self.dynamic_power_mw; + ((total_power as f64 / self.capacity as f64) * 1024.0) as u32 + } +} + +pub struct EnergyModel { + /// Map from CPU ID to energy profile + cpu_profiles: BTreeMap, + /// Utilization threshold for small tasks (prefer little cores) + pub small_task_threshold: u32, + /// Utilization threshold for large tasks (prefer big cores) + pub large_task_threshold: u32, +} + +impl EnergyModel { + /// Create new energy model from system topology + /// Tries to use kernel energy model first, falls back to heuristics + pub fn new(topo: &Topology) -> Result { + let mut cpu_profiles = BTreeMap::new(); + + // Try to use kernel energy model if available + if let Ok(kernel_em) = KernelEnergyModel::new() { + info!("Using kernel energy model from /sys/kernel/debug/energy_model"); + + for cpu in topo.all_cpus.values() { + let profile = Self::create_profile_from_kernel_em(cpu, &kernel_em); + cpu_profiles.insert(cpu.id, profile); + } + } else { + info!("Kernel energy model not available, using frequency-based estimates"); + + for cpu in topo.all_cpus.values() { + let profile = Self::create_profile_from_heuristics(cpu, topo); + cpu_profiles.insert(cpu.id, profile); + } + } + + // Derive thresholds from actual capacity distribution + let (small_thresh, 
large_thresh) = Self::derive_thresholds(topo); + + Ok(EnergyModel { + cpu_profiles, + small_task_threshold: small_thresh, + large_task_threshold: large_thresh, + }) + } + + /// Derive task size thresholds from CPU capacity distribution + fn derive_thresholds(topo: &Topology) -> (u32, u32) { + // Find min and max capacities + let mut min_cap = u32::MAX; + let mut max_cap = 0u32; + + for cpu in topo.all_cpus.values() { + let cap = cpu.cpu_capacity as u32; + min_cap = min_cap.min(cap); + max_cap = max_cap.max(cap); + } + + // If homogeneous (all cores similar capacity), use percentage-based thresholds + if max_cap - min_cap < 200 { + // Less than ~20% variation + return (256, 768); // 25% and 75% of 1024 + } + + // For big.LITTLE or heterogeneous systems: + // Small task threshold: 25% of little core capacity + // Large task threshold: 75% of big core capacity + let small_thresh = (min_cap / 4).max(128); + let large_thresh = ((max_cap * 3) / 4).min(896); + + (small_thresh, large_thresh) + } + + /// Create energy profile from kernel energy model + fn create_profile_from_kernel_em( + cpu: &scx_utils::Cpu, + kernel_em: &KernelEnergyModel, + ) -> CpuEnergyProfile { + if let Some(pd) = kernel_em.get_pd_by_cpu_id(cpu.id) { + // Use highest performance state (max frequency) for power estimates + if let Some((_, ps)) = pd.perf_table.last_key_value() { + // Kernel provides power in microwatts, convert to milliwatts + let dynamic_power_mw = (ps.power / 1000) as u32; + + // Estimate idle power as ~2-5% of dynamic power + let base_power_mw = (dynamic_power_mw / 30).max(10); + + return CpuEnergyProfile { + capacity: cpu.cpu_capacity as u32, + base_power_mw, + dynamic_power_mw, + efficiency: (cpu.cpu_capacity as f32) / (dynamic_power_mw as f32), + }; + } + } + + // Fallback if we can't find this CPU in the energy model + Self::create_profile_from_heuristics(cpu, &Topology::new().unwrap()) + } + + /// Create energy profile based on CPU characteristics using heuristics + /// Uses frequency and capacity to estimate power consumption + fn create_profile_from_heuristics(cpu: &scx_utils::Cpu, topo: &Topology) -> CpuEnergyProfile { + // Find max capacity in the system to determine core type + let max_capacity = topo + .all_cpus + .values() + .map(|c| c.cpu_capacity) + .max() + .unwrap_or(1024); + + // Determine if this is a big or little core + // Consider it "big" if capacity is >= 78% of max capacity + let is_big_core = cpu.cpu_capacity >= (max_capacity * 78) / 100; + + // Power scales roughly with frequency and voltage + // P ≈ C * V^2 * f, and V ≈ f for modern CPUs + // So P ≈ k * f^3 (simplified) + + let freq_ratio = if cpu.max_freq > 0 { + cpu.max_freq as f64 / 2500000.0 // Normalize to ~2.5GHz baseline + } else { + 1.0 + }; + + let capacity_ratio = cpu.cpu_capacity as f64 / 1024.0; + + if is_big_core { + // Big core - scale power based on frequency + let base_dynamic_power = 3000.0; // 3W baseline for 2.5GHz big core + let dynamic_power_mw = (base_dynamic_power * freq_ratio.powf(2.5)) as u32; + let base_power_mw = (dynamic_power_mw / 60).max(30); // ~1.6-3% of dynamic + + CpuEnergyProfile { + capacity: cpu.cpu_capacity as u32, + base_power_mw, + dynamic_power_mw, + efficiency: (cpu.cpu_capacity as f32) / (dynamic_power_mw as f32), + } + } else { + // Little core - more efficient, lower power + let base_dynamic_power = 1200.0; // 1.2W baseline for little core + let dynamic_power_mw = + (base_dynamic_power * freq_ratio.powf(2.5) * capacity_ratio) as u32; + let base_power_mw = (dynamic_power_mw / 50).max(15); 
// ~2% of dynamic + + CpuEnergyProfile { + capacity: cpu.cpu_capacity as u32, + base_power_mw, + dynamic_power_mw, + efficiency: (cpu.cpu_capacity as f32) / (dynamic_power_mw as f32), + } + } + } + + /// Get energy cost for a CPU + pub fn cpu_energy_cost(&self, cpu: usize) -> u32 { + self.cpu_profiles + .get(&cpu) + .map(|p| p.energy_cost()) + .unwrap_or(1024) + } + + /// Get CPU capacity + pub fn cpu_capacity(&self, cpu: usize) -> u32 { + self.cpu_profiles + .get(&cpu) + .map(|p| p.capacity) + .unwrap_or(1024) + } +} diff --git a/scheds/rust/scx_p2dq/src/lib.rs b/scheds/rust/scx_p2dq/src/lib.rs index f5335ab98..8f6ae47d6 100644 --- a/scheds/rust/scx_p2dq/src/lib.rs +++ b/scheds/rust/scx_p2dq/src/lib.rs @@ -4,6 +4,7 @@ // GNU General Public License version 2. pub mod bpf_intf; pub mod bpf_skel; +pub mod energy; pub use bpf_skel::types; use scx_utils::cli::TopologyArgs; @@ -339,6 +340,13 @@ pub struct SchedulerOpts { #[clap(long, action = clap::ArgAction::SetTrue)] pub wakeup_preemption: bool, + /// Enable Energy-Aware Scheduling (EAS) for big.LITTLE CPUs. + /// Places low-utilization tasks on efficient cores and high-utilization + /// tasks on performance cores. Requires PELT to be enabled. Improves + /// battery life on heterogeneous systems. + #[clap(long, default_value_t = false, action = clap::ArgAction::Set)] + pub enable_eas: bool, + #[clap(flatten, next_help_heading = "Topology Options")] pub topo: TopologyArgs, } @@ -468,6 +476,9 @@ macro_rules! init_open_skel { rodata.p2dq_config.pelt_enabled = MaybeUninit::new(opts.enable_pelt); rodata.p2dq_config.fork_balance = MaybeUninit::new(opts.fork_balance); rodata.p2dq_config.exec_balance = MaybeUninit::new(opts.exec_balance); + rodata.p2dq_config.enable_eas = MaybeUninit::new(opts.enable_eas); + rodata.p2dq_config.small_task_threshold = 256; // 25% utilization + rodata.p2dq_config.large_task_threshold = 768; // 75% utilization // Latency priority config rodata.latency_config.latency_priority_enabled = MaybeUninit::new(opts.latency_priority); @@ -483,7 +494,16 @@ macro_rules! init_open_skel { #[macro_export] macro_rules! init_skel { - ($skel: expr, $topo: expr) => { + ($skel: expr, $topo: expr) => {{ + use $crate::energy::EnergyModel; + + // Initialize energy model for EAS + let energy_model = EnergyModel::new(&$topo).unwrap_or_else(|e| { + eprintln!("Warning: Failed to create energy model: {}", e); + eprintln!("Energy-aware scheduling will use fallback values"); + EnergyModel::new(&$topo).unwrap() // This should not fail + }); + for cpu in $topo.all_cpus.values() { $skel.maps.bss_data.as_mut().unwrap().big_core_ids[cpu.id] = if cpu.core_type == ($crate::CoreType::Big { turbo: true }) { @@ -494,9 +514,15 @@ macro_rules! 
init_skel { $skel.maps.bss_data.as_mut().unwrap().cpu_core_ids[cpu.id] = cpu.core_id as u32; $skel.maps.bss_data.as_mut().unwrap().cpu_llc_ids[cpu.id] = cpu.llc_id as u64; $skel.maps.bss_data.as_mut().unwrap().cpu_node_ids[cpu.id] = cpu.node_id as u64; + + // Populate energy model data + $skel.maps.bss_data.as_mut().unwrap().cpu_capacity[cpu.id] = + energy_model.cpu_capacity(cpu.id) as u16; + $skel.maps.bss_data.as_mut().unwrap().cpu_energy_cost[cpu.id] = + energy_model.cpu_energy_cost(cpu.id) as u16; } for llc in $topo.all_llcs.values() { $skel.maps.bss_data.as_mut().unwrap().llc_ids[llc.id] = llc.id as u64; } - }; + }}; } diff --git a/scheds/rust/scx_p2dq/src/main.rs b/scheds/rust/scx_p2dq/src/main.rs index 8457903d0..7920ff50d 100644 --- a/scheds/rust/scx_p2dq/src/main.rs +++ b/scheds/rust/scx_p2dq/src/main.rs @@ -46,6 +46,9 @@ use bpf_intf::stat_idx_P2DQ_STAT_DIRECT; use bpf_intf::stat_idx_P2DQ_STAT_DISPATCH_PICK2; use bpf_intf::stat_idx_P2DQ_STAT_DSQ_CHANGE; use bpf_intf::stat_idx_P2DQ_STAT_DSQ_SAME; +use bpf_intf::stat_idx_P2DQ_STAT_EAS_BIG_SELECT; +use bpf_intf::stat_idx_P2DQ_STAT_EAS_FALLBACK; +use bpf_intf::stat_idx_P2DQ_STAT_EAS_LITTLE_SELECT; use bpf_intf::stat_idx_P2DQ_STAT_ENQ_CPU; use bpf_intf::stat_idx_P2DQ_STAT_ENQ_INTR; use bpf_intf::stat_idx_P2DQ_STAT_ENQ_LLC; @@ -59,6 +62,8 @@ use bpf_intf::stat_idx_P2DQ_STAT_KEEP; use bpf_intf::stat_idx_P2DQ_STAT_LLC_MIGRATION; use bpf_intf::stat_idx_P2DQ_STAT_NODE_MIGRATION; use bpf_intf::stat_idx_P2DQ_STAT_SELECT_PICK2; +use bpf_intf::stat_idx_P2DQ_STAT_THERMAL_AVOID; +use bpf_intf::stat_idx_P2DQ_STAT_THERMAL_KICK; use bpf_intf::stat_idx_P2DQ_STAT_WAKE_LLC; use bpf_intf::stat_idx_P2DQ_STAT_WAKE_MIG; use bpf_intf::stat_idx_P2DQ_STAT_WAKE_PREV; @@ -153,6 +158,12 @@ impl<'a> Scheduler<'a> { https://github.com/sched-ext/scx/issues/new?labels=scx_p2dq&title=scx_p2dq:%20New%20Issue&assignees=hodgesds&body=Kernel%20version:%20(fill%20me%20out)%0ADistribution:%20(fill%20me%20out)%0AHardware:%20(fill%20me%20out)%0A%0AIssue:%20(fill%20me%20out)" )?; + // Disable autoload for thermal pressure tracepoint by default + // Will be conditionally enabled if kernel supports it + // Note: This tracepoint only exists on ARM/ARM64 architectures + #[cfg(any(target_arch = "aarch64", target_arch = "arm"))] + open_skel.progs.on_thermal_pressure.set_autoload(false); + // Apply hardware-specific optimizations before macro let hw_profile = scx_p2dq::HardwareProfile::detect(); let mut opts_optimized = opts.clone(); @@ -168,6 +179,41 @@ impl<'a> Scheduler<'a> { &hw_profile )?; + // Thermal pressure tracking (ARM/ARM64 only) + #[cfg(any(target_arch = "aarch64", target_arch = "arm"))] + { + let thermal_enabled = std::path::Path::new( + "/sys/kernel/tracing/events/thermal_pressure/hw_pressure_update", + ) + .exists() + || std::path::Path::new( + "/sys/kernel/debug/tracing/events/thermal_pressure/hw_pressure_update", + ) + .exists(); + + if thermal_enabled { + debug!( + "Kernel supports thermal pressure tracking, enabling hw_pressure_update tracepoint" + ); + open_skel.progs.on_thermal_pressure.set_autoload(true); + stats::set_thermal_tracking_enabled(true); + + open_skel + .maps + .rodata_data + .as_mut() + .unwrap() + .p2dq_config + .thermal_enabled = std::mem::MaybeUninit::new(true); + } else { + debug!("Kernel does not support thermal pressure tracking (CONFIG_SCHED_HW_PRESSURE not enabled)"); + } + } + + if opts_optimized.enable_eas { + stats::set_eas_enabled(true); + } + if opts.queued_wakeup { open_skel.struct_ops.p2dq_mut().flags |= 
*compat::SCX_OPS_ALLOW_QUEUED_WAKEUP; } @@ -233,6 +279,11 @@ impl<'a> Scheduler<'a> { exec_balance: stats[stat_idx_P2DQ_STAT_EXEC_BALANCE as usize], fork_same_llc: stats[stat_idx_P2DQ_STAT_FORK_SAME_LLC as usize], exec_same_llc: stats[stat_idx_P2DQ_STAT_EXEC_SAME_LLC as usize], + thermal_kick: stats[stat_idx_P2DQ_STAT_THERMAL_KICK as usize], + thermal_avoid: stats[stat_idx_P2DQ_STAT_THERMAL_AVOID as usize], + eas_little_select: stats[stat_idx_P2DQ_STAT_EAS_LITTLE_SELECT as usize], + eas_big_select: stats[stat_idx_P2DQ_STAT_EAS_BIG_SELECT as usize], + eas_fallback: stats[stat_idx_P2DQ_STAT_EAS_FALLBACK as usize], } } diff --git a/scheds/rust/scx_p2dq/src/stats.rs b/scheds/rust/scx_p2dq/src/stats.rs index 7c1e4dd88..aa44dfafb 100644 --- a/scheds/rust/scx_p2dq/src/stats.rs +++ b/scheds/rust/scx_p2dq/src/stats.rs @@ -11,6 +11,28 @@ use scx_stats_derive::Stats; use serde::Deserialize; use serde::Serialize; +// Global flag to track if thermal pressure tracking is enabled +static THERMAL_TRACKING_ENABLED: AtomicBool = AtomicBool::new(false); + +// Global flag to track if energy-aware scheduling is enabled +static EAS_ENABLED: AtomicBool = AtomicBool::new(false); + +pub fn set_thermal_tracking_enabled(enabled: bool) { + THERMAL_TRACKING_ENABLED.store(enabled, Ordering::Relaxed); +} + +pub fn is_thermal_tracking_enabled() -> bool { + THERMAL_TRACKING_ENABLED.load(Ordering::Relaxed) +} + +pub fn set_eas_enabled(enabled: bool) { + EAS_ENABLED.store(enabled, Ordering::Relaxed); +} + +pub fn is_eas_enabled() -> bool { + EAS_ENABLED.load(Ordering::Relaxed) +} + #[stat_doc] #[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)] #[stat(top)] @@ -59,6 +81,16 @@ pub struct Metrics { pub fork_same_llc: u64, #[stat(desc = "Number of times exec stayed on same LLC")] pub exec_same_llc: u64, + #[stat(desc = "Number of CPU kicks due to thermal pressure")] + pub thermal_kick: u64, + #[stat(desc = "Number of times throttled CPUs were avoided")] + pub thermal_avoid: u64, + #[stat(desc = "Number of times EAS placed task on little core")] + pub eas_little_select: u64, + #[stat(desc = "Number of times EAS placed task on big core")] + pub eas_big_select: u64, + #[stat(desc = "Number of times EAS fell back to non-preferred core type")] + pub eas_fallback: u64, } impl Metrics { @@ -78,8 +110,9 @@ impl Metrics { self.enq_intr, self.enq_mig, )?; - writeln!( - w, + + // Build the stats line conditionally based on thermal tracking availability + let mut stats_line = format!( "\twake prev/llc/mig {}/{}/{}\n\tpick2 select/dispatch {}/{}\n\tmigrations llc/node: {}/{}\n\tfork balance/same {}/{}\n\texec balance/same {}/{}", self.wake_prev, self.wake_llc, @@ -92,7 +125,25 @@ impl Metrics { self.fork_same_llc, self.exec_balance, self.exec_same_llc, - )?; + ); + + // Only show thermal stats if thermal tracking is enabled + if is_thermal_tracking_enabled() { + stats_line.push_str(&format!( + "\n\tthermal kick/avoid {}/{}", + self.thermal_kick, self.thermal_avoid, + )); + } + + // Only show EAS stats if energy-aware scheduling is enabled + if is_eas_enabled() { + stats_line.push_str(&format!( + "\n\tEAS little/big/fallback {}/{}/{}", + self.eas_little_select, self.eas_big_select, self.eas_fallback, + )); + } + + writeln!(w, "{}", stats_line)?; Ok(()) } @@ -120,6 +171,11 @@ impl Metrics { exec_balance: self.exec_balance - rhs.exec_balance, fork_same_llc: self.fork_same_llc - rhs.fork_same_llc, exec_same_llc: self.exec_same_llc - rhs.exec_same_llc, + thermal_kick: self.thermal_kick - rhs.thermal_kick, + thermal_avoid: 
self.thermal_avoid - rhs.thermal_avoid, + eas_little_select: self.eas_little_select - rhs.eas_little_select, + eas_big_select: self.eas_big_select - rhs.eas_big_select, + eas_fallback: self.eas_fallback - rhs.eas_fallback, } } }
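
For reference, the idle-CPU score computed in select_best_idle_cpu() above weighs effective capacity against energy cost and zeroes out throttled CPUs; with the userspace changes above, this path is only exercised when the scheduler is started with the new --enable-eas option. A standalone sketch of that scoring, with an assumed helper name and assumed example capacities and energy costs (not part of the patch):

/* sketch_eas_score.c - illustrative only; not part of scx_p2dq */
#include <stdint.h>
#include <stdio.h>

/* Mirrors the scoring in select_best_idle_cpu(): capacity weighted up,
 * energy cost weighted down, thermally throttled CPUs scored 0. */
static uint32_t idle_cpu_score(uint32_t effective_capacity, uint32_t energy_cost,
			       int throttled)
{
	uint32_t score;

	if (throttled)
		return 0;

	score = effective_capacity * 10;
	if (energy_cost < score)
		score -= energy_cost / 10;
	else
		score = 1;
	return score;
}

int main(void)
{
	/* Assumed example values: big core capacity 1024 / cost 3000, little core 512 / 1500 */
	printf("big unthrottled:    %u\n", idle_cpu_score(1024, 3000, 0)); /* 9940 */
	printf("little unthrottled: %u\n", idle_cpu_score(512, 1500, 0));  /* 4970 */
	printf("big throttled:      %u\n", idle_cpu_score(1024, 3000, 1)); /* 0 */
	return 0;
}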