From a110a41ddca04592c8171372eaa3a9001db1db5d Mon Sep 17 00:00:00 2001 From: Lizhi Hou <36547078+houlz0507@users.noreply.github.com> Date: Mon, 2 Dec 2024 11:05:23 -0800 Subject: [PATCH] refactor power management (#313) Signed-off-by: Lizhi Hou --- src/driver/amdxdna/aie2_ctx.c | 6 + src/driver/amdxdna/aie2_debugfs.c | 15 +- src/driver/amdxdna/aie2_message.c | 9 +- src/driver/amdxdna/aie2_pci.c | 95 +++++++----- src/driver/amdxdna/aie2_pci.h | 107 +++++++------ src/driver/amdxdna/aie2_pm.c | 151 ++++++++----------- src/driver/amdxdna/aie2_smu.c | 242 +++++------------------------- src/driver/amdxdna/aie2_solver.c | 62 ++++---- src/driver/amdxdna/aie2_solver.h | 9 +- src/driver/amdxdna/npu1_regs.c | 46 ++---- src/driver/amdxdna/npu4_family.h | 41 +---- src/driver/amdxdna/npu4_regs.c | 27 ++-- 12 files changed, 310 insertions(+), 500 deletions(-) diff --git a/src/driver/amdxdna/aie2_ctx.c b/src/driver/amdxdna/aie2_ctx.c index ce489679..fcb9ef2b 100644 --- a/src/driver/amdxdna/aie2_ctx.c +++ b/src/driver/amdxdna/aie2_ctx.c @@ -610,6 +610,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) struct drm_gpu_scheduler *sched; struct amdxdna_hwctx_priv *priv; struct amdxdna_gem_obj *heap; + struct amdxdna_dev_hdl *ndev; unsigned int wq_flags; int i, ret; @@ -722,6 +723,8 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) goto release_resource; } hwctx->status = HWCTX_STATE_INIT; + ndev = xdna->dev_handle; + ndev->hwctx_num++; XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name); @@ -753,10 +756,13 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) { + struct amdxdna_dev_hdl *ndev; struct amdxdna_dev *xdna; int idx; xdna = hwctx->client->xdna; + ndev = xdna->dev_handle; + ndev->hwctx_num--; drm_sched_wqueue_stop(&hwctx->priv->sched); /* Now, scheduler will not send command to device. 
*/ diff --git a/src/driver/amdxdna/aie2_debugfs.c b/src/driver/amdxdna/aie2_debugfs.c index 5c7d787d..73ae8f86 100644 --- a/src/driver/amdxdna/aie2_debugfs.c +++ b/src/driver/amdxdna/aie2_debugfs.c @@ -227,7 +227,10 @@ static ssize_t aie2_dpm_level_set(struct file *file, const char __user *ptr, } mutex_lock(&ndev->xdna->dev_lock); - ret = aie2_smu_set_fixed_dpm_level(ndev, val); + ndev->dft_dpm_level = val; + if (ndev->pw_mode != POWER_MODE_DEFAULT) + val = ndev->dpm_level; + ret = ndev->priv->hw_ops.set_dpm(ndev, val); mutex_unlock(&ndev->xdna->dev_lock); if (ret) { XDNA_ERR(ndev->xdna, "Setting dpm_level:%d failed, ret: %d", val, ret); @@ -239,15 +242,13 @@ static ssize_t aie2_dpm_level_set(struct file *file, const char __user *ptr, static int aie2_dpm_level_get(struct seq_file *m, void *unused) { struct amdxdna_dev_hdl *ndev = m->private; - const struct dpm_clk *dpm_table; - u32 num_dpm_levels; + const struct dpm_clk_freq *dpm_table; int dpm_level; int i; - dpm_table = SMU_DPM_TABLE_ENTRY(ndev, 0); - dpm_level = aie2_smu_get_dpm_level(ndev); - num_dpm_levels = SMU_DPM_MAX(ndev); - for (i = 0; i <= num_dpm_levels; i++) { + dpm_table = ndev->priv->dpm_clk_tbl; + dpm_level = ndev->dpm_level; + for (i = 0; dpm_table[i].hclk; i++) { u32 npuclk = dpm_table[i].npuclk; u32 hclk = dpm_table[i].hclk; diff --git a/src/driver/amdxdna/aie2_message.c b/src/driver/amdxdna/aie2_message.c index a41a1f87..4100bd6a 100644 --- a/src/driver/amdxdna/aie2_message.c +++ b/src/driver/amdxdna/aie2_message.c @@ -65,11 +65,18 @@ int aie2_resume_fw(struct amdxdna_dev_hdl *ndev) int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value) { DECLARE_AIE2_MSG(set_runtime_cfg, MSG_OP_SET_RUNTIME_CONFIG); + int ret; req.type = type; req.value = value; - return aie2_send_mgmt_msg_wait(ndev, &msg); + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) { + XDNA_ERR(ndev->xdna, "Failed to set runtime config, ret %d", ret); + return ret; + } + + return 0; } int 
aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value) diff --git a/src/driver/amdxdna/aie2_pci.c b/src/driver/amdxdna/aie2_pci.c index 9acc13ba..f7e6f7b7 100644 --- a/src/driver/amdxdna/aie2_pci.c +++ b/src/driver/amdxdna/aie2_pci.c @@ -177,37 +177,32 @@ static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev) return ret; } -static int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev) +int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev, + enum rt_config_category category, u32 *val) { - int i; + const struct rt_config *cfg; + u32 value; + int ret; - for (i = 0; i < ndev->priv->num_rt_cfg; i++) { - const struct rt_config *cfg = &ndev->priv->rt_config[i]; - u64 value; - int ret; + for (cfg = ndev->priv->rt_config; cfg->type; cfg++) { + if (cfg->category != category) + continue; + value = val ? *val : cfg->value; #ifdef AMDXDNA_DEVEL if (priv_load && cfg->type == ndev->priv->priv_load_cfg.type) { cfg = &ndev->priv->priv_load_cfg; + value = cfg->value; XDNA_INFO(ndev->xdna, "Set runtime type %d value %d", cfg->type, cfg->value); } #endif - ret = aie2_set_runtime_cfg(ndev, cfg->type, cfg->value); + ret = aie2_set_runtime_cfg(ndev, cfg->type, value); if (ret) { XDNA_ERR(ndev->xdna, "Set runtime type %d value %d failed", - cfg->type, cfg->value); - return ret; - } - - ret = aie2_get_runtime_cfg(ndev, cfg->type, &value); - if (ret) { - XDNA_ERR(ndev->xdna, "Get runtime cfg failed"); + cfg->type, value); return ret; } - - if (value != cfg->value) - return -EINVAL; } return 0; @@ -244,7 +239,7 @@ static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev) } } - ret = aie2_runtime_cfg(ndev); + ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_INIT, NULL); if (ret) { XDNA_ERR(ndev->xdna, "Runtime config failed"); return ret; @@ -306,10 +301,25 @@ static void aie2_mgmt_fw_fini(struct amdxdna_dev_hdl *ndev) XDNA_DBG(ndev->xdna, "npu firmware suspended"); } +static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 dpm_level) +{ + struct amdxdna_dev *xdna = 
to_xdna_dev(ddev); + struct amdxdna_dev_hdl *ndev; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + + ndev = xdna->dev_handle; + ndev->dft_dpm_level = dpm_level; + if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == dpm_level) + return 0; + + return ndev->priv->hw_ops.set_dpm(ndev, dpm_level); +} + static struct xrs_action_ops aie2_xrs_actions = { .load_hwctx = aie2_xrs_load_hwctx, .unload_hwctx = aie2_xrs_unload_hwctx, - .set_dpm_level = aie2_smu_set_dft_dpm_level, + .set_dft_dpm_level = aie2_xrs_set_dft_dpm_level, }; static void aie2_hw_stop(struct amdxdna_dev *xdna) @@ -317,7 +327,11 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna) struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev); struct amdxdna_dev_hdl *ndev = xdna->dev_handle; - aie2_pm_stop(ndev); + if (ndev->dev_status <= AIE2_DEV_INIT) { + XDNA_ERR(xdna, "device is already stopped"); + return; + } + aie2_mgmt_fw_fini(ndev); xdna_mailbox_stop_channel(ndev->mgmt_chann); xdna_mailbox_destroy_channel(ndev->mgmt_chann); @@ -330,6 +344,8 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna) aie2_smu_stop(ndev); pci_clear_master(pdev); pci_disable_device(pdev); + + ndev->dev_status = AIE2_DEV_INIT; } static int aie2_hw_start(struct amdxdna_dev *xdna) @@ -340,6 +356,11 @@ static int aie2_hw_start(struct amdxdna_dev *xdna) u32 xdna_mailbox_intr_reg; int mgmt_mb_irq, ret; + if (ndev->dev_status >= AIE2_DEV_START) { + XDNA_INFO(xdna, "device is already started"); + return 0; + } + ret = pci_enable_device(pdev); if (ret) { XDNA_ERR(xdna, "failed to enable device, ret %d", ret); @@ -396,18 +417,20 @@ static int aie2_hw_start(struct amdxdna_dev *xdna) goto destroy_mbox; } - ret = aie2_mgmt_fw_init(ndev); + ret = aie2_pm_init(ndev); if (ret) { - XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret); + XDNA_ERR(xdna, "failed to init pm, ret %d", ret); goto destroy_mgmt_chann; } - ret = aie2_pm_start(ndev); + ret = aie2_mgmt_fw_init(ndev); if (ret) { - XDNA_ERR(xdna, "failed to start 
power manager, ret %d", ret); + XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret); goto destroy_mgmt_chann; } + ndev->dev_status = AIE2_DEV_START; + return 0; destroy_mgmt_chann: @@ -535,10 +558,6 @@ static int aie2_init(struct amdxdna_dev *xdna) } xdna->dev_handle = ndev; - aie2_smu_setup(ndev); - - ndev->pw_mode = POWER_MODE_DEFAULT; - ndev->clk_gate_enabled = true; ret = aie2_hw_start(xdna); if (ret) { XDNA_ERR(xdna, "start npu failed, ret %d", ret); @@ -552,11 +571,11 @@ static int aie2_init(struct amdxdna_dev *xdna) } ndev->total_col = min(aie2_max_col, ndev->metadata.cols); - xrs_cfg.max_dpm_level = SMU_DPM_MAX(ndev); - xrs_cfg.clk_list.num_levels = ndev->priv->smu_npu_dpm_levels; - xrs_cfg.clk_list.cu_clk_list = ndev->priv->smu_npu_dpm_clk_table; + xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1; + for (i = 0; i < xrs_cfg.clk_list.num_levels; i++) + xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk; xrs_cfg.sys_eff_factor = 1; - xrs_cfg.dev = xdna->ddev.dev; + xrs_cfg.ddev = &xdna->ddev; xrs_cfg.actions = &aie2_xrs_actions; xrs_cfg.total_col = ndev->total_col; @@ -782,13 +801,11 @@ static int aie2_get_clock_metadata(struct amdxdna_client *client, if (!clock) return -ENOMEM; - memcpy(clock->mp_npu_clock.name, aie2_smu_get_mpnpu_clock_name(ndev), - sizeof(clock->mp_npu_clock.name)); - clock->mp_npu_clock.freq_mhz = aie2_smu_get_mpnpu_clock_freq(ndev); - - memcpy(clock->h_clock.name, aie2_smu_get_hclock_name(ndev), - sizeof(clock->h_clock.name)); - clock->h_clock.freq_mhz = aie2_smu_get_hclock_freq(ndev); + snprintf(clock->mp_npu_clock.name, sizeof(clock->mp_npu_clock.name), + "MP-NPU Clock"); + clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq; + snprintf(clock->h_clock.name, sizeof(clock->h_clock.name), "H Clock"); + clock->h_clock.freq_mhz = ndev->hclk_freq; if (copy_to_user(u64_to_user_ptr(args->buffer), clock, sizeof(*clock))) ret = -EFAULT; diff --git a/src/driver/amdxdna/aie2_pci.h b/src/driver/amdxdna/aie2_pci.h index 
6b6bea33..d27a864c 100644 --- a/src/driver/amdxdna/aie2_pci.h +++ b/src/driver/amdxdna/aie2_pci.h @@ -147,23 +147,20 @@ struct aie_metadata { struct aie_tile_metadata shim; }; -struct clock { - char name[16]; - u32 max_freq_mhz; - u32 freq_mhz; +enum rt_config_category { + AIE2_RT_CFG_INIT, + AIE2_RT_CFG_CLK_GATING, }; -struct smu { - const struct dpm_clk *dpm_table; - u32 num_dpm_levels; - struct clock mp_npu_clock; - struct clock h_clock; - u32 curr_dpm_level; - u32 dft_dpm_level; - u32 fixed_dpm_level; -#define SMU_POWER_OFF 0 -#define SMU_POWER_ON 1 - u32 power_state; +struct rt_config { + u32 type; + u32 value; + u32 category; +}; + +struct dpm_clk_freq { + u32 npuclk; + u32 hclk; }; #ifdef AMDXDNA_DEVEL @@ -202,6 +199,17 @@ struct amdxdna_hwctx_priv { struct drm_syncobj *syncobj; }; +enum aie2_dev_status { + AIE2_DEV_UNINIT, + AIE2_DEV_INIT, + AIE2_DEV_START, +}; + +enum aie2_power_state { + SMU_POWER_OFF, + SMU_POWER_ON, +}; + struct async_events; struct amdxdna_dev_hdl { @@ -219,17 +227,26 @@ struct amdxdna_dev_hdl { u32 mgmt_prot_minor; u32 total_col; - u32 smu_curr_dpm_level; struct aie_version version; struct aie_metadata metadata; - struct smu smu; + + /*power management and clock */ enum amdxdna_power_mode_type pw_mode; - bool clk_gate_enabled; + enum aie2_power_state power_state; + u32 dpm_level; + u32 dft_dpm_level; + u32 max_dpm_level; + u32 clk_gating; + u32 npuclk_freq; + u32 hclk_freq; /* Mailbox and the management channel */ struct mailbox *mbox; struct mailbox_channel *mgmt_chann; struct async_events *async_events; + + u32 dev_status; + u32 hwctx_num; }; #define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \ @@ -240,16 +257,8 @@ struct aie2_bar_off_pair { u32 offset; }; -struct rt_config { - u32 type; - u32 value; -}; - -struct rt_config_clk_gating { - const u32 *types; - u32 num_types; - u32 value_enable; - u32 value_disable; +struct aie2_hw_ops { + int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level); }; struct amdxdna_dev_priv { @@ 
-257,7 +266,8 @@ struct amdxdna_dev_priv { u64 protocol_major; u64 protocol_minor; const struct rt_config *rt_config; - u32 num_rt_cfg; + const struct dpm_clk_freq *dpm_clk_tbl; + #define COL_ALIGN_NONE 0 #define COL_ALIGN_NATURE 1 u32 col_align; @@ -268,15 +278,23 @@ struct amdxdna_dev_priv { struct aie2_bar_off_pair sram_offs[SRAM_MAX_INDEX]; struct aie2_bar_off_pair psp_regs_off[PSP_MAX_REGS]; struct aie2_bar_off_pair smu_regs_off[SMU_MAX_REGS]; - struct rt_config_clk_gating clk_gating; - u32 smu_rev; - const struct dpm_clk *smu_npu_dpm_clk_table; - u32 smu_npu_dpm_levels; + struct aie2_hw_ops hw_ops; #ifdef AMDXDNA_DEVEL struct rt_config priv_load_cfg; #endif }; +extern const struct amdxdna_dev_ops aie2_ops; + +int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev, + enum rt_config_category category, u32 *val); + +/* aie2 npu hw config */ +extern const struct dpm_clk_freq npu1_dpm_clk_table[]; +extern const struct dpm_clk_freq npu4_dpm_clk_table[]; +extern const struct rt_config npu1_default_rt_cfg[]; +extern const struct rt_config npu4_default_rt_cfg[]; + /* aie2_pci.c */ #define AIE2_BIT_BYPASS_POWER_SWITCH 0 /* NOSYS */ #define AIE2_BIT_BYPASS_SET_FREQ 1 @@ -286,19 +304,24 @@ extern const struct amdxdna_dev_ops aie2_ops; int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor); /* aie2_smu.c */ -void aie2_smu_setup(struct amdxdna_dev_hdl *ndev); int aie2_smu_start(struct amdxdna_dev_hdl *ndev); void aie2_smu_stop(struct amdxdna_dev_hdl *ndev); -char *aie2_smu_get_mpnpu_clock_name(struct amdxdna_dev_hdl *ndev); -char *aie2_smu_get_hclock_name(struct amdxdna_dev_hdl *ndev); +int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); +int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); int aie2_smu_get_mpnpu_clock_freq(struct amdxdna_dev_hdl *ndev); int aie2_smu_get_hclock_freq(struct amdxdna_dev_hdl *ndev); int aie2_smu_set_power_on(struct amdxdna_dev_hdl *ndev); int aie2_smu_set_power_off(struct amdxdna_dev_hdl *ndev); int 
aie2_smu_get_power_state(struct amdxdna_dev_hdl *ndev); -u32 aie2_smu_get_dpm_level(struct amdxdna_dev_hdl *ndev); -int aie2_smu_set_dft_dpm_level(struct amdxdna_dev_hdl *ndev, u32 dpm_level); -int aie2_smu_set_fixed_dpm_level(struct amdxdna_dev_hdl *ndev, u32 dpm_level); + +/* aie2_pm.c */ +int aie2_pm_init(struct amdxdna_dev_hdl *ndev); +int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target); + +static inline bool aie2_pm_is_turbo(struct amdxdna_dev_hdl *ndev) +{ + return ndev->pw_mode == POWER_MODE_TURBO; +} /* aie2_psp.c */ struct psp_device *aie2m_psp_create(struct device *dev, struct psp_config *conf); @@ -371,10 +394,4 @@ void aie2_restart_ctx(struct amdxdna_client *client); int aie2_xrs_load_hwctx(struct amdxdna_hwctx *hwctx, struct xrs_action_load *action); int aie2_xrs_unload_hwctx(struct amdxdna_hwctx *hwctx); -/* aie2_pm.c */ -int aie2_pm_start(struct amdxdna_dev_hdl *ndev); -void aie2_pm_stop(struct amdxdna_dev_hdl *ndev); -bool aie2_pm_is_turbo(struct amdxdna_dev_hdl *ndev); -int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target); - #endif /* _AIE2_PCI_H_ */ diff --git a/src/driver/amdxdna/aie2_pm.c b/src/driver/amdxdna/aie2_pm.c index cde009eb..aa7cac3a 100644 --- a/src/driver/amdxdna/aie2_pm.c +++ b/src/driver/amdxdna/aie2_pm.c @@ -5,123 +5,98 @@ #include "aie2_pci.h" -static int aie2_pm_set_clock_gating(struct amdxdna_dev_hdl *ndev, bool enable) +#define AIE2_CLK_GATING_ENABLE 1 +#define AIE2_CLK_GATING_DISABLE 0 + +static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 val) { - const struct rt_config_clk_gating *config; - u32 value; int ret; - int i; - - if (enable == ndev->clk_gate_enabled) - return 0; - - config = &ndev->priv->clk_gating; - if (enable) - value = config->value_enable; - else - value = config->value_disable; - - XDNA_DBG(ndev->xdna, "%s clock gating, %d type(s)", - (enable) ? 
"Enable" : "Disable", config->num_types); - - for (i = 0; i < config->num_types; i++) { - ret = aie2_set_runtime_cfg(ndev, config->types[i], value); - if (ret) { - XDNA_ERR(ndev->xdna, "Config type %d, value %d", - config->types[i], value); - break; - } - } - if (!ret) - ndev->clk_gate_enabled = enable; + ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, &val); + if (ret) + return ret; - return ret; + ndev->clk_gating = val; + return 0; } -bool aie2_pm_is_turbo(struct amdxdna_dev_hdl *ndev) +int aie2_pm_init(struct amdxdna_dev_hdl *ndev) { - return ndev->pw_mode == POWER_MODE_TURBO; -} + int ret; -static int aie2_pm_check_turbo(struct amdxdna_dev_hdl *ndev, - enum amdxdna_power_mode_type prev, - enum amdxdna_power_mode_type next) -{ - struct amdxdna_dev *xdna = ndev->xdna; - struct amdxdna_client *client; + if (ndev->dev_status != AIE2_DEV_UNINIT) { + /* Resume device */ + ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level); + if (ret) + return ret; - if (prev != POWER_MODE_TURBO && next != POWER_MODE_TURBO) - return 0; + ret = aie2_pm_set_clk_gating(ndev, ndev->clk_gating); + if (ret) + return ret; - drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); - list_for_each_entry(client, &xdna->client_list, node) { - bool empty; - - mutex_lock(&client->hwctx_lock); - empty = amdxdna_no_hwctx(client); - mutex_unlock(&client->hwctx_lock); - if (!empty) - return -EBUSY; + return 0; } + + while (ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk) + ndev->max_dpm_level++; + ndev->max_dpm_level--; + + ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level); + if (ret) + return ret; + + ret = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE); + if (ret) + return ret; + + ndev->pw_mode = POWER_MODE_DEFAULT; + ndev->dft_dpm_level = ndev->max_dpm_level; + return 0; } int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target) { struct amdxdna_dev *xdna = ndev->xdna; - int ret = 0; + u32 clk_gating, dpm_level; + int ret; + + 
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); if (ndev->pw_mode == target) return 0; - XDNA_DBG(xdna, "Changing power mode from %d to %d", ndev->pw_mode, target); - switch (target) { - case POWER_MODE_TURBO: // Turbo mode - ret = aie2_pm_check_turbo(ndev, ndev->pw_mode, target); - if (ret) - break; - ret = aie2_pm_set_clock_gating(ndev, false); - if (ret) - break; - ret = aie2_smu_set_fixed_dpm_level(ndev, SMU_DPM_MAX(ndev)); + case POWER_MODE_TURBO: + if (ndev->hwctx_num) { + XDNA_ERR(xdna, "Can not set turbo when there is active hwctx"); + return -EINVAL; + } + + clk_gating = AIE2_CLK_GATING_DISABLE; + dpm_level = ndev->max_dpm_level; break; - case POWER_MODE_HIGH: // Performance mode - ret = aie2_pm_set_clock_gating(ndev, true); - if (ret) - break; - ret = aie2_smu_set_fixed_dpm_level(ndev, SMU_DPM_MAX(ndev)); + case POWER_MODE_HIGH: + clk_gating = AIE2_CLK_GATING_ENABLE; + dpm_level = ndev->max_dpm_level; break; - case POWER_MODE_DEFAULT: // Default mode - ret = aie2_pm_set_clock_gating(ndev, true); - if (ret) - break; - // Revert back to default level, let resolver decide level - ret = aie2_smu_set_fixed_dpm_level(ndev, SMU_DPM_INVALID); + case POWER_MODE_DEFAULT: + clk_gating = AIE2_CLK_GATING_ENABLE; + dpm_level = ndev->dft_dpm_level; break; default: - // POWER_MODE_LOW and POWER_MODE_MEDIUM - ret = -EOPNOTSUPP; - break; + return -EOPNOTSUPP; } - if (ret) { - /* Either nothing was done or messed up, can't recover. 
*/ - XDNA_ERR(xdna, "Failed to set power mode: %d, ret %d", target, ret); + + ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level); + if (ret) return ret; - } - ndev->pw_mode = target; - XDNA_INFO(xdna, "Power mode changed to %d", ndev->pw_mode); - return 0; -} + ret = aie2_pm_set_clk_gating(ndev, clk_gating); + if (ret) + return ret; -int aie2_pm_start(struct amdxdna_dev_hdl *ndev) -{ - return aie2_pm_set_mode(ndev, ndev->pw_mode); -} + ndev->pw_mode = target; -void aie2_pm_stop(struct amdxdna_dev_hdl *ndev) -{ - aie2_pm_set_mode(ndev, POWER_MODE_DEFAULT); + return 0; } diff --git a/src/driver/amdxdna/aie2_smu.c b/src/driver/amdxdna/aie2_smu.c index 5c15d5e2..757b9e3a 100644 --- a/src/driver/amdxdna/aie2_smu.c +++ b/src/driver/amdxdna/aie2_smu.c @@ -17,12 +17,6 @@ #define AIE2_SMU_SET_SOFT_DPMLEVEL 0x7 #define AIE2_SMU_SET_HARD_DPMLEVEL 0x8 -/* This is a hack for NPU1 device */ -const struct dpm_clk npu1_hack_dpm_clk_table[] = { - {400, 800}, - {600, 1024}, -}; - static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg, u32 *out) { @@ -55,171 +49,81 @@ static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, return 0; } -/* - * Depending on the current running frequency and debugfs setting, - * aie2_smu_set_clock_freq() might or might not update freqency. 
- */ -static int aie2_smu_set_clock_freq(struct amdxdna_dev_hdl *ndev, - struct clock *clock, u32 freq_mhz) +int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) { - u32 smu_cmd; + u32 freq; int ret; - if (!freq_mhz || freq_mhz > clock->max_freq_mhz) { - XDNA_ERR(ndev->xdna, "Invalid %s freq %d", clock->name, freq_mhz); - return -EINVAL; + ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, + ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq); + if (ret) { + XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n", + ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret); } + ndev->npuclk_freq = freq; - if (clock == &ndev->smu.mp_npu_clock) - smu_cmd = AIE2_SMU_SET_MPNPUCLK_FREQ; - else if (clock == &ndev->smu.h_clock) - smu_cmd = AIE2_SMU_SET_HCLK_FREQ; - else - return -EINVAL; /* Buggy */ - - if (freq_mhz == clock->freq_mhz) - return 0; + ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ, + ndev->priv->dpm_clk_tbl[dpm_level].hclk, &freq); + if (ret) { + XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n", + ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret); + } + ndev->hclk_freq = freq; + ndev->dpm_level = dpm_level; - ret = aie2_smu_exec(ndev, smu_cmd, freq_mhz, NULL); - if (ret) - return ret; + XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n", + ndev->npuclk_freq, ndev->hclk_freq); - clock->freq_mhz = freq_mhz; - XDNA_DBG(ndev->xdna, "Set %s = %d mhz", clock->name, clock->freq_mhz); return 0; } -int aie2_smu_get_mpnpu_clock_freq(struct amdxdna_dev_hdl *ndev) -{ - return ndev->smu.mp_npu_clock.freq_mhz; -} - -char *aie2_smu_get_mpnpu_clock_name(struct amdxdna_dev_hdl *ndev) -{ - return ndev->smu.mp_npu_clock.name; -} - -int aie2_smu_get_hclock_freq(struct amdxdna_dev_hdl *ndev) -{ - return ndev->smu.h_clock.freq_mhz; -} - -char *aie2_smu_get_hclock_name(struct amdxdna_dev_hdl *ndev) +int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) { - return ndev->smu.h_clock.name; -} - -static int aie2_smu_set_dpm_level_v0(struct amdxdna_dev_hdl *ndev, 
u32 dpm_level) -{ - const struct dpm_clk *dpm_entry = SMU_DPM_TABLE_ENTRY(ndev, dpm_level); - struct clock *clk; int ret; - clk = &ndev->smu.mp_npu_clock; - ret = aie2_smu_set_clock_freq(ndev, clk, dpm_entry->npuclk); + ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL); if (ret) { - XDNA_ERR(ndev->xdna, "setting npu clk failed for dpm level %d, ret: %d", + XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ", dpm_level, ret); return ret; } - clk = &ndev->smu.h_clock; - ret = aie2_smu_set_clock_freq(ndev, clk, dpm_entry->hclk); + ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL); if (ret) { - XDNA_ERR(ndev->xdna, "setting hclk failed for dpm level %d, ret: %d", + XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d", dpm_level, ret); return ret; } - return ret; -} - -static int aie2_smu_set_dpm_level_v1(struct amdxdna_dev_hdl *ndev, u32 dpm_level) -{ - int ret; - - ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL); - if (!ret) - XDNA_INFO_ONCE(ndev->xdna, "Set hard dpm level = %d", dpm_level); - else - return ret; - - ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL); - if (!ret) - XDNA_INFO_ONCE(ndev->xdna, "Set soft dpm level = %d", dpm_level); - - ndev->smu.mp_npu_clock.freq_mhz = SMU_DPM_TABLE_ENTRY(ndev, dpm_level)->npuclk; - ndev->smu.h_clock.freq_mhz = SMU_DPM_TABLE_ENTRY(ndev, dpm_level)->hclk; + ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk; + ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk; + ndev->dpm_level = dpm_level; - return ret; -} + XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n", + ndev->npuclk_freq, ndev->hclk_freq); -// Find out the target level to set, fixed level always gets priority -static u32 aie2_smu_get_tgt_dpm_level(struct amdxdna_dev_hdl *ndev) -{ - return ndev->smu.fixed_dpm_level != SMU_DPM_INVALID ? 
- ndev->smu.fixed_dpm_level : ndev->smu.dft_dpm_level; -} - -// Find out what current level is set to -u32 aie2_smu_get_dpm_level(struct amdxdna_dev_hdl *ndev) -{ - return ndev->smu.curr_dpm_level; -} - -static int aie2_smu_set_dpm_level(struct amdxdna_dev_hdl *ndev, u32 dpm_level) -{ - int ret; - - if (aie2_control_flags & BIT(AIE2_BIT_BYPASS_SET_FREQ)) { - XDNA_DBG(ndev->xdna, "Bypassed set dpm level"); - return 0; - } - - if (dpm_level > SMU_DPM_MAX(ndev)) - return -EINVAL; - if (dpm_level == ndev->smu.curr_dpm_level) - return 0; - - /* default is v1 */ - if (ndev->priv->smu_rev == SMU_REVISION_NPU1) - ret = aie2_smu_set_dpm_level_v0(ndev, dpm_level); - else - ret = aie2_smu_set_dpm_level_v1(ndev, dpm_level); - if (!ret) { - ndev->smu.curr_dpm_level = dpm_level; - XDNA_DBG(ndev->xdna, "The current dpm level is set to %d", dpm_level); - } - - return ret; + return 0; } -int aie2_smu_set_fixed_dpm_level(struct amdxdna_dev_hdl *ndev, u32 dpm_level) +int aie2_smu_get_mpnpu_clock_freq(struct amdxdna_dev_hdl *ndev) { - ndev->smu.fixed_dpm_level = dpm_level; - XDNA_DBG(ndev->xdna, "The fixed dpm level is %d", dpm_level); - return aie2_smu_set_dpm_level(ndev, aie2_smu_get_tgt_dpm_level(ndev)); + return ndev->npuclk_freq; } -int aie2_smu_set_dft_dpm_level(struct amdxdna_dev_hdl *ndev, u32 dpm_level) +int aie2_smu_get_hclock_freq(struct amdxdna_dev_hdl *ndev) { - ndev->smu.dft_dpm_level = dpm_level; - XDNA_DBG(ndev->xdna, "The default dpm level is %d", dpm_level); - return aie2_smu_set_dpm_level(ndev, aie2_smu_get_tgt_dpm_level(ndev)); + return ndev->hclk_freq; } int aie2_smu_set_power_on(struct amdxdna_dev_hdl *ndev) { int ret; - if (ndev->smu.power_state == SMU_POWER_ON) - return 0; - ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL); if (ret) return ret; + ndev->power_state = SMU_POWER_ON; - ndev->smu.power_state = SMU_POWER_ON; return 0; } @@ -227,20 +131,17 @@ int aie2_smu_set_power_off(struct amdxdna_dev_hdl *ndev) { int ret; - if (ndev->smu.power_state == 
SMU_POWER_OFF) - return 0; - ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL); if (ret) return ret; + ndev->power_state = SMU_POWER_OFF; - ndev->smu.power_state = SMU_POWER_OFF; return 0; } int aie2_smu_get_power_state(struct amdxdna_dev_hdl *ndev) { - return ndev->smu.power_state; + return ndev->power_state; } int aie2_smu_start(struct amdxdna_dev_hdl *ndev) @@ -253,13 +154,6 @@ int aie2_smu_start(struct amdxdna_dev_hdl *ndev) return ret; } - // Restore DPM level to what we set before - ret = aie2_smu_set_dpm_level(ndev, aie2_smu_get_tgt_dpm_level(ndev)); - if (ret) { - XDNA_ERR(ndev->xdna, "Restore dpm level failed, ret %d", ret); - return ret; - } - return 0; } @@ -268,71 +162,9 @@ void aie2_smu_stop(struct amdxdna_dev_hdl *ndev) int ret; /* Minimize clocks/dpm level prior to power off */ - ret = aie2_smu_set_dpm_level(ndev, 0); - if (ret) - XDNA_WARN(ndev->xdna, "Set dpm level 0 failed, ret %d", ret); + ndev->priv->hw_ops.set_dpm(ndev, 0); ret = aie2_smu_set_power_off(ndev); if (ret) XDNA_WARN(ndev->xdna, "Power off failed, ret %d", ret); } - -static void aie2_smu_npu1_hack(struct amdxdna_dev_hdl *ndev) -{ - struct smu *smu = &ndev->smu; - u32 npuclk_freq; - u32 out; - int ret; - - /* This function is only for NPU1, do nothing for others */ - if (ndev->priv->smu_rev != SMU_REVISION_NPU1) - return; - - /* - * TODO: We are setting the smu power on here, normally to undo this - * tried smu poweroff before returning. 
That poweroff didn't work - * especially during the early bootup, so not doing poweroff for now - */ - ret = aie2_smu_set_power_on(ndev); - if (ret) { - XDNA_ERR(ndev->xdna, "%s: Power on failed, ret %d", __func__, ret); - return; - } - - /* - * It's a workaround for which DPM table to use on NPU1, - * set the highest clk, read it back and then reset to default clk - */ - npuclk_freq = SMU_DPM_TABLE_ENTRY(ndev, SMU_DPM_MAX(ndev))->npuclk; - aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, npuclk_freq, &out); - - if (npuclk_freq != out) { - XDNA_DBG(ndev->xdna, "Use small DPM table"); - smu->dpm_table = npu1_hack_dpm_clk_table; - smu->num_dpm_levels = ARRAY_SIZE(npu1_hack_dpm_clk_table); - } - - npuclk_freq = SMU_DPM_TABLE_ENTRY(ndev, 0)->npuclk; - aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, npuclk_freq, NULL); -} - -void aie2_smu_setup(struct amdxdna_dev_hdl *ndev) -{ - struct smu *smu = &ndev->smu; - - snprintf(smu->mp_npu_clock.name, sizeof(smu->mp_npu_clock.name), "MP-NPU Clock"); - snprintf(smu->h_clock.name, sizeof(smu->h_clock.name), "H Clock"); - - smu->dpm_table = ndev->priv->smu_npu_dpm_clk_table; - smu->num_dpm_levels = ndev->priv->smu_npu_dpm_levels; - - /* This is a hack for special NPU1 device */ - aie2_smu_npu1_hack(ndev); - - smu->dft_dpm_level = SMU_DPM_MAX(ndev); - smu->fixed_dpm_level = SMU_DPM_INVALID; - smu->curr_dpm_level = SMU_DPM_INVALID; - - smu->mp_npu_clock.max_freq_mhz = SMU_DPM_TABLE_ENTRY(ndev, SMU_DPM_MAX(ndev))->npuclk; - smu->h_clock.max_freq_mhz = SMU_DPM_TABLE_ENTRY(ndev, SMU_DPM_MAX(ndev))->hclk; -} diff --git a/src/driver/amdxdna/aie2_solver.c b/src/driver/amdxdna/aie2_solver.c index 1f06d2ae..a70b2b06 100644 --- a/src/driver/amdxdna/aie2_solver.c +++ b/src/driver/amdxdna/aie2_solver.c @@ -3,10 +3,11 @@ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. 
*/ -#include +#include +#include +#include #include #include -#include #include "aie2_solver.h" @@ -87,7 +88,7 @@ static int sanity_check(struct solver_state *xrs, struct alloc_requests *req) * We can find at least one CDOs groups that meet the * GOPs requirement. */ - cu_clk_freq = xrs->cfg.clk_list.cu_clk_list[xrs->cfg.clk_list.num_levels - 1].hclk; + cu_clk_freq = xrs->cfg.clk_list.cu_clk_list[xrs->cfg.clk_list.num_levels - 1]; if (qos_meet(xrs, rqos, cdop->qos_cap.opc * cu_clk_freq / 1000)) return -EINVAL; @@ -108,36 +109,37 @@ static bool is_valid_qos_dpm_params(struct aie_qos *rqos) return false; } -static u32 find_dpm_level(struct solver_state *xrs, struct alloc_requests *req) +static int set_dpm_level(struct solver_state *xrs, struct alloc_requests *req, u32 *dpm_level) { + struct solver_rgroup *rgp = &xrs->rgp; struct cdo_parts *cdop = &req->cdo; struct aie_qos *rqos = &req->rqos; - struct solver_rgroup *rgp = &xrs->rgp; + u32 freq, max_dpm_level, level; struct solver_node *node; - u32 cu_clk_freq, dpm_level; + max_dpm_level = xrs->cfg.clk_list.num_levels - 1; /* If no QoS parameters are passed, set it to the max DPM level */ - if (!is_valid_qos_dpm_params(rqos)) - return xrs->cfg.max_dpm_level; - - /* - * We can find at least one CDOs groups that meet the - * GOPs requirement. - */ - for (dpm_level = 0; dpm_level < xrs->cfg.clk_list.num_levels; dpm_level++) { - cu_clk_freq = xrs->cfg.clk_list.cu_clk_list[dpm_level].hclk; + if (!is_valid_qos_dpm_params(rqos)) { + level = max_dpm_level; + goto set_dpm; + } - if (!qos_meet(xrs, rqos, cdop->qos_cap.opc * cu_clk_freq / 1000)) + /* Find one CDO group that meet the GOPs requirement. 
*/ + for (level = 0; level < max_dpm_level; level++) { + freq = xrs->cfg.clk_list.cu_clk_list[level]; + if (!qos_meet(xrs, rqos, cdop->qos_cap.opc * freq / 1000)) break; } /* set the dpm level which fits all the sessions */ list_for_each_entry(node, &rgp->node_list, list) { - if (node->dpm_level > dpm_level) - dpm_level = node->dpm_level; + if (node->dpm_level > level) + level = node->dpm_level; } - return dpm_level; +set_dpm: + *dpm_level = level; + return xrs->cfg.actions->set_dft_dpm_level(xrs->cfg.ddev, level); } static struct solver_node *rg_search_node(struct solver_rgroup *rgp, u64 rid) @@ -299,18 +301,19 @@ int xrs_allocate_resource(void *hdl, struct alloc_requests *req, struct amdxdna_ struct xrs_action_load load_act; struct solver_node *snode; struct solver_state *xrs; + u32 dpm_level; int ret; xrs = (struct solver_state *)hdl; ret = sanity_check(xrs, req); if (ret) { - dev_err(xrs->cfg.dev, "invalid request"); + drm_err(xrs->cfg.ddev, "invalid request"); return ret; } if (rg_search_node(&xrs->rgp, req->rid)) { - dev_err(xrs->cfg.dev, "rid %lld is in-use", req->rid); + drm_err(xrs->cfg.ddev, "rid %lld is in-use", req->rid); return -EEXIST; } @@ -318,20 +321,19 @@ int xrs_allocate_resource(void *hdl, struct alloc_requests *req, struct amdxdna_ if (IS_ERR(snode)) return PTR_ERR(snode); - snode->dpm_level = find_dpm_level(xrs, req); - ret = xrs->cfg.actions->set_dpm_level(hwctx->client->xdna->dev_handle, - snode->dpm_level); + fill_load_action(xrs, snode, &load_act); + ret = xrs->cfg.actions->load_hwctx(hwctx, &load_act); if (ret) goto free_node; - fill_load_action(xrs, snode, &load_act); - ret = xrs->cfg.actions->load_hwctx(hwctx, &load_act); + ret = set_dpm_level(xrs, req, &dpm_level); if (ret) goto free_node; + snode->dpm_level = dpm_level; snode->hwctx = hwctx; - dev_dbg(xrs->cfg.dev, "start col %d ncols %d\n", + drm_dbg(xrs->cfg.ddev, "start col %d ncols %d\n", snode->pt_node->start_col, snode->pt_node->ncols); return 0; @@ -349,7 +351,7 @@ int 
xrs_release_resource(void *hdl, u64 rid) node = rg_search_node(&xrs->rgp, rid); if (!node) { - dev_err(xrs->cfg.dev, "node not exist"); + drm_err(xrs->cfg.ddev, "node not exist"); return -ENODEV; } @@ -364,11 +366,11 @@ void *xrsm_init(struct init_config *cfg) struct solver_rgroup *rgp; struct solver_state *xrs; - xrs = devm_kzalloc(cfg->dev, sizeof(*xrs), GFP_KERNEL); + xrs = drmm_kzalloc(cfg->ddev, sizeof(*xrs), GFP_KERNEL); if (!xrs) return NULL; - memcpy(&xrs->cfg, cfg, sizeof(struct init_config)); + memcpy(&xrs->cfg, cfg, sizeof(*cfg)); rgp = &xrs->rgp; INIT_LIST_HEAD(&rgp->node_list); diff --git a/src/driver/amdxdna/aie2_solver.h b/src/driver/amdxdna/aie2_solver.h index 508563a7..a4f4e6c0 100644 --- a/src/driver/amdxdna/aie2_solver.h +++ b/src/driver/amdxdna/aie2_solver.h @@ -73,15 +73,17 @@ struct xrs_action_load { * Resource solver chooses the frequency from the table * to meet the QOS requirements. */ +#define POWER_LEVEL_NUM 8 + struct clk_list_info { u32 num_levels; /* available power levels */ - const struct dpm_clk *cu_clk_list; /* available aie clock frequencies in Mhz*/ + u32 cu_clk_list[POWER_LEVEL_NUM]; /* available aie clock frequencies in Mhz*/ }; struct xrs_action_ops { int (*load_hwctx)(struct amdxdna_hwctx *hwctx, struct xrs_action_load *action); int (*unload_hwctx)(struct amdxdna_hwctx *hwctx); - int (*set_dpm_level)(struct amdxdna_dev_hdl *ndev, u32 dpm_level); + int (*set_dft_dpm_level)(struct drm_device *ddev, u32 level); }; /* @@ -91,9 +93,8 @@ struct init_config { u32 total_col; u32 sys_eff_factor; /* system efficiency factor */ u32 latency_adj; /* latency adjustment in ms */ - u32 max_dpm_level; /* Max dpm level in the system */ struct clk_list_info clk_list; /* List of frequencies available in system */ - struct device *dev; + struct drm_device *ddev; struct xrs_action_ops *actions; }; diff --git a/src/driver/amdxdna/npu1_regs.c b/src/driver/amdxdna/npu1_regs.c index 55b93a39..3b127054 100644 --- a/src/driver/amdxdna/npu1_regs.c +++ 
b/src/driver/amdxdna/npu1_regs.c @@ -38,21 +38,14 @@ #define NPU1_SMU_BAR_BASE MPNPU_APERTURE0_BASE #define NPU1_SRAM_BAR_BASE MPNPU_APERTURE1_BASE -#define NPU1_RT_CFG_TYPE_CLK_GATING 1 -#define NPU1_RT_CFG_TYPE_PDI_LOAD 2 -#define NPU1_RT_CFG_TYPE_DEBUG_BO 4 - -#define NPU1_RT_CFG_VAL_CLK_GATING_OFF 0 -#define NPU1_RT_CFG_VAL_CLK_GATING_ON 1 - -#define NPU1_RT_CFG_VAL_PDI_LOAD_MGMT 0 -#define NPU1_RT_CFG_VAL_PDI_LOAD_APP 1 - -#define NPU1_RT_CFG_VAL_DEBUG_BO_DEFAULT 0 -#define NPU1_RT_CFG_VAL_DEBUG_BO_LARGE 1 +const struct rt_config npu1_default_rt_cfg[] = { + { 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */ + { 4, 1, AIE2_RT_CFG_INIT }, /* Large Debug BO */ + { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ + { 0 }, +}; -/*fill in the dpm clock frequencies */ -const struct dpm_clk npu1_dpm_clk_table[] = { +const struct dpm_clk_freq npu1_dpm_clk_table[] = { {400, 800}, {600, 1024}, {600, 1024}, @@ -61,21 +54,15 @@ const struct dpm_clk npu1_dpm_clk_table[] = { {720, 1309}, {720, 1309}, {847, 1600}, + { 0 } }; -const struct rt_config npu1_rt_cfg[] = { - {NPU1_RT_CFG_TYPE_PDI_LOAD, NPU1_RT_CFG_VAL_PDI_LOAD_APP}, - {NPU1_RT_CFG_TYPE_DEBUG_BO, NPU1_RT_CFG_VAL_DEBUG_BO_LARGE}, -}; - -const u32 npu1_clk_gating_types[] = {NPU1_RT_CFG_TYPE_CLK_GATING}; - const struct amdxdna_dev_priv npu1_dev_priv = { .fw_path = "amdnpu/1502_00/npu.sbin", .protocol_major = 0x5, .protocol_minor = 0x5, - .rt_config = npu1_rt_cfg, - .num_rt_cfg = ARRAY_SIZE(npu1_rt_cfg), + .rt_config = npu1_default_rt_cfg, + .dpm_clk_tbl = npu1_dpm_clk_table, .col_align = COL_ALIGN_NONE, .mbox_dev_addr = NPU1_MBOX_BAR_BASE, .mbox_size = 0, /* Use BAR size */ @@ -100,17 +87,12 @@ const struct amdxdna_dev_priv npu1_dev_priv = { DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6), DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU1_SMU, MPNPU_PUB_SCRATCH7), }, - .clk_gating = { - .types = npu1_clk_gating_types, - .num_types = ARRAY_SIZE(npu1_clk_gating_types), - .value_enable = 
NPU1_RT_CFG_VAL_CLK_GATING_ON, - .value_disable = NPU1_RT_CFG_VAL_CLK_GATING_OFF, + .hw_ops = { + .set_dpm = npu1_set_dpm, }, - .smu_rev = SMU_REVISION_NPU1, - .smu_npu_dpm_clk_table = npu1_dpm_clk_table, - .smu_npu_dpm_levels = ARRAY_SIZE(npu1_dpm_clk_table), + #ifdef AMDXDNA_DEVEL - .priv_load_cfg = {NPU1_RT_CFG_TYPE_PDI_LOAD, NPU1_RT_CFG_VAL_PDI_LOAD_MGMT}, + .priv_load_cfg = { 2, 0, AIE2_RT_CFG_INIT }, #endif }; diff --git a/src/driver/amdxdna/npu4_family.h b/src/driver/amdxdna/npu4_family.h index 23ca2511..576864e9 100644 --- a/src/driver/amdxdna/npu4_family.h +++ b/src/driver/amdxdna/npu4_family.h @@ -58,33 +58,10 @@ #define NPU4_SMU_BAR_BASE MMNPU_APERTURE4_BASE #define NPU4_SRAM_BAR_BASE MMNPU_APERTURE1_BASE -#define NPU4_RT_CFG_TYPE_CLK_GATING 1 -#define NPU4_RT_CFG_TYPE_HCLK_GATING 2 -#define NPU4_RT_CFG_TYPE_PWR_GATING 3 -#define NPU4_RT_CFG_TYPE_L1IMU_GATING 4 -#define NPU4_RT_CFG_TYPE_PDI_LOAD 5 -#define NPU4_RT_CFG_TYPE_DEBUG_BO 10 - -#define NPU4_RT_CFG_VAL_CLK_GATING_OFF 0 -#define NPU4_RT_CFG_VAL_CLK_GATING_ON 1 - -#define NPU4_RT_CFG_VAL_PDI_LOAD_MGMT 0 -#define NPU4_RT_CFG_VAL_PDI_LOAD_APP 1 - -#define NPU4_RT_CFG_VAL_DEBUG_BO_DEFAULT 0 -#define NPU4_RT_CFG_VAL_DEBUG_BO_LARGE 1 - -#define NPU4_INIT_RT_CFG_NUM 2 -#define NPU4_CLK_GATING_CFG_NUM 4 - -extern const struct dpm_clk npu4_dpm_clk_table[DPM_LEVEL_MAX]; -extern const struct rt_config npu4_rt_cfg[NPU4_INIT_RT_CFG_NUM]; -extern const u32 npu4_clk_gating_types[NPU4_CLK_GATING_CFG_NUM]; - #define NPU4_COMMON_DEV_PRIV \ - .rt_config = npu4_rt_cfg, \ - .num_rt_cfg = ARRAY_SIZE(npu4_rt_cfg), \ - .priv_load_cfg = {NPU4_RT_CFG_TYPE_PDI_LOAD, NPU4_RT_CFG_VAL_PDI_LOAD_MGMT}, \ + .rt_config = npu4_default_rt_cfg, \ + .dpm_clk_tbl = npu4_dpm_clk_table, \ + .priv_load_cfg = { 5, 0, AIE2_RT_CFG_INIT }, \ .col_align = COL_ALIGN_NATURE, \ .mbox_dev_addr = NPU4_MBOX_BAR_BASE, \ .mbox_size = 0, /* Use BAR size */ \ @@ -109,15 +86,9 @@ extern const u32 npu4_clk_gating_types[NPU4_CLK_GATING_CFG_NUM]; 
DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61), \ DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU4_SMU, MP1_C2PMSG_60), \ }, \ - .clk_gating = { \ - .types = npu4_clk_gating_types, \ - .num_types = ARRAY_SIZE(npu4_clk_gating_types), \ - .value_enable = NPU4_RT_CFG_VAL_CLK_GATING_ON, \ - .value_disable = NPU4_RT_CFG_VAL_CLK_GATING_OFF, \ - }, \ - .smu_rev = SMU_REVISION_NPU4, \ - .smu_npu_dpm_clk_table = npu4_dpm_clk_table, \ - .smu_npu_dpm_levels = ARRAY_SIZE(npu4_dpm_clk_table) + .hw_ops = { \ + .set_dpm = npu4_set_dpm, \ + } #define NPU4_COMMON_DEV_INFO \ .reg_bar = NPU4_REG_BAR_INDEX, \ diff --git a/src/driver/amdxdna/npu4_regs.c b/src/driver/amdxdna/npu4_regs.c index 50d0bb10..37e1a6a5 100644 --- a/src/driver/amdxdna/npu4_regs.c +++ b/src/driver/amdxdna/npu4_regs.c @@ -5,7 +5,17 @@ #include "npu4_family.h" -const struct dpm_clk npu4_dpm_clk_table[DPM_LEVEL_MAX] = { +const struct rt_config npu4_default_rt_cfg[] = { + { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */ + { 10, 1, AIE2_RT_CFG_INIT }, /* Large Debug BO */ + { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ + { 2, 1, AIE2_RT_CFG_CLK_GATING }, /* HCLK gating on */ + { 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Power gating on */ + { 4, 1, AIE2_RT_CFG_CLK_GATING }, /* L1IMU gating on */ + { 0 }, +}; + +const struct dpm_clk_freq npu4_dpm_clk_table[] = { {396, 792}, {600, 1056}, {792, 1152}, @@ -13,19 +23,8 @@ const struct dpm_clk npu4_dpm_clk_table[DPM_LEVEL_MAX] = { {975, 1267}, {1056, 1408}, {1152, 1584}, - {1267, 1800} -}; - -const struct rt_config npu4_rt_cfg[NPU4_INIT_RT_CFG_NUM] = { - {NPU4_RT_CFG_TYPE_PDI_LOAD, NPU4_RT_CFG_VAL_PDI_LOAD_APP}, - {NPU4_RT_CFG_TYPE_DEBUG_BO, NPU4_RT_CFG_VAL_DEBUG_BO_LARGE}, -}; - -const u32 npu4_clk_gating_types[NPU4_CLK_GATING_CFG_NUM] = { - NPU4_RT_CFG_TYPE_CLK_GATING, - NPU4_RT_CFG_TYPE_HCLK_GATING, - NPU4_RT_CFG_TYPE_PWR_GATING, - NPU4_RT_CFG_TYPE_L1IMU_GATING, + {1267, 1800}, + { 0 } }; const struct amdxdna_dev_priv npu4_dev_priv = {