From 047e6f233db8ba45e0c52d9f170069a827304e6a Mon Sep 17 00:00:00 2001
From: Xinyue Xie <116336560+ieiue@users.noreply.github.com>
Date: Fri, 20 Mar 2026 16:28:55 +0800
Subject: [PATCH 1/4] Update version to v3.9.0.26

---
 source/source_main/version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/source/source_main/version.h b/source/source_main/version.h
index c9bd1d70c5..c7f86412c4 100644
--- a/source/source_main/version.h
+++ b/source/source_main/version.h
@@ -1,3 +1,3 @@
 #ifndef VERSION
-#define VERSION "v3.9.0.25"
+#define VERSION "v3.9.0.26"
 #endif

From e277c1c2766cbb11746496670a5daf27ff9afb7a Mon Sep 17 00:00:00 2001
From: Chen Nuo <49788094+Cstandardlib@users.noreply.github.com>
Date: Fri, 20 Mar 2026 21:26:31 +0800
Subject: [PATCH 2/4] Fix: dsp memory op (#7056)

* Fix dsp setmem op

* Clean up the code
---
 .../source_base/module_device/memory_op.cpp   | 19 ++++++++++++
 source/source_base/module_device/memory_op.h  | 14 +++++++++
 source/source_pw/module_pwdft/op_pw_nl.h      | 11 ++++---
 source/source_pw/module_pwdft/vnl_pw.cpp      | 31 ++++++++++++-------
 4 files changed, 58 insertions(+), 17 deletions(-)

diff --git a/source/source_base/module_device/memory_op.cpp b/source/source_base/module_device/memory_op.cpp
index 2ef4be588a..bff9234f64 100644
--- a/source/source_base/module_device/memory_op.cpp
+++ b/source/source_base/module_device/memory_op.cpp
@@ -471,6 +471,19 @@ struct resize_memory_op_mt<FPTYPE, base_device::DEVICE_CPU>
     }
 };
 
+template <typename FPTYPE>
+struct set_memory_op_mt<FPTYPE, base_device::DEVICE_CPU>
+{
+    void operator()(FPTYPE* arr, const int var, const size_t size)
+    { // byte-fill `size` elements of `arr` with `var`, split into per-worker slices
+        ModuleBase::OMP_PARALLEL([&](int num_thread, int thread_id) {
+            int offset = 0, count = 0; // element range [offset, offset + count) set by BLOCK_TASK_DIST_1D
+            ModuleBase::BLOCK_TASK_DIST_1D(num_thread, thread_id, size, (size_t)4096 / sizeof(FPTYPE), offset, count);
+            memset(arr + offset, var, count * sizeof(FPTYPE));
+        });
+    }
+};
+
 template <typename FPTYPE>
 struct delete_memory_op_mt<FPTYPE, base_device::DEVICE_CPU>
 {
@@ -487,6 +500,12 @@ template struct resize_memory_op_mt<double, base_device::DEVICE_CPU>;
 template struct resize_memory_op_mt<std::complex<float>, base_device::DEVICE_CPU>;
 template struct resize_memory_op_mt<std::complex<double>, base_device::DEVICE_CPU>;
 
+template struct set_memory_op_mt<int, base_device::DEVICE_CPU>; // explicit instantiations of the CPU specialization
+template struct set_memory_op_mt<float, base_device::DEVICE_CPU>;
+template struct set_memory_op_mt<double, base_device::DEVICE_CPU>;
+template struct set_memory_op_mt<std::complex<float>, base_device::DEVICE_CPU>;
+template struct set_memory_op_mt<std::complex<double>, base_device::DEVICE_CPU>;
+
 template struct delete_memory_op_mt<int, base_device::DEVICE_CPU>;
 template struct delete_memory_op_mt<float, base_device::DEVICE_CPU>;
 template struct delete_memory_op_mt<double, base_device::DEVICE_CPU>;
diff --git a/source/source_base/module_device/memory_op.h b/source/source_base/module_device/memory_op.h
index c24acbb024..004468f410 100644
--- a/source/source_base/module_device/memory_op.h
+++ b/source/source_base/module_device/memory_op.h
@@ -234,6 +234,20 @@ struct resize_memory_op_mt
     void operator()(FPTYPE*& arr, const size_t size, const char* record_in = nullptr);
 };
 
+template <typename FPTYPE, typename Device>
+struct set_memory_op_mt
+{
+    /// @brief memset-style fill for DSP memory allocated by the mt allocator.
+    ///
+    /// Input Parameters
+    /// \param var : byte value written to every byte of the array (as with memset)
+    /// \param size : number of FPTYPE elements in arr (not a byte count)
+    ///
+    /// Output Parameters
+    /// \param arr : array whose first `size` elements are overwritten in place
+    void operator()(FPTYPE* arr, const int var, const size_t size);
+};
+
 template <typename FPTYPE, typename Device>
 struct delete_memory_op_mt
 {
diff --git a/source/source_pw/module_pwdft/op_pw_nl.h b/source/source_pw/module_pwdft/op_pw_nl.h
index 829bb31e93..dcdbf889a8 100644
--- a/source/source_pw/module_pwdft/op_pw_nl.h
+++ b/source/source_pw/module_pwdft/op_pw_nl.h
@@ -88,14 +88,15 @@ class Nonlocal<OperatorPW<T, Device>> : public OperatorPW<T, Device>
     using gemv_op = ModuleBase::gemv_op<T, Device>;
     using gemm_op = ModuleBase::gemm_op<T, Device>;
     using nonlocal_op = nonlocal_pw_op<Real, Device>;
-    using setmem_complex_op = base_device::memory::set_memory_op<T, Device>;
-    #ifdef __DSP
+#ifdef __DSP
+    using setmem_complex_op = base_device::memory::set_memory_op_mt<T, Device>;
     using resmem_complex_op = base_device::memory::resize_memory_op_mt<T, Device>;
     using delmem_complex_op = base_device::memory::delete_memory_op_mt<T, Device>;
-    #else
+#else
+    using setmem_complex_op = base_device::memory::set_memory_op<T, Device>;
     using resmem_complex_op = base_device::memory::resize_memory_op<T, Device>;
     using delmem_complex_op = base_device::memory::delete_memory_op<T, Device>;
-    #endif
+#endif
     using syncmem_complex_h2d_op = base_device::memory::synchronize_memory_op<T, Device, base_device::DEVICE_CPU>;
 
     T one{1, 0};
@@ -104,4 +105,4 @@ class Nonlocal<OperatorPW<T, Device>> : public OperatorPW<T, Device>
 
 } // namespace hamilt
 
-#endif
\ No newline at end of file
+#endif
diff --git a/source/source_pw/module_pwdft/vnl_pw.cpp b/source/source_pw/module_pwdft/vnl_pw.cpp
index 3a1fdda873..0ac8ef9b95 100644
--- a/source/source_pw/module_pwdft/vnl_pw.cpp
+++ b/source/source_pw/module_pwdft/vnl_pw.cpp
@@ -64,6 +64,13 @@ void pseudopot_cell_vnl::release_memory()
         delmem_ch_op()(this->c_deeq_nc);
         delmem_ch_op()(this->c_vkb);
         delmem_ch_op()(this->c_qq_so);
+#ifdef __DSP
+        if (this->z_vkb != nullptr)
+        {
+            base_device::memory::delete_memory_op_mt<std::complex<double>, base_device::DEVICE_CPU>()(this->z_vkb);
+            this->z_vkb = nullptr;
+        }
+#endif
         // There's no need to delete double precision pointers while in a CPU environment.
     }
     memory_released = true;
@@ -273,13 +280,13 @@ void pseudopot_cell_vnl::init(const UnitCell& ucell,
             resmem_sh_op()(s_tab, this->tab.getSize());
             resmem_ch_op()(c_vkb, nkb * npwx);
         }
-        #ifdef __DSP
+#ifdef __DSP
         base_device::memory::resize_memory_op_mt<std::complex<double>, base_device::DEVICE_CPU>()
-        (this->z_vkb, this->vkb.size, "Nonlocal<PW>::ps");
-        memcpy(this->z_vkb,this->vkb.c,this->vkb.size*16);
-        #else
+        (this->z_vkb, this->vkb.size, "VNL::z_vkb");
+        // memcpy(this->z_vkb,this->vkb.c,this->vkb.size*16);
+#else
         this->z_vkb = this->vkb.c;
-        #endif
+#endif
         this->d_tab = this->tab.ptr;
         // There's no need to delete double precision pointers while in a CPU environment.
     }
@@ -293,12 +300,12 @@ void pseudopot_cell_vnl::init(const UnitCell& ucell,
 // with structure factor, for all atoms, in reciprocal space
 //----------------------------------------------------------
 template <typename FPTYPE, typename Device>
-void pseudopot_cell_vnl::getvnl(Device* ctx, 
+void pseudopot_cell_vnl::getvnl(Device* ctx,
                                 const UnitCell& ucell,
-                                const int& ik, 
+                                const int& ik,
                                 std::complex<FPTYPE>* vkb_in) const
 {
-    if (PARAM.inp.test_pp) 
+    if (PARAM.inp.test_pp)
     {
         ModuleBase::TITLE("pseudopot_cell_vnl", "getvnl");
     }
@@ -732,10 +739,10 @@ void pseudopot_cell_vnl::init_vnl(UnitCell& cell, const ModulePW::PW_Basis* rho_
             for (int iq = 0; iq < PARAM.globalv.nqx; iq++)
             {
                 const double q = iq * PARAM.globalv.dq;
-                ModuleBase::Sphbes::Spherical_Bessel(kkbeta, cell.atoms[it].ncpp.r.data(), q, l, jl);  
+                ModuleBase::Sphbes::Spherical_Bessel(kkbeta, cell.atoms[it].ncpp.r.data(), q, l, jl);
                 for (int ir = 0; ir < kkbeta; ir++)
-                {   
-		            aux[ir] = cell.atoms[it].ncpp.betar(ib, ir) * jl[ir] * cell.atoms[it].ncpp.r[ir];   
+                {
+		            aux[ir] = cell.atoms[it].ncpp.betar(ib, ir) * jl[ir] * cell.atoms[it].ncpp.r[ir];
                 }
                 double vqint=0.0;
                 ModuleBase::Integral::Simpson_Integral(kkbeta, aux, cell.atoms[it].ncpp.rab.data(), vqint);
@@ -1723,7 +1730,7 @@ template void pseudopot_cell_vnl::getvnl<float, base_device::DEVICE_CPU>(base_de
                                                                          int const&,
                                                                          std::complex<float>*) const;
 template void pseudopot_cell_vnl::getvnl<double, base_device::DEVICE_CPU>(base_device::DEVICE_CPU*,
-                                                                          const UnitCell&, 
+                                                                          const UnitCell&,
                                                                           int const&,
                                                                           std::complex<double>*) const;
 #if defined(__CUDA) || defined(__ROCM)

From ad77994d0d3eb5446300942f922b3719bffc1bd0 Mon Sep 17 00:00:00 2001
From: Chen Nuo <49788094+Cstandardlib@users.noreply.github.com>
Date: Fri, 20 Mar 2026 21:26:54 +0800
Subject: [PATCH 3/4] Fix: Add DSP gemm pack with auto memcpy to buffer (#7060)

* Add DSP gemm pack with auto memcpy to buffer

* Fix and reorder gemm signature
---
 .../source_base/kernels/dsp/dsp_connector.cpp | 120 +++++++++++++++++-
 .../source_base/kernels/dsp/dsp_connector.h   |  60 +++++++--
 .../module_external/blas_connector_matrix.cpp |  16 ++-
 3 files changed, 177 insertions(+), 19 deletions(-)

diff --git a/source/source_base/kernels/dsp/dsp_connector.cpp b/source/source_base/kernels/dsp/dsp_connector.cpp
index 2baf73a4ec..7fa0f20ee7 100644
--- a/source/source_base/kernels/dsp/dsp_connector.cpp
+++ b/source/source_base/kernels/dsp/dsp_connector.cpp
@@ -403,6 +403,66 @@ void zgemm_mth_(const char* transa,
     free_ht(bet);
 } // zgemm that needn't malloc_ht or free_ht
 
+/// @brief Column-major zgemm via mt_hthread_zgemm with c staged in a malloc_ht buffer.
+///
+/// c is copied into a temporary buffer of ldc * n elements before the call and
+/// copied back afterwards; alpha and beta are copied into malloc_ht storage too.
+/// a and b are handed to the kernel unchanged.
+/// NOTE(review): a and b presumably have to be DSP-visible already, unlike in
+/// cgemm_pack_mth_, which also stages a and b -- confirm the asymmetry is intended.
+///
+/// @param transa,transb  BLAS transpose flags, mapped by convertBLASTranspose
+/// @param cluster_id     forwarded to malloc_ht and mt_hthread_zgemm
+void zgemm_pack_mth_(const char* transa,
+                     const char* transb,
+                     const int* m,
+                     const int* n,
+                     const int* k,
+                     const std::complex<double>* alpha,
+                     const std::complex<double>* a,
+                     const int* lda,
+                     const std::complex<double>* b,
+                     const int* ldb,
+                     const std::complex<double>* beta,
+                     std::complex<double>* c,
+                     const int* ldc,
+                     int cluster_id)
+{
+    // Size of the staging buffer: all n columns of c at leading dimension ldc.
+    const size_t c_elems = static_cast<size_t>(*ldc) * static_cast<size_t>(*n);
+    const size_t c_bytes = c_elems * sizeof(std::complex<double>);
+
+    std::complex<double>* C_dsp = static_cast<std::complex<double>*>(malloc_ht(c_bytes, cluster_id));
+    std::complex<double>* alp = static_cast<std::complex<double>*>(malloc_ht(sizeof(std::complex<double>), cluster_id));
+    std::complex<double>* bet = static_cast<std::complex<double>*>(malloc_ht(sizeof(std::complex<double>), cluster_id));
+
+    memcpy(C_dsp, c, c_bytes);
+    *alp = *alpha;
+    *bet = *beta;
+
+    mt_hthread_zgemm(CBLAS_ORDER::CblasColMajor,
+                     convertBLASTranspose(transa),
+                     convertBLASTranspose(transb),
+                     *m,
+                     *n,
+                     *k,
+                     alp,
+                     a,
+                     *lda,
+                     b,
+                     *ldb,
+                     bet,
+                     C_dsp,
+                     *ldc,
+                     cluster_id);
+    // Copy the result back into the caller's matrix before releasing the buffers.
+    memcpy(c, C_dsp, c_bytes);
+
+    free_ht(C_dsp);
+    free_ht(alp);
+    free_ht(bet);
+}
+
 void cgemm_mth_(const char* transa,
                 const char* transb,
                 const int* m,
@@ -443,6 +503,64 @@ void cgemm_mth_(const char* transa,
     free_ht(bet);
 } // cgemm that needn't malloc_ht or free_ht
 
+void cgemm_pack_mth_(const char* transa,
+                     const char* transb,
+                     const int* m,
+                     const int* n,
+                     const int* k,
+                     const std::complex<float>* alpha,
+                     const std::complex<float>* a,
+                     const int* lda,
+                     const std::complex<float>* b,
+                     const int* ldb,
+                     const std::complex<float>* beta,
+                     std::complex<float>* c,
+                     const int* ldc,
+                     int cluster_id)
+{
+    // Stage a, b and c (plus alpha/beta) in malloc_ht buffers, run mt_hthread_cgemm
+    // on the copies, then copy c back and release every buffer.
+    using cplx = std::complex<float>;
+    const bool a_is_plain = (transa[0] == 'N' || transa[0] == 'n');
+    const bool b_is_plain = (transb[0] == 'N' || transb[0] == 'n');
+    const size_t a_cols = static_cast<size_t>(a_is_plain ? *k : *m);
+    const size_t b_cols = static_cast<size_t>(b_is_plain ? *n : *k);
+    const size_t a_bytes = static_cast<size_t>(*lda) * a_cols * sizeof(cplx);
+    const size_t b_bytes = static_cast<size_t>(*ldb) * b_cols * sizeof(cplx);
+    const size_t c_bytes = static_cast<size_t>(*ldc) * static_cast<size_t>(*n) * sizeof(cplx);
+
+    cplx* A_buf = static_cast<cplx*>(malloc_ht(a_bytes, cluster_id));
+    cplx* B_buf = static_cast<cplx*>(malloc_ht(b_bytes, cluster_id));
+    cplx* C_buf = static_cast<cplx*>(malloc_ht(c_bytes, cluster_id));
+    cplx* alp = static_cast<cplx*>(malloc_ht(sizeof(cplx), cluster_id));
+    cplx* bet = static_cast<cplx*>(malloc_ht(sizeof(cplx), cluster_id));
+
+    memcpy(A_buf, a, a_bytes);
+    memcpy(B_buf, b, b_bytes);
+    memcpy(C_buf, c, c_bytes);
+    *alp = *alpha;
+    *bet = *beta;
+
+    mt_hthread_cgemm(CBLAS_ORDER::CblasColMajor,
+                     convertBLASTranspose(transa),
+                     convertBLASTranspose(transb),
+                     *m, *n, *k,
+                     static_cast<const void*>(alp),
+                     static_cast<const void*>(A_buf), *lda,
+                     static_cast<const void*>(B_buf), *ldb,
+                     static_cast<const void*>(bet),
+                     static_cast<void*>(C_buf), *ldc,
+                     cluster_id);
+
+    memcpy(c, C_buf, c_bytes);
+
+    free_ht(A_buf);
+    free_ht(B_buf);
+    free_ht(C_buf);
+    free_ht(alp);
+    free_ht(bet);
+}
+
 void sgemv_mth_(const char* transa,
                 const int* m,
                 const int* n,
@@ -570,4 +688,4 @@ void cgemv_mth_(const char* transa,
     free_ht(alp);
     free_ht(bet);
 }
-} // namespace mtfunc
\ No newline at end of file
+} // namespace mtfunc
diff --git a/source/source_base/kernels/dsp/dsp_connector.h b/source/source_base/kernels/dsp/dsp_connector.h
index 997a21de59..3dcc3d125d 100644
--- a/source/source_base/kernels/dsp/dsp_connector.h
+++ b/source/source_base/kernels/dsp/dsp_connector.h
@@ -62,19 +62,21 @@ void zgemm_mt_(const char* transa,
                int cluster_id);
 
 void cgemm_mt_(const char* transa,
-               const char* transb,
-               const int* m,
-               const int* n,
-               const int* k,
-               const std::complex<float>* alpha,
-               const std::complex<float>* a,
-               const int* lda,
-               const std::complex<float>* b,
-               const int* ldb,
-               const std::complex<float>* beta,
-               std::complex<float>* c,
-               const int* ldc,
-               int cluster_id);
+                const char* transb,
+                const int* m,
+                const int* n,
+                const int* k,
+                const std::complex<float>* alpha,
+                const std::complex<float>* a,
+                const int* lda,
+                const std::complex<float>* b,
+                const int* ldb,
+                const std::complex<float>* beta,
+                std::complex<float>* c,
+                const int* ldc,
+                int cluster_id);
+
+
 
 void sgemv_mt_(const char* transa,
                const int* m,
@@ -173,6 +175,21 @@ void zgemm_mth_(const char* transa,
                 const int* ldc,
                 int cluster_id);
 
+void zgemm_pack_mth_(const char* transa,
+                     const char* transb,
+                     const int* m,
+                     const int* n,
+                     const int* k,
+                     const std::complex<double>* alpha,
+                     const std::complex<double>* a,
+                     const int* lda,
+                     const std::complex<double>* b,
+                     const int* ldb,
+                     const std::complex<double>* beta,
+                     std::complex<double>* c,
+                     const int* ldc,
+                     int cluster_id); // zgemm via mt_hthread_zgemm; c is staged through a malloc_ht buffer
+
 void cgemm_mth_(const char* transa,
                 const char* transb,
                 const int* m,
@@ -188,6 +205,21 @@ void cgemm_mth_(const char* transa,
                 const int* ldc,
                 int cluster_id);
 
+void cgemm_pack_mth_(const char* transa,
+                    const char* transb,
+                    const int* m,
+                    const int* n,
+                    const int* k,
+                    const std::complex<float>* alpha,
+                    const std::complex<float>* a,
+                    const int* lda,
+                    const std::complex<float>* b,
+                    const int* ldb,
+                    const std::complex<float>* beta,
+                    std::complex<float>* c,
+                    const int* ldc,
+                    int cluster_id); // cgemm via mt_hthread_cgemm; a, b and c are staged through malloc_ht buffers
+
 void sgemv_mth_(const char* transa,
                 const int* m,
                 const int* n,
@@ -282,4 +314,4 @@ void dsp_dav_subspace_reduce(T* hcc, T* scc, int nbase, int nbase_x, int notconv
 } // namespace mtfunc
 
 #endif
-#endif
\ No newline at end of file
+#endif
diff --git a/source/source_base/module_external/blas_connector_matrix.cpp b/source/source_base/module_external/blas_connector_matrix.cpp
index 3b18d3ee3a..2becee24bf 100644
--- a/source/source_base/module_external/blas_connector_matrix.cpp
+++ b/source/source_base/module_external/blas_connector_matrix.cpp
@@ -107,7 +107,9 @@ void BlasConnector::gemm(const char transa,
 #ifdef __DSP
     else if (device_type == base_device::AbacusDevice_t::DspDevice)
     {
-        mtfunc::cgemm_mth_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc, GlobalV::MY_RANK % PARAM.inp.dsp_count);
+        mtfunc::cgemm_pack_mth_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc, GlobalV::MY_RANK % PARAM.inp.dsp_count);
+        // cgemm_mth_: DSP gemm without staging the matrices;
+        // cgemm_pack_mth_: copies all three matrices into malloc_ht buffers first and copies c back afterwards
     }
 #endif
     else if (device_type == base_device::AbacusDevice_t::GpuDevice)
@@ -158,7 +160,9 @@ void BlasConnector::gemm(const char transa,
 #ifdef __DSP
     else if (device_type == base_device::AbacusDevice_t::DspDevice)
     {
-        mtfunc::zgemm_mth_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc, GlobalV::MY_RANK % PARAM.inp.dsp_count);
+        mtfunc::zgemm_pack_mth_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc, GlobalV::MY_RANK % PARAM.inp.dsp_count);
+        // zgemm_mth_: DSP gemm without staging the matrices;
+        // zgemm_pack_mth_: copies only the output matrix c into a malloc_ht buffer and back; inputs pass through
     }
 #endif
     else if (device_type == base_device::AbacusDevice_t::GpuDevice)
@@ -277,7 +281,9 @@ void BlasConnector::gemm_cm(const char transa,
 #ifdef __DSP
     else if (device_type == base_device::AbacusDevice_t::DspDevice)
     {
-        mtfunc::cgemm_mth_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc, GlobalV::MY_RANK % PARAM.inp.dsp_count);
+        mtfunc::cgemm_pack_mth_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc, GlobalV::MY_RANK % PARAM.inp.dsp_count);
+        // cgemm_mth_: DSP gemm without staging the matrices;
+        // cgemm_pack_mth_: copies a, b and c into malloc_ht buffers first and copies c back afterwards
     }
 #endif
 #ifdef __CUDA
@@ -328,7 +334,9 @@ void BlasConnector::gemm_cm(const char transa,
 #ifdef __DSP
     else if (device_type == base_device::AbacusDevice_t::DspDevice)
     {
-        mtfunc::zgemm_mth_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc, GlobalV::MY_RANK % PARAM.inp.dsp_count);
+        mtfunc::zgemm_pack_mth_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc, GlobalV::MY_RANK % PARAM.inp.dsp_count);
+        // zgemm_mth_: DSP gemm without staging the matrices;
+        // zgemm_pack_mth_: copies only c into a malloc_ht buffer and back; a and b pass through unchanged
     }
 #endif
 #ifdef __CUDA

From 3cab0f03f4a382323eaa2de3c1591936f404f11f Mon Sep 17 00:00:00 2001
From: linpeize <linpeize2024@163.com>
Date: Fri, 20 Mar 2026 21:28:50 +0800
Subject: [PATCH 4/4] Refactor: add nullptr for uninitialized pointer (#7069)

Co-authored-by: linpz <linpz@mail.ustc.edu.cn>
---
 source/source_base/math_bspline.h                |  2 +-
 source/source_base/math_chebyshev.h              |  6 +++---
 source/source_base/mcd.c                         |  8 ++++----
 .../module_container/base/core/bfc_allocator.h   |  2 +-
 source/source_basis/module_ao/ORB_nonlocal.h     |  2 +-
 source/source_basis/module_ao/ORB_nonlocal_lm.h  |  4 ++--
 source/source_basis/module_ao/ORB_read.cpp       |  6 +++---
 source/source_cell/setup_nonlocal.h              |  4 ++--
 source/source_esolver/esolver_fp.h               |  4 ++--
 source/source_hsolver/module_genelpa/utils.cpp   |  4 ++--
 source/source_lcao/force_stress_arrays.h         |  2 +-
 source/source_lcao/module_ri/ewald_Vq.h          |  2 +-
 source/source_lcao/module_rt/propagator_cn2.cpp  |  4 ++--
 source/source_md/md_base.h                       |  2 +-
 source/source_md/nhchain.h                       | 16 ++++++++--------
 source/source_pw/module_pwdft/vl_pw.h            |  4 ++--
 source/source_relax/bfgs_basic.h                 | 10 +++++-----
 source/source_relax/lbfgs.h                      |  2 +-
 18 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/source/source_base/math_bspline.h b/source/source_base/math_bspline.h
index 5973f04a1c..1d246d7fe2 100644
--- a/source/source_base/math_bspline.h
+++ b/source/source_base/math_bspline.h
@@ -37,7 +37,7 @@ class Bspline
     int norder; // the order of bezier base; norder >= 0
     double Dx; // Dx: the interval of control node
     double xi; // xi: the starting point
-    double *bezier; // bezier[n] = Bk[n]
+    double * bezier = nullptr; // bezier[n] = Bk[n]
 
   public:
     Bspline();
diff --git a/source/source_base/math_chebyshev.h b/source/source_base/math_chebyshev.h
index 0b9acf50f0..3d534b911c 100644
--- a/source/source_base/math_chebyshev.h
+++ b/source/source_base/math_chebyshev.h
@@ -210,7 +210,7 @@ class Chebyshev
     std::complex<REAL>* coefc_cpu = nullptr;    //[CPU] expansion coefficient of each order
 
     FFTW<REAL> fftw;          // use for fftw
-    REAL* polytrace;                  //[CPU] w_n = \sum_i v^+ * T_n(A) * v, only
+    REAL* polytrace = nullptr;                  //[CPU] w_n = \sum_i v^+ * T_n(A) * v, only
 
     bool getcoef_real;    // coef_real has been calculated
     bool getcoef_complex; // coef_complex has been calculated
@@ -248,7 +248,7 @@ class FFTW<double>
     FFTW(const int norder2_in);
     ~FFTW();
     void execute_fftw();
-    double* dcoef; //[norder2]
+    double* dcoef = nullptr; //[norder2]
     fftw_complex* ccoef = nullptr;
     fftw_plan coef_plan;
 };
@@ -261,7 +261,7 @@ class FFTW<float>
     FFTW(const int norder2_in);
     ~FFTW();
     void execute_fftw();
-    float* dcoef; //[norder2]
+    float* dcoef = nullptr; //[norder2]
     fftwf_complex* ccoef = nullptr;
     fftwf_plan coef_plan;
 };
diff --git a/source/source_base/mcd.c b/source/source_base/mcd.c
index ca5009f468..cf04c7161a 100644
--- a/source/source_base/mcd.c
+++ b/source/source_base/mcd.c
@@ -64,9 +64,9 @@ typedef struct ChunkS
 #else
 	long long	id;	// 64bit allocation ID
 #endif
-	char		*function;	//creating function
-	char		*file;		//file function is in
-	void		*ptr;		//pointer to allocation
+	char		* function = nullptr;	//creating function
+	char		* file = nullptr;		//file function is in
+	void		* ptr = nullptr;		//pointer to allocation
 	struct ChunkS 	*next,		//next chunk (null if nonw)
 			*prev;		//previous chunk (null if nonw)
 }Chunk;
@@ -706,7 +706,7 @@ int MCD_sscanf(const char *str,const char *fmt,char*fun,char*file,int line,...)
 void scan_args(const char *fmt,va_list argptr,char*fun,char*file,int line)
 {
 	char **ptr;
-	void *dummy;	// clear up the unused warning
+	void * dummy = nullptr;	// clear up the unused warning
 
 	for(;*fmt;fmt++) {
 		if(*fmt!='%')
diff --git a/source/source_base/module_container/base/core/bfc_allocator.h b/source/source_base/module_container/base/core/bfc_allocator.h
index 661a097529..ac1384f2cd 100644
--- a/source/source_base/module_container/base/core/bfc_allocator.h
+++ b/source/source_base/module_container/base/core/bfc_allocator.h
@@ -122,7 +122,7 @@ class BFCAllocator : public Allocator {
             }
 
           private:
-            BFCAllocator* allocator_;  // The parent allocator
+            BFCAllocator* allocator_ = nullptr;  // The parent allocator
         };
 
         using free_chunk_set_t = std::set<ChunkHandle, ChunkComparator>;
diff --git a/source/source_basis/module_ao/ORB_nonlocal.h b/source/source_basis/module_ao/ORB_nonlocal.h
index bc5e5776ae..4c4007a796 100644
--- a/source/source_basis/module_ao/ORB_nonlocal.h
+++ b/source/source_basis/module_ao/ORB_nonlocal.h
@@ -42,7 +42,7 @@ class Numerical_Nonlocal
 			const int& nproj_in,
 			const Numerical_Nonlocal_Lm* ps_orbital_in);
 
-	Numerical_Nonlocal_Lm* Proj; ///< length: nproj(only store radial function )
+	Numerical_Nonlocal_Lm* Proj = nullptr; ///< length: nproj(only store radial function )
 
 	const double& get_rcut_max() const { return rcut_max; }
     const int& get_nproj() const { return nproj; }
diff --git a/source/source_basis/module_ao/ORB_nonlocal_lm.h b/source/source_basis/module_ao/ORB_nonlocal_lm.h
index 11e00d2470..5ff1378b08 100644
--- a/source/source_basis/module_ao/ORB_nonlocal_lm.h
+++ b/source/source_basis/module_ao/ORB_nonlocal_lm.h
@@ -85,11 +85,11 @@ class Numerical_Nonlocal_Lm
 	double kcut;
 	double dk;
 
-	double* r_radial; //points of r
+	double* r_radial = nullptr; //points of r
 	double* k_radial = nullptr;
 
 	double* rab = nullptr;
-	double* beta_r; // |beta(r) * r>
+	double* beta_r = nullptr; // |beta(r) * r>
 	double* beta_k = nullptr;
 };
 
diff --git a/source/source_basis/module_ao/ORB_read.cpp b/source/source_basis/module_ao/ORB_read.cpp
index 50d7e70eb9..a6a0bb21d5 100644
--- a/source/source_basis/module_ao/ORB_read.cpp
+++ b/source/source_basis/module_ao/ORB_read.cpp
@@ -466,10 +466,10 @@ void LCAO_Orbitals::read_orb_file(std::ofstream& ofs_in, // GlobalV::ofs_running
         {
             ofs_in << " " << std::setw(12) << count + 1 << std::setw(3) << L << std::setw(3) << N;
 
-            double* radial; // radial mesh
-            double* psi;    // radial local orbital
+            double* radial = nullptr; // radial mesh
+            double* psi = nullptr;    // radial local orbital
             double* psir;   // psi * r
-            double* rab;    // dr
+            double* rab = nullptr;    // dr
 
             // set the number of mesh and the interval distance.
             ofs_in << std::setw(8) << meshr << std::setw(8) << dr;
diff --git a/source/source_cell/setup_nonlocal.h b/source/source_cell/setup_nonlocal.h
index 0ae8e2df03..4ac214c97e 100644
--- a/source/source_cell/setup_nonlocal.h
+++ b/source/source_cell/setup_nonlocal.h
@@ -14,8 +14,8 @@ class InfoNonlocal
 		///
 		///NON-LOCAL part for LCAO
 		///
-		Numerical_Nonlocal* Beta;/// nonlocal projectors (1-dimension array)
-		int *nproj; //mohan add 2010-12-19
+		Numerical_Nonlocal* Beta = nullptr;/// nonlocal projectors (1-dimension array)
+		int * nproj = nullptr; //mohan add 2010-12-19
 		int nprojmax; // mohan add 2010-03-07
 		double rcutmax_Beta;	//caoyu add 2021-05-24
 		const double& get_rcutmax_Beta(void) const { return rcutmax_Beta; }
diff --git a/source/source_esolver/esolver_fp.h b/source/source_esolver/esolver_fp.h
index a57af07901..59fd79a994 100644
--- a/source/source_esolver/esolver_fp.h
+++ b/source/source_esolver/esolver_fp.h
@@ -60,8 +60,8 @@ class ESolver_FP: public ESolver
     //!          charge density and potential are defined on dense grids,
     //!          but effective potential needs to be interpolated on smooth grids in order to compute Veff|psi>
     ModulePW::PW_Basis* pw_rho = nullptr;
-    ModulePW::PW_Basis* pw_rhod;    //! dense grid for USPP
-    ModulePW::PW_Basis_Big* pw_big; ///< [temp] pw_basis_big class
+    ModulePW::PW_Basis* pw_rhod = nullptr;    //! dense grid for USPP
+    ModulePW::PW_Basis_Big* pw_big = nullptr; ///< [temp] pw_basis_big class
 
     //! parallel for rho grid
     Parallel_Grid Pgrid;
diff --git a/source/source_hsolver/module_genelpa/utils.cpp b/source/source_hsolver/module_genelpa/utils.cpp
index 6654c33b9e..ed79a5790a 100644
--- a/source/source_hsolver/module_genelpa/utils.cpp
+++ b/source/source_hsolver/module_genelpa/utils.cpp
@@ -94,7 +94,7 @@ void loadMatrix(const char FileName[], int nFull, double* a, int* desca, int bla
     if (myid == ROOT_PROC)
         matrixFile.open(FileName);
 
-    double* b; // buffer
+    double* b = nullptr; // buffer
     const int MAX_BUFFER_SIZE = 1e9; // max buffer size is 1GB
 
     int N = nFull;
@@ -179,7 +179,7 @@ void saveMatrix(const char FileName[], int nFull, double* a, int* desca, int bla
         matrixFile.width(24);
     }
 
-    double* b; // buffer
+    double* b = nullptr; // buffer
     const int MAX_BUFFER_SIZE = 1e9; // max buffer size is 1GB
 
     int N = nFull;
diff --git a/source/source_lcao/force_stress_arrays.h b/source/source_lcao/force_stress_arrays.h
index 725665f5c8..6d9b726b2d 100644
--- a/source/source_lcao/force_stress_arrays.h
+++ b/source/source_lcao/force_stress_arrays.h
@@ -44,7 +44,7 @@ class ForceStressArrays
     // r_mu - r_nu
     //----------------------------------------
 
-    double* DH_r;//zhengdy added 2017-07
+    double* DH_r = nullptr;//zhengdy added 2017-07
 
     double* stvnl11 = nullptr;
     double* stvnl12 = nullptr;
diff --git a/source/source_lcao/module_ri/ewald_Vq.h b/source/source_lcao/module_ri/ewald_Vq.h
index bbd2cff5d4..4f53a80a70 100644
--- a/source/source_lcao/module_ri/ewald_Vq.h
+++ b/source/source_lcao/module_ri/ewald_Vq.h
@@ -83,7 +83,7 @@ class Ewald_Vq
     double ccp_rmesh_times;
     LRI_CV<Tdata> cv;
     Gaussian_Abfs gaussian_abfs;
-    const K_Vectors* p_kv;
+    const K_Vectors* p_kv = nullptr;
     std::vector<ModuleBase::Vector3<double>> kvec_c;
     // std::vector<double> wk;
     MPI_Comm mpi_comm;
diff --git a/source/source_lcao/module_rt/propagator_cn2.cpp b/source/source_lcao/module_rt/propagator_cn2.cpp
index 3f85ed26a0..8563dee8e3 100644
--- a/source/source_lcao/module_rt/propagator_cn2.cpp
+++ b/source/source_lcao/module_rt/propagator_cn2.cpp
@@ -388,10 +388,10 @@ void Propagator::compute_propagator_cn2_tensor(const int nlocal,
 
     // 5. QR Factorization of A (Denominator)
     int64_t tau_size = m_global + nb;
-    void* d_tau;
+    void* d_tau = nullptr; // buffer of tau_size complex<double> entries for the QR step; allocated on cublas_res.stream just below
     cudaMallocAsync(&d_tau, tau_size * sizeof(std::complex<double>), cublas_res.stream);
 
-    int* d_info;
+    int* d_info = nullptr; // single int, allocated on cublas_res.stream and zeroed just below; presumably the factorization status flag -- TODO confirm
     cudaMallocAsync(&d_info, sizeof(int), cublas_res.stream);
     cudaMemsetAsync(d_info, 0, sizeof(int), cublas_res.stream);
 
diff --git a/source/source_md/md_base.h b/source/source_md/md_base.h
index c235df3d17..ccc919ba89 100644
--- a/source/source_md/md_base.h
+++ b/source/source_md/md_base.h
@@ -75,7 +75,7 @@ class MD_base
     int step_;                          ///< the MD step finished in current calculation
     int step_rst_;                      ///< the MD step finished in previous calculations
     int frozen_freedom_;                ///< the fixed freedom of the system
-    double* allmass;                    ///< atom mass
+    double* allmass = nullptr;                    ///< atom mass
     ModuleBase::Vector3<double>* pos;   ///< atom displacements  liuyu modify 2023-03-22
     ModuleBase::Vector3<double>* vel;   ///< atom velocity
     ModuleBase::Vector3<int>* ionmbl;   ///< atom is frozen or not
diff --git a/source/source_md/nhchain.h b/source/source_md/nhchain.h
index bdbcf08b2a..e338f2bb35 100644
--- a/source/source_md/nhchain.h
+++ b/source/source_md/nhchain.h
@@ -73,10 +73,10 @@ class Nose_Hoover : public MD_base
 
     int tdof;           ///< particle degree of freedom
     double t_target=0.0;///< target temperature
-    double* mass_eta;   ///< mass of thermostats coupled with particles
-    double* eta;        ///< position of thermostats coupled with particles
-    double* v_eta;      ///< velocity of thermostats coupled with particles
-    double* g_eta;      ///< acceleration of thermostats coupled with particles
+    double* mass_eta = nullptr;   ///< mass of thermostats coupled with particles
+    double* eta = nullptr;        ///< position of thermostats coupled with particles
+    double* v_eta = nullptr;      ///< velocity of thermostats coupled with particles
+    double* g_eta = nullptr;      ///< acceleration of thermostats coupled with particles
 
     int npt_flag;                ///< whether NPT ensemble
     double mass_omega[6];        ///< mass of lattice component
@@ -89,10 +89,10 @@ class Nose_Hoover : public MD_base
     double p_target[6];          ///< target stress components
     double p_hydro = 0.0;        ///< target hydrostatic target pressure
     double p_current[6] = {0.0}; ///< current stress after coupled
-    double* mass_peta;           ///< mass of thermostats coupled with barostat
-    double* peta;                ///< position of thermostats coupled with barostat
-    double* v_peta;              ///< velocity of thermostats coupled with barostat
-    double* g_peta;              ///< acceleration of thermostats coupled with barostat
+    double* mass_peta = nullptr;           ///< mass of thermostats coupled with barostat
+    double* peta = nullptr;                ///< position of thermostats coupled with barostat
+    double* v_peta = nullptr;              ///< velocity of thermostats coupled with barostat
+    double* g_peta = nullptr;              ///< acceleration of thermostats coupled with barostat
     double mtk_term=0;           ///< mtk correction
     double md_tfreq;             ///< Oscillation frequency, used to determine qmass of thermostats coupled with particles
     double md_pfirst;            ///< Initial pressure
diff --git a/source/source_pw/module_pwdft/vl_pw.h b/source/source_pw/module_pwdft/vl_pw.h
index afc323d3e5..0a0f0789db 100644
--- a/source/source_pw/module_pwdft/vl_pw.h
+++ b/source/source_pw/module_pwdft/vl_pw.h
@@ -24,11 +24,11 @@ class pseudopot_cell_vl
                    const ModulePW::PW_Basis* rho_basis);
 
     ModuleBase::matrix vloc;   //(ntype,ngl),the local potential for each atom type(ntype,ngl)
-	bool *numeric; //[ntype], =true
+	bool * numeric = nullptr; //[ntype], =true
 
 private:
 
-	double *zp;   // (npsx),the charge of the pseudopotential
+	double * zp = nullptr;   // (npsx),the charge of the pseudopotential
 
 	void allocate(const UnitCell& ucell,
                   const int ngg);
diff --git a/source/source_relax/bfgs_basic.h b/source/source_relax/bfgs_basic.h
index 777e6c78eb..85de10474a 100644
--- a/source/source_relax/bfgs_basic.h
+++ b/source/source_relax/bfgs_basic.h
@@ -26,12 +26,12 @@ class BFGS_Basic
     void reset_hessian(void);
     void save_bfgs(void);
 
-    double* pos;  // std::vector containing 3N coordinates of the system ( x )
-    double* grad; // std::vector containing 3N components of ( grad( V(x) ) )
-    double* move; // pos = pos_p + move.
+    double* pos = nullptr;  // pointer to the 3N coordinates of the system ( x ); a raw double*, not a std::vector
+    double* grad = nullptr; // pointer to the 3N components of ( grad( V(x) ) ); a raw double*, not a std::vector
+    double* move = nullptr; // displacement such that pos = pos_p + move.
 
-    double* pos_p;  // p: previous
-    double* grad_p; // p: previous
+    double* pos_p = nullptr;  // p: previous
+    double* grad_p = nullptr; // p: previous
     double* move_p = nullptr;
 
   public:                        // mohan update 2011-06-12
diff --git a/source/source_relax/lbfgs.h b/source/source_relax/lbfgs.h
index 1ad929b722..94e33fec70 100644
--- a/source/source_relax/lbfgs.h
+++ b/source/source_relax/lbfgs.h
@@ -48,7 +48,7 @@ class LBFGS
     double energy;                          ///< Current system energy
     double alpha_k;                         ///< Step size parameter
 
-    ModuleESolver::ESolver* solver;         ///< Structure solver
+    ModuleESolver::ESolver* solver = nullptr;         ///< Structure solver
     std::vector<double> steplength;//the length of atoms displacement 
     std::vector<std::vector<double>> H;//Hessian matrix
     std::vector<double> force0;//force in previous step