Skip to content

Commit

Permalink
GPUDirect Storage kernel driver (nvidia-fs) ver-2.22.3 commit
Browse files Browse the repository at this point in the history
  • Loading branch information
prashantp-nv-11 committed Nov 16, 2024
1 parent 7b080f3 commit 051bf51
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 47 deletions.
2 changes: 2 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
nvidia-fs (2.22.3) RELEASE; urgency=low
 * Switch to nvidia_p2p_get_pages_persistent() call for bare metal on x86
nvidia-fs (2.20.5) RELEASE; urgency=low
* Memset sglist beyond blk_rq_nr_phys_segments to avoid memory corruption

Expand Down
2 changes: 1 addition & 1 deletion src/GDS_VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.10.0.4
1.11.0.20
113 changes: 81 additions & 32 deletions src/nvfs-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ int nvfs_info_enabled = 1;
int nvfs_rw_stats_enabled = 0;
int nvfs_peer_stats_enabled = 0;
unsigned int nvfs_max_devices = MAX_NVFS_DEVICES;
int nvfs_use_legacy_p2p_allocation = 1;

// for storing real device count
static unsigned int nvfs_curr_devices = 1;
Expand Down Expand Up @@ -1200,12 +1201,22 @@ static int nvfs_unpin_gpu_pages(struct nvfs_gpu_args *gpu_info)

nvfs_update_free_gpustat(gpu_info);

ret = nvfs_nvidia_p2p_put_pages(0, 0, gpu_page_start,
gpu_info->page_table);
if (ret) {
nvfs_err("%s:%d error while calling "
"put_pages\n",
__func__, __LINE__);
if (gpu_info->use_legacy_p2p_allocation) {
ret = nvfs_nvidia_p2p_put_pages(0, 0, gpu_page_start,
gpu_info->page_table);
if (ret) {
nvfs_err("%s:%d error while calling "
"put_pages\n",
__func__, __LINE__);
}
} else {
ret = nvfs_nvidia_p2p_put_pages_persistent(gpu_page_start,
gpu_info->page_table, 0);
if (ret) {
nvfs_err("%s:%d error while calling "
"put_pages_persistent\n",
__func__, __LINE__);
}
}
}
}
Expand Down Expand Up @@ -1279,22 +1290,42 @@ static int nvfs_pin_gpu_pages(nvfs_ioctl_map_t *input_param,
atomic_set(&gpu_info->dma_mapping_in_progress, 0);
hash_init(gpu_info->buckets);

nvfs_dbg("Invoking p2p_get_pages pages (0x%lx - 0x%lx) "
"rounded size %lx\n",
(unsigned long)gpu_virt_start,
(unsigned long)gpu_virt_end, (unsigned long)rounded_size);

if (nvfs_use_legacy_p2p_allocation) {
gpu_info->use_legacy_p2p_allocation = 1;
nvfs_dbg("Invoking p2p_get_pages (0x%lx - 0x%lx) "
"rounded size %lx\n",
(unsigned long)gpu_virt_start,
(unsigned long)gpu_virt_end, (unsigned long)rounded_size);

ret = nvfs_nvidia_p2p_get_pages(0, 0, gpu_virt_start, rounded_size,
&gpu_info->page_table,
nvfs_get_pages_free_callback, nvfs_mgroup);
if (ret < 0) {
nvfs_err("%s:%d Error ret %d invoking nvidia_p2p_get_pages\n "
"va_start=0x%llx/va_end=0x%llx/"
"rounded_size=0x%lx/gpu_buf_length=0x%llx\n",
__func__, __LINE__, ret,
gpu_virt_start, gpu_virt_end,
rounded_size, gpu_buf_len);
goto error;
}
} else {
nvfs_dbg("Invoking nvidia_p2p_get_pages_persistent (0x%lx - 0x%lx) "
"rounded size %lx\n",
(unsigned long)gpu_virt_start,
(unsigned long)gpu_virt_end, (unsigned long)rounded_size);

ret = nvfs_nvidia_p2p_get_pages(0, 0, gpu_virt_start, rounded_size,
&gpu_info->page_table,
nvfs_get_pages_free_callback, nvfs_mgroup);
if (ret < 0) {
nvfs_err("%s:%d Error ret %d invoking nvidia_p2p_get_pages\n "
"va_start=0x%llx/va_end=0x%llx/"
"rounded_size=0x%lx/gpu_buf_length=0x%llx\n",
__func__, __LINE__, ret,
gpu_virt_start, gpu_virt_end,
rounded_size, gpu_buf_len);
goto error;
ret = nvfs_nvidia_p2p_get_pages_persistent(gpu_virt_start, rounded_size, &gpu_info->page_table, 0);
if (ret < 0) {
nvfs_err("%s:%d Error ret %d invoking nvidia_p2p_get_pages_persistent\n "
"va_start=0x%llx/va_end=0x%llx/"
"rounded_size=0x%lx/gpu_buf_length=0x%llx\n",
__func__, __LINE__, ret,
gpu_virt_start, gpu_virt_end,
rounded_size, gpu_buf_len);
goto error;
}
}

nvfs_dbg("GPU page table entries: %d\n", gpu_info->page_table->entries);
Expand Down Expand Up @@ -1338,13 +1369,21 @@ static int nvfs_pin_gpu_pages(nvfs_ioctl_map_t *input_param,
"version 0x%08x\n",
__func__, __LINE__,
gpu_info->page_table->version);
else if (is_invalid_page_size)
nvfs_err("%s:%d nvidia_p2p_get_pages "
"assumption of 64KB pages failed "
"size_id=%d\n",
__func__, __LINE__,
gpu_info->page_table->page_size);
else
else if (is_invalid_page_size){
if (gpu_info->use_legacy_p2p_allocation) {
nvfs_err("%s:%d nvidia_p2p_get_pages"
"assumption of 64KB pages failed "
"size_id=%d\n",
__func__, __LINE__,
gpu_info->page_table->page_size);
} else {
nvfs_err("%s:%d nvidia_p2p_get_pages_persistent "
"assumption of 64KB pages failed "
"size_id=%d\n",
__func__, __LINE__,
gpu_info->page_table->page_size);
}
} else
nvfs_err("%s:%d nvfs_invalid_p2p_get_page "
"fault trigger\n",
__func__, __LINE__);
Expand Down Expand Up @@ -1709,10 +1748,11 @@ struct nvfs_io* nvfs_io_init(int op, nvfs_ioctl_ioargs_t *ioargs)

gpu_virt_start = (gpu_info->gpuvaddr & GPU_PAGE_MASK);
va_offset = ((u64)gpu_info->gpuvaddr - gpu_virt_start) +
file_args->devptroff;
nvfs_dbg("gpuvaddr : %llu, gpu_virt_start : %llu, devptroff : %llu, va_offset : %llu\n",
(u64)gpu_info->gpuvaddr, (u64)gpu_virt_start, (u64) file_args->devptroff, va_offset);

file_args->devptroff;
nvfs_dbg("gpuvaddr : %llu, gpu_virt_start : %llu, devptroff : %llu, va_offset : %llu\n",
(u64)gpu_info->gpuvaddr, (u64)gpu_virt_start, (u64) file_args->devptroff, va_offset);
//if (offset_in_page(va_offset)) {
// TODO :: PP : Verify
if (va_offset % NVFS_BLOCK_SIZE) {
nvfs_err("gpu_va_offset not aligned va_offset %ld "
"devptroff %ld\n",
Expand Down Expand Up @@ -2404,6 +2444,13 @@ static int __init nvfs_init(void)
{
int i;

#if defined(CONFIG_X86_64)
if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)){
// X86 and not a VM
nvfs_use_legacy_p2p_allocation = 0;
}
#endif

pr_info("nvidia_fs: Initializing nvfs driver module\n");

major_number = register_chrdev(0, DEVICE_NAME, &nvfs_dev_fops);
Expand Down Expand Up @@ -2514,3 +2561,5 @@ module_param_named(peer_stats_enabled, nvfs_peer_stats_enabled, uint, S_IWUSR |
MODULE_PARM_DESC(nvfs_peer_stats_enabled, "enable peer stats");
module_param_named(rw_stats_enabled, nvfs_rw_stats_enabled, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(nvfs_rw_stats_enabled, "enable read-write stats");
module_param_named(use_legacy_p2p_allocation, nvfs_use_legacy_p2p_allocation, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(nvfs_use_legacy_p2p_allocation, "Use legacy p2p allocation");
24 changes: 12 additions & 12 deletions src/nvfs-dma.c
Original file line number Diff line number Diff line change
Expand Up @@ -424,18 +424,18 @@ static int nvfs_blk_rq_map_sg_internal(struct request_queue *q,
CHECK_AND_PUT_MGROUP(nvfs_mgroup);
nvfs_mgroup = NULL;

if (sg != NULL) {
if (prev_phys_addr && is_gpu_page_contiguous(prev_phys_addr, curr_phys_addr)) {
//DONT allow merge at (4G - 64K) to handle possible discontiguous IOVA
// by SMMU
if((gpu_page_index == 0) ||
(gpu_page_index % NVFS_P2P_MAX_CONTIG_GPU_PAGES != 0)) {
sg->length += bvec.bv_len;
prev_phys_addr = curr_phys_addr;
continue;
}
}
}
if (sg != NULL) {
if (prev_phys_addr && is_gpu_page_contiguous(prev_phys_addr, curr_phys_addr)) {
//DONT allow merge at (4G - 64K) to handle possible discontiguous IOVA
// by SMMU
if((gpu_page_index == 0) ||
(gpu_page_index % NVFS_P2P_MAX_CONTIG_GPU_PAGES != 0)) {
sg->length += bvec.bv_len;
prev_phys_addr = curr_phys_addr;
continue;
}
}
}

new_segment:
nsegs++;
Expand Down
1 change: 1 addition & 0 deletions src/nvfs-mmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ struct nvfs_gpu_args {
atomic_t callback_invoked;
wait_queue_head_t callback_wq; // wait queue for IO completion
bool is_bounce_buffer; // is this memory used for bounce buffer
bool use_legacy_p2p_allocation; // Use legacy p2p_get/put_page()
int n_phys_chunks; // number of contiguous physical address range
u64 pdevinfo; // pci domain(upper 4 bytes), bus, device, function for pci ranking
unsigned int gpu_hash_index; // cache gpu hash index for pci rank lookups
Expand Down
2 changes: 2 additions & 0 deletions src/nvfs-p2p.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

#include "nv-p2p.h"

#define nvfs_nvidia_p2p_get_pages_persistent nvidia_p2p_get_pages_persistent
#define nvfs_nvidia_p2p_put_pages_persistent nvidia_p2p_put_pages_persistent
#define nvfs_nvidia_p2p_get_pages nvidia_p2p_get_pages
#define nvfs_nvidia_p2p_put_pages nvidia_p2p_put_pages
#define nvfs_nvidia_p2p_dma_map_pages nvidia_p2p_dma_map_pages
Expand Down
4 changes: 2 additions & 2 deletions src/nvfs-vers.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@

#define NVFS_DRIVER_MAJOR_VERSION 2 //2-bytes

#define NVFS_DRIVER_MINOR_VERSION 20 //2-bytes
#define NVFS_DRIVER_MINOR_VERSION 22 //2-bytes

// template for build version
#define NVFS_DRIVER_PATCH_VERSION 5
#define NVFS_DRIVER_PATCH_VERSION 3

static inline unsigned int nvfs_driver_version(void) {
return (NVFS_DRIVER_MAJOR_VERSION << 16) | NVFS_DRIVER_MINOR_VERSION;
Expand Down

0 comments on commit 051bf51

Please sign in to comment.