-
Notifications
You must be signed in to change notification settings - Fork 23
popcorn page fault handling
If the kernel needs to resolve a page fault of a migrated process, it must retrieve the page contents from the node that currently owns that particular page. I will not be describing page ownership here, but instead be focusing on how a page's contents are retrieved from a remote node.
The overall sequence of events is:
- Page fault handler is invoked on node A
- Popcorn's hooks in the hardware-specific page fault handler test whether the current process is remote via distributed_remote_process(tsk)
- (distributed_remote_process() is a popcorn-specific function)
- If it is a distributed_remote_process and its vma is NULL or vma->vm_start > address, it will synchronously attempt to fetch the vma via a VMA_INFO_REQUEST to node B (the vma's owner)
- When node B receives a VMA_INFO_REQUEST, it looks up this information, packages it up, and responds to node A with a VMA_INFO_RESPONSE
- After node A begins executing its hardware-agnostic fault handling code, it will reach popcorn-specific code that again tests whether the process is a distributed_remote_process
- If this is the case, it will call into a popcorn-specific page-fault handler (page_server_handle_pte_fault)
- It then determines how it should handle the local fault -- at origin, at remote, or by returning VM_FAULT_CONTINUE, which causes it to use the kernel's normal page fault handling mechanisms
- if the thread is at origin (!current->at_remote), it executes __handle_localfault_at_origin
- if thread at remote &
- if thread at origin (
This information is best viewed in emacs org/outline mode so you can collapse and show subtrees. To do so, copy the contents of the raw code block at the bottom of this page into an emacs buffer and save the file with an extension of .org.
For every function call and conditional/loop statement, the number of asterisks is increased by one. An asterisk is removed for each returned function and at the end of a conditional block. I have noted the file and line number of each interesting function call so it can be easily referenced.
Line numbers may not exactly match the current files in the merge branch, but they should be close.
- __do_page_fault
-
*
if (distributed_remote_process(tsk) && (!vma || vma->vm_start > address) && (vma_server_fetch_vma(tsk, address) == 0) -
***
// the remainder of lines at this heading level describe what occurs in vma_server_fetch_vma -
***
struct remote_context *rc = get_task_remote(tsk); -
***
vi = __lookup_pending_vma_request(rc, addr); -
***
if !vi -
****
vi = __alloc_vma_info_request(tsk, addr, &req); -
*****
// allocates and populates vma_info_request_t *req -
****
v = __lookup_pending_vma_request(rc, addr); -
*****
// search for request in remote_context->vmas list and return any match -
***
if (req) -
****
// send PCN_KMSG_TYPE_VMA_INFO_REQUEST -
****
wait_for_completion(&vi->complete); -
**
vma = find_vma(mm, address) -
*
fault = handle_mm_fault(vma, address, flags) -
**
if is_vm_hugetlb_page(vma) -
***
ret = hugetlb_fault(vma->vm_mm, vma, address, flags) -
**
else -
***
ret = __handle_mm_fault(vma, address, flags) -
****
// packs vmf structure, allocates/sets pgd, p4d, pud, & pmd -
****
return handle_pte_fault(&vmf) -
*****
if pmd_none(*vmf->pmd) -
******
vmf->pte = NULL // leave __pte_alloc() until later, because vm_ops->fault may want to allocate a huge page, and if we expose the page table for an instant it will be difficult to retract from concurrent faults/rmap lookups -
*****
else -
******
if pmd_devmap_trans_unstable(vmf->pmd) return 0 -
******
// a regular pmd is established and it can't morph into a huge pmd -
******
// b/c we hold the mmap_sem read mode and khugepaged takes it in write mode -
******
// so now it's safe to run pte_offset_map() -
******
vmf->pte = pte_offset_map(vmf->pmd, vmf->address) -
******
vmf->orig_pte = *vmf->pte -
******
barrier() -
******
if pte_none(vmf->orig_pte) -
*******
pte_unmap(vmf->pte) -
*******
vmf->pte = NULL -
*****
if distributed_process(current) -
******
int ret = page_server_handle_pte_fault(vmf) -
*******
// what follows in this subtree is all popcorn-specific -
*******
if !current->at_remote -
********
return __handle_localfault_at_origin(vmf) -
*******
if pte_none(vmf->orig_pte) && (vmf->vma->vm_flags & VM_EXEC) -
********
return VM_FAULT_CONTINUE -
*******
if pte_none(vmf->orig_pte) && (!vma_is_anonymous(vmf->vma) && ((vmf->vma->vm_flags & (VM_WRITE | VM_SHARED)) == 0)) -
********
return VM_FAULT_CONTINUE -
*******
if (!pte_present(vmf->orig_pte)) || (vmf->vma->vm_flags & VM_WRITE) && fault_for_write(vmf->flags) && !pte_write(vmf->orig_pte) -
********
return __handle_localfault_at_remote(vmf) -
*********
if (!pte_same(*vmf->pte, vmf->orig_pte)) -
**********
return 0 // fault already handled -
*********
fh = __start_fault_handling(current, addr, vmf->flags, ptl, &leader) -
*********
if (!leader) return fh->ret; -
*********
if pte_none(*vmf->pte) || !(page = vm_normal_page(vmf->vma, addr, *vmf->pte)) -
**********
page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vmf->vma, addr) -
**********
BUG_ON(!page) -
**********
populated = true -
*********
get_page(page) -
*********
rp = __fetch_page_from_origin(current, vmf->vma, addr, vmf->flags, page) -
**********
__request_remote_page(tsk, tsk->origin_nid, tsk->origin_pid, addr, fault_flags, ws->id, &rh); -
***********
// sets up popcorn kmsg in remote_page_request_t *req; object -
***********
pcn_kmsg_post(PCN_KMSG_TYPE_REMOTE_PAGE_REQUEST, from_nid, req, sizeof(*req)); -
***********
return 0 -
**********
rp = wait_at_station(ws); // rp will contain remote_page_response_t -
***********
// waits for completion or timeout -
**********
if (rp->result == 0) -
***********
paddr = kmap(page) -
***********
copy_to_user_page(....) -
***********
__SetPageUptodate(page) -
***********
return rp -
*********
if rp->result == VM_FAULT_CONTINUE -
**********
BUG_ON(populated) -
**********
entry = pte_make_valid(*vmf->pte); -
**********
mkwrite, mkdirty, wrprotect, mkyoung, update_mmu_cache as needed -
*********
else -
**********
if (populated) alloc_set_pte(vmf, memcg, page) -
**********
else __make_pte_valid(vmf->vma->vm_mm, vmf->vma, addr, vmf->flags, vmf->pte) -
*********
SetPageDistributed(vmf->vma->vm_mm, addr) -
*********
set_page_owner(my_nid, vmf->vma->vm_mm, addr); -
*********
put_page(page) -
*********
return 0 -
*******
return 0 -
******
// retry/backoff as needed -
******
if ret != VM_FAULT_CONTINUE return ret; -
*****
// .. what follows is the code that handles normal/local page faults -
**
return ret
On reception of PCN_KMSG_TYPE_REMOTE_PAGE_REQUEST,
-
*
process_remote_page_request(work) -
**
// unpacks response into remote_page_response_t *res; -
**
tsk = __get_task_struct(req->remote_pid); -
**
mm = get_task_mm(tsk); -
**
vma = find_vma(mm, req->addr); -
**
if (tsk->at_remote) -
***
res->result = __handle_remotefault_at_remote(tsk, mm, vma, req, res); // sets up remote_page_response -
**
else -
***
res->result = __handle_remotefault_at_origin(tsk, mm, vma, req, res); // sets up remote_page_response -
****
pte = __get_pte_at_alloc(mm, vma, addr, &pmd, &ptl) -
****
if (pte_none(*pte)) -
*****
ret = handle_pte_fault_origin(mm, vma, addr, pte, pmd, fault_flags) -
*****
if ret & VM_FAULT_RETRY: return VM_FAULT_RETRY -
****
fh = __start_fault_handling(tsk, addr, fault_flags, ptl, &leader) -
****
if (!fh) return VM_FAULT_RETRY // indicates same page is handled @ origin and might cause this node to be recursively blocked -
****
page = get_normal_page(vma, addr, pte) -
****
BUG_ON(!page) -
****
if (leader) -
*****
if test_page_owner(from_nid, mm, addr) -
******
__claim_local_page(tsk, addr, my_nid); -
*******
// for each popcorn node, revoke page ownership as needed -
********
__revoke_page_ownership(tsk, nid, pid, addr, ws->id); -
*********
// sets up and sends PCN_KMSG_TYPE_PAGE_INVALIDATE_REQUEST -
*******
wait_at_station(ws); -
******
BUG_ON(fault_for_read(fault_flags) && "Read fault from owner??") -
*****
else -
******
if !page_is_mine(mm, addr) -
*******
__claim_remote_page(tsk, addr, from_nid) -
********
// for each active node, it requests or revokes page ownership as needed -
********
if (from-- == 0) -
*********
__request_remote_page(tsk, nid, pid, addr, fault_flags, ws->id, &rh); -
**********
// sets up and sends PCN_KMSG_TYPE_REMOTE_PAGE_REQUEST, -
********
else if (fault_for_write(fault_flags)) -
*********
__revoke_page_ownership(tsk, nid, pid, addr, ws->id); -
**********
// sets up and sends PCN_KMSG_TYPE_PAGE_INVALIDATE_REQUEST -
********
else if fault_for_write(fault_flags) -
********
rp = wait_at_station(ws); -
********
copy_to_user_page(vma, page, addr, paddr, rp->page, PAGE_SIZE) -
******
else -
*******
__claim_local_page(tsk, addr, my_nid); -
*****
SetPageDistributed(mm, addr); -
*****
set_page_owner(from_nid, mm, addr); -
*****
// also set/clear page owner, make pte invalid/valid, wrprotect, update_mmu_cache -- as needed -
****
return grant ? VM_FAULT_CONTINUE : 0; -
**
// packs up res message -
**
pcn_kmsg_post(res_type, from_nid, res, res_size); // sends response
When handling a VMA info request
-
*
process_vma_info_request(req) -
**
// allocate and start setting up vma_info_response_t *res -
**
mm = get_task_mm(current) -
**
vma = find_vma(mm, addr) -
**
// continue to fill out res -
**
// send PCN_KMSG_TYPE_VMA_INFO_RESPONSE
Copy the text below into emacs and view via org-mode. For best results either make a symlink at /home/user/popcorn/linux-x86/
pointing to the root directory of the popcorn merge branch, or change all occurrences of /home/user/popcorn/linux-x86/
to the root dir of a local copy of the popcorn merge branch. If set up properly, emacs will treat those statements as hyperlinks and open that file/line number in another buffer when clicked.
popcorn page fault handling local/remote
[[__do_page_fault|/home/user/popcorn/linux-x86/arch/x86/mm/fault.c::1216]]
* if (distributed_remote_process(tsk) && (!vma || vma->vm_start > address) && (vma_server_fetch_vma(tsk, address) == 0)
*** // the remainder of lines at this heading level describe what occurs in vma_server_fetch_vma
*** struct remote_context *rc = get_task_remote(tsk);
*** vi = __lookup_pending_vma_request(rc, addr);
*** if !vi
**** vi = __alloc_vma_info_request(tsk, addr, &req);
***** // allocates and populates vma_info_request_t *req
**** v = __lookup_pending_vma_request(rc, addr);
***** // search for request in remote_context->vmas list and return any match
*** if (req)
**** // send PCN_KMSG_TYPE_VMA_INFO_REQUEST
**** wait_for_completion(&vi->complete);
** vma = find_vma(mm, address)
* fault = [[handle_mm_fault|/home/user/popcorn/linux-x86/mm/memory.c::4298]](vma, address, flags)
** if is_vm_hugetlb_page(vma)
*** ret = hugetlb_fault(vma->vm_mm, vma, address, flags)
** else
*** ret = [[__handle_mm_fault|/home/user/popcorn/linux-x86/mm/memory.c::4209]](vma, address, flags)
**** // packs vmf structure, allocates/sets pgd, p4d, pud, & pmd
**** return [[handle_pte_fault|/home/user/popcorn/linux-x86/mm/memory.c::3947]](&vmf)
***** if pmd_none(*vmf->pmd)
****** vmf->pte = NULL // leave __pte_alloc() until later, because vm_ops->fault may want to allocate a huge page, and if we expose the page table for an instant it will be difficult to retract from concurrent faults/rmap lookups
***** else
****** if pmd_devmap_trans_unstable(vmf->pmd) return 0
****** // a regular pmd is established and it can't morph into a huge pmd
****** // b/c we hold the mmap_sem read mode and khugepaged takes it in write mode
****** // so now it's safe to run pte_offset_map()
****** vmf->pte = pte_offset_map(vmf->pmd, vmf->address)
****** vmf->orig_pte = *vmf->pte
****** barrier()
****** if pte_none(vmf->orig_pte)
******* pte_unmap(vmf->pte)
******* vmf->pte = NULL
***** if distributed_process(current)
****** int ret = [[page_server_handle_pte_fault|/home/user/popcorn/linux-x86/kernel/popcorn/page_server.c::1863]](vmf)
******* // what follows in this subtree is all popcorn-specific
******* if !current->at_remote
******** return [[__handle_localfault_at_origin|/home/user/popcorn/linux-x86/kernel/popcorn/page_server.c::1757]](vmf)
******* if pte_none(vmf->orig_pte) && (vmf->vma->vm_flags & VM_EXEC)
******** return VM_FAULT_CONTINUE
******* if pte_none(vmf->orig_pte) && (!vma_is_anonymous(vmf->vma) && ((vmf->vma->vm_flags & (VM_WRITE | VM_SHARED)) == 0))
******** return VM_FAULT_CONTINUE
******* if (!pte_present(vmf->orig_pte)) || (vmf->vma->vm_flags & VM_WRITE) && fault_for_write(vmf->flags) && !pte_write(vmf->orig_pte)
******** return [[__handle_localfault_at_remote|/home/user/popcorn/linux-x86/kernel/popcorn/page_server.c::1607]](vmf)
********* if (!pte_same(*vmf->pte, vmf->orig_pte))
********** return 0 // fault already handled
********* fh = __start_fault_handling(current, addr, vmf->flags, ptl, &leader)
********* if (!leader) return fh->ret;
********* if pte_none(*vmf->pte) || !(page = vm_normal_page(vmf->vma, addr, *vmf->pte))
********** page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vmf->vma, addr)
********** BUG_ON(!page)
********** populated = true
********* get_page(page)
********* rp = [[__fetch_page_from_origin|/home/user/popcorn/linux-x86/kernel/popcorn/page_server.c::1050]](current, vmf->vma, addr, vmf->flags, page)
********** [[__request_remote_page|/home/user/popcorn/linux-x86/kernel/popcorn/page_server.c::1011]](tsk, tsk->origin_nid, tsk->origin_pid, addr, fault_flags, ws->id, &rh);
*********** // sets up popcorn kmsg in remote_page_request_t *req; object
*********** pcn_kmsg_post(PCN_KMSG_TYPE_REMOTE_PAGE_REQUEST, from_nid, req, sizeof(*req));
*********** return 0
********** rp = [[wait_at_station|/home/user/popcorn/linux-x86/kernel/popcorn/wait_station.c::59]](ws); // rp will contain remote_page_response_t
*********** // waits for completion or timeout
********** if (rp->result == 0)
*********** paddr = kmap(page)
*********** copy_to_user_page(....)
*********** __SetPageUptodate(page)
*********** return rp
********* if rp->result == VM_FAULT_CONTINUE
********** BUG_ON(populated)
********** entry = pte_make_valid(*vmf->pte);
********** mkwrite, mkdirty, wrprotect, mkyoung, update_mmu_cache as needed
********* else
********** if (populated) alloc_set_pte(vmf, memcg, page)
********** else __make_pte_valid(vmf->vma->vm_mm, vmf->vma, addr, vmf->flags, vmf->pte)
********* SetPageDistributed(vmf->vma->vm_mm, addr)
********* set_page_owner(my_nid, vmf->vma->vm_mm, addr);
********* put_page(page)
********* return 0
******* return 0
****** // retry/backoff as needed
****** if ret != VM_FAULT_CONTINUE return ret;
***** // .. what follows is the code that handles normal/local page faults
** return ret
On reception of PCN_KMSG_TYPE_REMOTE_PAGE_REQUEST,
* [[process_remote_page_request|/home/user/popcorn/linux-x86/kernel/popcorn/page_server.c::1433]](work)
** // unpacks response into remote_page_response_t *res;
** tsk = __get_task_struct(req->remote_pid);
** mm = get_task_mm(tsk);
** vma = find_vma(mm, req->addr);
** if (tsk->at_remote)
*** res->result = [[__handle_remotefault_at_remote|/home/user/popcorn/linux-x86/kernel/popcorn/page_server.c::1236]](tsk, mm, vma, req, res); // sets up remote_page_response
** else
*** res->result = [[__handle_remotefault_at_origin|/home/user/popcorn/linux-x86/kernel/popcorn/page_server.c::1311]](tsk, mm, vma, req, res); // sets up remote_page_response
**** pte = __get_pte_at_alloc(mm, vma, addr, &pmd, &ptl)
**** if (pte_none(*pte))
***** ret = handle_pte_fault_origin(mm, vma, addr, pte, pmd, fault_flags)
***** if ret & VM_FAULT_RETRY: return VM_FAULT_RETRY
**** fh = __start_fault_handling(tsk, addr, fault_flags, ptl, &leader)
**** if (!fh) return VM_FAULT_RETRY // indicates same page is handled @ origin and might cause this node to be recursively blocked
**** page = get_normal_page(vma, addr, pte)
**** BUG_ON(!page)
**** if (leader)
***** if test_page_owner(from_nid, mm, addr)
****** [[__claim_local_page|/home/user/popcorn/linux-x86/kernel/popcorn/page_server.c::1151]](tsk, addr, my_nid);
******* // for each popcorn node, revoke page ownership as needed
******** [[__revoke_page_ownership|/home/user/popcorn/linux-x86/kernel/popcorn/page_server.c::919]](tsk, nid, pid, addr, ws->id);
********* // sets up and sends PCN_KMSG_TYPE_PAGE_INVALIDATE_REQUEST
******* wait_at_station(ws);
****** BUG_ON(fault_for_read(fault_flags) && "Read fault from owner??")
***** else
****** if !page_is_mine(mm, addr)
******* [[__claim_remote_page|/home/user/popcorn/linux-x86/kernel/popcorn/page_server.c::1077]](tsk, addr, from_nid)
******** // for each active node, it requests or revokes page ownership as needed
******** if (from-- == 0)
********* [[__request_remote_page|/home/user/popcorn/linux-x86/kernel/popcorn/page_server.c::1011]](tsk, nid, pid, addr, fault_flags, ws->id, &rh);
********** // sets up and sends PCN_KMSG_TYPE_REMOTE_PAGE_REQUEST,
******** else if (fault_for_write(fault_flags))
********* [[__revoke_page_ownership|/home/user/popcorn/linux-x86/kernel/popcorn/page_server.c::919]](tsk, nid, pid, addr, ws->id);
********** // sets up and sends PCN_KMSG_TYPE_PAGE_INVALIDATE_REQUEST
******** else if fault_for_write(fault_flags)
******** rp = wait_at_station(ws);
******** copy_to_user_page(vma, page, addr, paddr, rp->page, PAGE_SIZE)
****** else
******* [[__claim_local_page|/home/user/popcorn/linux-x86/kernel/popcorn/page_server.c::1151]](tsk, addr, my_nid);
***** SetPageDistributed(mm, addr);
***** set_page_owner(from_nid, mm, addr);
***** // also set/clear page owner, make pte invalid/valid, wrprotect, update_mmu_cache -- as needed
**** return grant ? VM_FAULT_CONTINUE : 0;
** // packs up res message
** pcn_kmsg_post(res_type, from_nid, res, res_size); // sends response
When handling a VMA info request
* [[process_vma_info_request|/home/user/popcorn/linux-x86/kernel/popcorn/vma_server.c::614]](req)
** // allocate and start setting up vma_info_response_t *res
** mm = get_task_mm(current)
** vma = find_vma(mm, addr)
** // continue to fill out res
** // send PCN_KMSG_TYPE_VMA_INFO_RESPONSE