Skip to content

Commit

Permalink
Add create_mcjit_execution_engine_with_memory_manager for custom MCJI…
Browse files Browse the repository at this point in the history
…T memory management (#566)

* Add a function to the Module that creates an MCJIT capable of using a custom MemoryManager

* Fix test failures caused by differing allocate_data_section call counts across LLVM versions

* refactor: Use explicit bool to i32 cast in MCJIT options

* refactor: Use MaybeUninit::zeroed() for MCJIT options initialization

* refactor: Mark c_str_to_str as unsafe due to arbitrary lifetime risk

* fix: Prevent UB by reordering Box creation in memory manager destruction
  • Loading branch information
ktanaka101 authored Feb 2, 2025
1 parent 03c0c6b commit 97128a9
Show file tree
Hide file tree
Showing 4 changed files with 589 additions and 7 deletions.
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ pub mod debug_info;
pub mod execution_engine;
pub mod intrinsics;
pub mod memory_buffer;
pub mod memory_manager;
#[deny(missing_docs)]
pub mod module;
pub mod object_file;
Expand Down
180 changes: 180 additions & 0 deletions src/memory_manager.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
use llvm_sys::prelude::LLVMBool;

/// A trait for user-defined memory management in MCJIT.
///
/// Implementors decide how memory for the code and data sections produced by
/// LLVM's MCJIT engine is allocated, finalized and released. This is sometimes
/// needed for:
/// - custom allocators,
/// - sandboxed or restricted environments,
/// - capturing stack map sections (e.g., for garbage collection),
/// - or other specialized JIT memory management requirements.
///
/// The trait requires [`std::fmt::Debug`] so that the adapter struct which
/// stores the boxed manager can itself derive `Debug`.
///
/// # StackMap and GC Integration
///
/// By examining the `section_name` argument in
/// [`allocate_data_section`](Self::allocate_data_section), you can detect the
/// stack map section and record where it was placed, which may be useful for
/// custom garbage collectors. The section is typically named
/// `.llvm_stackmaps` on Linux (ELF) and `__llvm_stackmaps` on Darwin (Mach-O).
/// For more information, refer to the [LLVM StackMaps
/// documentation](https://llvm.org/docs/StackMaps.html#stack-map-section).
pub trait McjitMemoryManager: std::fmt::Debug {
/// Allocates a block of memory for a code section.
///
/// # Parameters
///
/// * `size` - The size in bytes for the code section.
/// * `alignment` - The required alignment in bytes.
/// * `section_id` - A numeric ID that LLVM uses to identify this section.
/// * `section_name` - The section name. The adapter in this module passes an
///   empty string when LLVM supplies a null pointer or a name that is not
///   valid UTF-8.
///
/// # Returns
///
/// Returns a pointer to the allocated memory. Implementors must ensure it is
/// at least `size` bytes long and meets `alignment` requirements.
fn allocate_code_section(
&mut self,
size: libc::uintptr_t,
alignment: libc::c_uint,
section_id: libc::c_uint,
section_name: &str,
) -> *mut u8;

/// Allocates a block of memory for a data section.
///
/// # Parameters
///
/// * `size` - The size in bytes for the data section.
/// * `alignment` - The required alignment in bytes.
/// * `section_id` - A numeric ID that LLVM uses to identify this section.
/// * `section_name` - The section name. The adapter in this module passes an
///   empty string when LLVM supplies a null pointer or a name that is not
///   valid UTF-8.
/// * `is_read_only` - Whether this data section should be read-only.
///
/// # Returns
///
/// Returns a pointer to the allocated memory. Implementors must ensure it is
/// at least `size` bytes long and meets `alignment` requirements.
fn allocate_data_section(
&mut self,
size: libc::uintptr_t,
alignment: libc::c_uint,
section_id: libc::c_uint,
section_name: &str,
is_read_only: bool,
) -> *mut u8;

/// Finalizes memory permissions for all allocated sections.
///
/// This is called once all sections have been allocated. Implementors can set
/// permissions such as making code sections executable or data sections
/// read-only.
///
/// # Errors
///
/// If any error occurs (for example, failing to set page permissions),
/// return an `Err(String)`. This error is reported back to LLVM as a C string.
fn finalize_memory(&mut self) -> Result<(), String>;

/// Cleans up or deallocates resources before the memory manager is destroyed.
///
/// This is called when LLVM has finished using the memory manager, right
/// before the manager itself is dropped. Any additional allocations or
/// references should be released here if needed.
fn destroy(&mut self);
}

/// Holds a boxed `McjitMemoryManager` and passes it to LLVM as an opaque pointer.
///
/// The adapter itself is heap-allocated and turned into a raw pointer that LLVM
/// stores as its `opaque` argument. LLVM then calls back into the adapter through
/// the `extern "C"` functions defined below, each of which casts `opaque` back to
/// `*mut MemoryManagerAdapter`.
#[derive(Debug)]
pub struct MemoryManagerAdapter {
// The user-supplied manager every callback is forwarded to.
pub memory_manager: Box<dyn McjitMemoryManager>,
}

// ------ Extern "C" Adapters ------

/// C-ABI trampoline for [`McjitMemoryManager::allocate_code_section`].
///
/// LLVM invokes this with the `opaque` pointer it was given when the memory
/// manager was created. The pointer is reinterpreted as a
/// `MemoryManagerAdapter`, the section name is converted to a `&str`, and the
/// request is forwarded to the wrapped manager. The manager's return value is
/// passed back to LLVM unchanged.
pub(crate) extern "C" fn allocate_code_section_adapter(
    opaque: *mut libc::c_void,
    size: libc::uintptr_t,
    alignment: libc::c_uint,
    section_id: libc::c_uint,
    section_name: *const libc::c_char,
) -> *mut u8 {
    let adapter_ptr = opaque.cast::<MemoryManagerAdapter>();

    // SAFETY: assumes `opaque` is the pointer obtained from
    // `Box::<MemoryManagerAdapter>::into_raw` and that LLVM does not call back
    // re-entrantly, so this is the only live reference to the adapter.
    let manager = unsafe { &mut (*adapter_ptr).memory_manager };

    // SAFETY: assumes LLVM passes either null or a NUL-terminated string that
    // stays alive for the duration of this call.
    let name = unsafe { c_str_to_str(section_name) };

    manager.allocate_code_section(size, alignment, section_id, name)
}

/// C-ABI trampoline for [`McjitMemoryManager::allocate_data_section`].
///
/// Works like the code-section trampoline, with one extra argument:
/// `is_read_only` arrives as an `LLVMBool`, where `0` means false. Any non-zero
/// value is forwarded to the manager as `true`.
pub(crate) extern "C" fn allocate_data_section_adapter(
    opaque: *mut libc::c_void,
    size: libc::uintptr_t,
    alignment: libc::c_uint,
    section_id: libc::c_uint,
    section_name: *const libc::c_char,
    is_read_only: LLVMBool,
) -> *mut u8 {
    let adapter_ptr = opaque.cast::<MemoryManagerAdapter>();
    let read_only = is_read_only != 0;

    // SAFETY: assumes `opaque` is the pointer obtained from
    // `Box::<MemoryManagerAdapter>::into_raw` and that LLVM does not call back
    // re-entrantly, so this is the only live reference to the adapter.
    let manager = unsafe { &mut (*adapter_ptr).memory_manager };

    // SAFETY: assumes LLVM passes either null or a NUL-terminated string that
    // stays alive for the duration of this call.
    let name = unsafe { c_str_to_str(section_name) };

    manager.allocate_data_section(size, alignment, section_id, name, read_only)
}

/// Adapter for `finalize_memory`.
///
/// Forwards to [`McjitMemoryManager::finalize_memory`] and translates the
/// result into the C convention LLVM expects.
///
/// # Returns
///
/// * `0` when the manager reports success; `err_msg_out` is left untouched.
/// * `1` when the manager returns an error. If `err_msg_out` is non-null, it
///   receives a C string holding the error text.
///
/// # Error message ownership
///
/// The message is duplicated with `libc::strdup` so that it lives in memory
/// owned by the C allocator. The C side takes ownership of the string and
/// releases it with the C allocator, whereas a pointer produced by
/// `CString::into_raw` may only be released through `CString::from_raw`.
pub(crate) extern "C" fn finalize_memory_adapter(
    opaque: *mut libc::c_void,
    err_msg_out: *mut *mut libc::c_char,
) -> libc::c_int {
    // SAFETY: assumes `opaque` is the pointer obtained from
    // `Box::<MemoryManagerAdapter>::into_raw` and that no other reference to the
    // adapter is live while LLVM runs this callback.
    let adapter = unsafe { &mut *(opaque as *mut MemoryManagerAdapter) };

    let message = match adapter.memory_manager.finalize_memory() {
        Ok(()) => return 0,
        Err(message) => message,
    };

    if !err_msg_out.is_null() {
        // An interior NUL would make `CString::new` fail and discard the whole
        // message, so drop those bytes and keep the rest of the text.
        let bytes: Vec<u8> = message.into_bytes().into_iter().filter(|&byte| byte != 0).collect();
        let cstring = std::ffi::CString::new(bytes).unwrap_or_default();

        // SAFETY: `cstring` is a valid NUL-terminated string for the duration of
        // the `strdup` call, and `err_msg_out` was checked to be non-null.
        // `strdup` may return null on allocation failure, in which case LLVM
        // simply receives no message.
        unsafe {
            *err_msg_out = libc::strdup(cstring.as_ptr());
        }
    }

    1
}

/// C-ABI trampoline for [`McjitMemoryManager::destroy`].
///
/// Called when LLVM is done with the memory manager. Ownership of the adapter
/// is taken back from the raw pointer, the user's `destroy` hook is run, and
/// then the adapter and the boxed manager inside it are dropped.
pub(crate) extern "C" fn destroy_adapter(opaque: *mut libc::c_void) {
    let raw_adapter = opaque.cast::<MemoryManagerAdapter>();

    // SAFETY: `opaque` must have been produced by
    // `Box::<MemoryManagerAdapter>::into_raw` and must not be used again after
    // this call, since the box is freed below.
    let mut owned = unsafe { Box::from_raw(raw_adapter) };

    // Give the implementor a chance to release its own resources first.
    owned.memory_manager.destroy();

    // Free the adapter and the manager it owns.
    drop(owned);
}

/// Borrows a C string as a Rust `&str`, falling back to `""`.
///
/// A null `ptr` and a string that is not valid UTF-8 both yield the empty
/// string; neither case panics.
///
/// # Safety
///
/// * If `ptr` is non-null, it must point to a NUL-terminated string that is
///   valid for reads up to and including the terminator.
/// * The returned lifetime `'a` is chosen by the caller and is not tied to
///   anything. The caller must not use the returned `&str` after the memory
///   behind `ptr` has been freed or modified.
unsafe fn c_str_to_str<'a>(ptr: *const libc::c_char) -> &'a str {
    if ptr.is_null() {
        return "";
    }

    // SAFETY: `ptr` is non-null, and the caller guarantees that it points to a
    // NUL-terminated string that outlives `'a`.
    let raw = unsafe { std::ffi::CStr::from_ptr(ptr) };

    match raw.to_str() {
        Ok(text) => text,
        Err(_) => "",
    }
}
145 changes: 139 additions & 6 deletions src/module.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,18 @@ use llvm_sys::core::LLVMGetTypeByName;

use llvm_sys::core::{
LLVMAddFunction, LLVMAddGlobal, LLVMAddGlobalInAddressSpace, LLVMAddNamedMetadataOperand, LLVMCloneModule,
LLVMDisposeModule, LLVMDumpModule, LLVMGetFirstFunction, LLVMGetFirstGlobal, LLVMGetLastFunction,
LLVMGetLastGlobal, LLVMGetModuleContext, LLVMGetModuleIdentifier, LLVMGetNamedFunction, LLVMGetNamedGlobal,
LLVMGetNamedMetadataNumOperands, LLVMGetNamedMetadataOperands, LLVMGetTarget, LLVMPrintModuleToFile,
LLVMPrintModuleToString, LLVMSetDataLayout, LLVMSetModuleIdentifier, LLVMSetTarget, LLVMDisposeMessage
LLVMDisposeMessage, LLVMDisposeModule, LLVMDumpModule, LLVMGetFirstFunction, LLVMGetFirstGlobal,
LLVMGetLastFunction, LLVMGetLastGlobal, LLVMGetModuleContext, LLVMGetModuleIdentifier, LLVMGetNamedFunction,
LLVMGetNamedGlobal, LLVMGetNamedMetadataNumOperands, LLVMGetNamedMetadataOperands, LLVMGetTarget,
LLVMPrintModuleToFile, LLVMPrintModuleToString, LLVMSetDataLayout, LLVMSetModuleIdentifier, LLVMSetTarget,
};
#[llvm_versions(7..)]
use llvm_sys::core::{LLVMAddModuleFlag, LLVMGetModuleFlag};
#[llvm_versions(13..)]
use llvm_sys::error::LLVMGetErrorMessage;
use llvm_sys::execution_engine::{
LLVMCreateExecutionEngineForModule, LLVMCreateInterpreterForModule, LLVMCreateJITCompilerForModule,
LLVMCreateSimpleMCJITMemoryManager,
};
use llvm_sys::prelude::{LLVMModuleRef, LLVMValueRef};
#[llvm_versions(13..)]
Expand All @@ -29,7 +30,7 @@ use llvm_sys::LLVMLinkage;
use llvm_sys::LLVMModuleFlagBehavior;

use std::cell::{Cell, Ref, RefCell};
use std::ffi::CStr;
use std::ffi::{c_void, CStr};
use std::fs::File;
use std::marker::PhantomData;
use std::mem::{forget, MaybeUninit};
Expand All @@ -45,12 +46,16 @@ use crate::data_layout::DataLayout;
use crate::debug_info::{DICompileUnit, DWARFEmissionKind, DWARFSourceLanguage, DebugInfoBuilder};
use crate::execution_engine::ExecutionEngine;
use crate::memory_buffer::MemoryBuffer;
use crate::memory_manager::{
allocate_code_section_adapter, allocate_data_section_adapter, destroy_adapter, finalize_memory_adapter,
McjitMemoryManager, MemoryManagerAdapter,
};
#[llvm_versions(13..)]
use crate::passes::PassBuilderOptions;
use crate::support::{to_c_str, LLVMString};
#[llvm_versions(13..)]
use crate::targets::TargetMachine;
use crate::targets::{InitializationConfig, Target, TargetTriple};
use crate::targets::{CodeModel, InitializationConfig, Target, TargetTriple};
use crate::types::{AsTypeRef, BasicType, FunctionType, StructType};
#[llvm_versions(7..)]
use crate::values::BasicValue;
Expand Down Expand Up @@ -609,6 +614,134 @@ impl<'ctx> Module<'ctx> {
Ok(execution_engine)
}

/// Creates an MCJIT `ExecutionEngine` for this `Module` using a custom memory manager.
///
/// # Parameters
///
/// * `memory_manager` - Specifies how LLVM allocates and finalizes code and data sections.
/// Implement the [`McjitMemoryManager`] trait to customize these operations.
/// * `opt_level` - Sets the desired optimization level (e.g. `None`, `Less`, `Default`, `Aggressive`).
/// Higher levels generally produce faster code at the expense of longer compilation times.
/// * `code_model` - Determines how code addresses are represented. Common values include
/// `CodeModel::Default` or `CodeModel::JITDefault`. This impacts the generated machine code layout.
/// * `no_frame_pointer_elim` - If true, frame pointer elimination is disabled. This may assist
/// with certain debugging or profiling tasks but can incur a performance cost.
/// * `enable_fast_isel` - If true, uses a faster instruction selector where possible. This can
/// improve compilation speed, though it may produce less optimized code in some cases.
///
/// # Returns
///
/// Returns a newly created [`ExecutionEngine`] for MCJIT on success. Returns an error if:
/// - The native target fails to initialize,
/// - The `Module` is already owned by another `ExecutionEngine`,
/// - Or MCJIT fails to create the engine (in which case an error string is returned from LLVM).
///
/// # Notes
///
/// Using a custom memory manager can help intercept or manage allocations for specific
/// sections (for example, capturing `.llvm_stackmaps` or applying custom permissions).
/// For details, refer to the [`McjitMemoryManager`] documentation.
///
/// # Safety
///
/// The returned [`ExecutionEngine`] takes ownership of the memory manager. Do not move
/// or free the `memory_manager` after calling this method. When the `ExecutionEngine`
/// is dropped, LLVM will destroy the memory manager by calling
/// [`McjitMemoryManager::destroy()`] and freeing its adapter.
pub fn create_mcjit_execution_engine_with_memory_manager(
&self,
memory_manager: impl McjitMemoryManager + 'static,
opt_level: OptimizationLevel,
code_model: CodeModel,
no_frame_pointer_elim: bool,
enable_fast_isel: bool,
) -> Result<ExecutionEngine<'ctx>, LLVMString> {
use std::mem::MaybeUninit;
// ...

// 1) Initialize the native target
Target::initialize_native(&InitializationConfig::default()).map_err(|mut err_string| {
err_string.push('\0');
LLVMString::create_from_str(&err_string)
})?;

// Check if the module is already owned by an ExecutionEngine
if self.owned_by_ee.borrow().is_some() {
let string = "This module is already owned by an ExecutionEngine.\0";
return Err(LLVMString::create_from_str(string));
}

// 2) Box the memory_manager into a MemoryManagerAdapter
let adapter = MemoryManagerAdapter {
memory_manager: Box::new(memory_manager),
};
let adapter_box = Box::new(adapter);
// Convert the Box into a raw pointer for LLVM.
// In `destroy_adapter`, we use `Box::from_raw` to safely reclaim ownership.
let opaque = Box::into_raw(adapter_box) as *mut c_void;

// 3) Create the LLVMMCJITMemoryManager using the custom callbacks
let mmgr = unsafe {
LLVMCreateSimpleMCJITMemoryManager(
opaque,
allocate_code_section_adapter,
allocate_data_section_adapter,
finalize_memory_adapter,
Some(destroy_adapter),
)
};
if mmgr.is_null() {
let msg = "Failed to create SimpleMCJITMemoryManager.\0";
return Err(LLVMString::create_from_str(msg));
}

// 4) Build LLVMMCJITCompilerOptions
let mut options_uninit = MaybeUninit::<llvm_sys::execution_engine::LLVMMCJITCompilerOptions>::zeroed();
unsafe {
// Ensure defaults are initialized
llvm_sys::execution_engine::LLVMInitializeMCJITCompilerOptions(
options_uninit.as_mut_ptr(),
std::mem::size_of::<llvm_sys::execution_engine::LLVMMCJITCompilerOptions>(),
);
}
let mut options = unsafe { options_uninit.assume_init() };

// Override fields
options.OptLevel = opt_level as u32;
options.CodeModel = code_model.into();
options.NoFramePointerElim = no_frame_pointer_elim as i32;
options.EnableFastISel = enable_fast_isel as i32;
options.MCJMM = mmgr;

// 5) Create MCJIT
let mut execution_engine = MaybeUninit::uninit();
let mut err_string = MaybeUninit::uninit();
let code = unsafe {
llvm_sys::execution_engine::LLVMCreateMCJITCompilerForModule(
execution_engine.as_mut_ptr(),
self.module.get(),
&mut options,
std::mem::size_of::<llvm_sys::execution_engine::LLVMMCJITCompilerOptions>(),
err_string.as_mut_ptr(),
)
};

// If creation fails, extract the error string
if code == 1 {
unsafe {
return Err(LLVMString::new(err_string.assume_init()));
}
}

// Otherwise, it succeeded, so wrap the raw pointer
let execution_engine = unsafe { execution_engine.assume_init() };
let execution_engine = unsafe { ExecutionEngine::new(Rc::new(execution_engine), true) };

*self.owned_by_ee.borrow_mut() = Some(execution_engine.clone());

Ok(execution_engine)
}

/// Creates a `GlobalValue` based on a type in an address space.
///
/// # Example
Expand Down
Loading

0 comments on commit 97128a9

Please sign in to comment.