This repository was archived by the owner on Jun 24, 2024. It is now read-only.

Commit db5bc8e

Add loading code for ggjt

It can now load the model, but it is not yet working.

1 parent af5415f · commit db5bc8e

File tree

5 files changed: +237 -131 lines changed

Cargo.lock

+10 (generated lockfile; diff not rendered)

ggml/src/lib.rs

+9 -1

@@ -322,13 +322,21 @@ impl Tensor {
     /// # Safety
     ///
     /// The data must not be mutated while being read from.
-    pub unsafe fn data(&self) -> *mut c_void {
+    pub unsafe fn data(&self) -> *const c_void {
         self.with_alive_ctx(|| {
             // SAFETY: The with_alive_call guarantees the context is alive
             unsafe { *self.ptr.as_ptr() }.data
         })
     }
 
+    /// Set the tensor's data pointer (useful for mmap-ed data)
+    pub unsafe fn set_data(&self, data_ptr: *mut c_void) {
+        self.with_alive_ctx(|| {
+            // SAFETY: The with_alive_call guarantees the context is alive
+            unsafe { *self.ptr.as_ptr() }.data = data_ptr;
+        })
+    }
+
     /// Number of elements in this tensor.
     pub fn nelements(&self) -> usize {
         self.with_alive_ctx(|| {
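
For context, a minimal sketch (not part of this commit) of how the new setter is meant to pair with an mmap-ed weights file: the tensor is pointed directly at bytes inside the mapping instead of having its data copied into the ggml context. The helper name and offset here are hypothetical; only ggml::Tensor::set_data and memmap2::Mmap appear in this change.

    use std::os::raw::c_void;

    use memmap2::Mmap;

    /// Point `tensor` at the bytes starting at `offset` inside the mapped model file.
    ///
    /// SAFETY: the caller must guarantee that `offset` is in bounds and that `mmap`
    /// outlives the tensor.
    unsafe fn point_tensor_at_mmap(tensor: &ggml::Tensor, mmap: &Mmap, offset: usize) {
        tensor.set_data(mmap.as_ptr().add(offset) as *mut c_void);
    }

Because set_data only swaps the raw pointer, whoever owns the mapping has to keep it alive; that is what the new mmap field on Model (further down in this commit) is for.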

llama-rs/Cargo.toml

+1

@@ -16,3 +16,4 @@ rand = { workspace = true }
 serde = { version = "1.0.156", features = ["derive"] }
 serde_bytes = "0.11"
 bincode = "1.3.3"
+memmap2 = "0.5.10"
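
memmap2 provides the read-only memory mapping used by the new loading path. A minimal, self-contained usage sketch (the file name is a placeholder):

    use std::fs::File;

    use memmap2::Mmap;

    fn main() -> std::io::Result<()> {
        // Map the whole file read-only; Mmap derefs to &[u8].
        let file = File::open("model.bin")?;
        let mmap = unsafe { Mmap::map(&file)? };
        println!("mapped {} bytes", mmap.len());
        Ok(())
    }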

llama-rs/src/lib.rs

+17 -8

@@ -14,6 +14,7 @@ use std::{
     time,
 };
 
+use memmap2::Mmap;
 use thiserror::Error;
 
 use partial_sort::PartialSort;
@@ -66,6 +67,8 @@ pub struct Model {
 
     tensors: HashMap<String, ggml::Tensor>,
 
+    mmap: Option<Mmap>,
+
     // Must be kept alive for the model
     _context: ggml::Context,
 }
@@ -502,7 +505,7 @@ pub enum LoadError {
        /// The name of the tensor.
        tensor_name: String,
        /// The format type that was encountered.
-        ftype: u32,
+        ftype: i32,
        /// The path that failed.
        path: PathBuf,
    },
@@ -585,12 +588,13 @@ impl Model {
 
         let main_path = path.as_ref();
 
-        let mut reader =
-            BufReader::new(
-                File::open(main_path).map_err(|e| LoadError::OpenFileFailed {
+        let file = File::open(main_path).map_err(|e| LoadError::OpenFileFailed {
                     source: e,
                     path: main_path.to_owned(),
-                })?,
+                })?;
+        let mut reader =
+            BufReader::new(
+                &file,
             );
 
         // Verify magic
@@ -732,7 +736,7 @@ impl Model {
         // Initialize the context
         let context = ggml::Context::init(ctx_size);
 
-        let model = {
+        let mut model = {
             let mut tensors = HashMap::new();
 
             let tok_embeddings = context.new_tensor_2d(wtype, n_embd, n_vocab);
@@ -796,15 +800,20 @@ impl Model {
                layers,
                tensors,
                _context: context,
+                mmap: None,
            }
        };
 
        match model_type {
            ModelType::GGMF | ModelType::Unversioned => {
-                load_weights_ggmf_or_unversioned(reader, main_path, load_progress_callback, &model)?
+                let file_offset = reader.stream_position()?;
+                drop(reader);
+                load_weights_ggmf_or_unversioned(file_offset, main_path, load_progress_callback, &model)?
            }
            ModelType::GGJT => {
-                load_weights_ggjt(reader, main_path, load_progress_callback, &model)?
+                let mmap = unsafe { Mmap::map(&file)? };
+                load_weights_ggjt(&mut reader, &mmap, main_path, load_progress_callback, &model)?;
+                model.mmap = Some(mmap);
            }
        }
 
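
The body of load_weights_ggjt is not part of this commit, so the following is only a hypothetical sketch of how an mmap-based loading loop could use the new set_data: each tensor named in the file is pointed at its bytes inside the mapping rather than copied. TensorEntry and read_tensor_entry are invented placeholders for the ggjt header parsing, not APIs from this repository.

    use std::collections::HashMap;
    use std::io::{BufRead, Seek, SeekFrom};
    use std::os::raw::c_void;

    use memmap2::Mmap;

    /// Hypothetical description of one tensor record in the file.
    struct TensorEntry {
        name: String,
        /// Absolute offset of the tensor's raw data within the file.
        data_offset: u64,
        /// Length of the raw data in bytes.
        data_len: u64,
    }

    /// Placeholder: parse the next tensor header, returning None at end of file.
    fn read_tensor_entry(_reader: &mut (impl BufRead + Seek)) -> std::io::Result<Option<TensorEntry>> {
        todo!("header parsing is not shown in this commit")
    }

    /// Sketch of the loading loop: no weight data is copied into the ggml context;
    /// every tensor is pointed straight at the mapped file.
    fn load_weights_ggjt_sketch(
        reader: &mut (impl BufRead + Seek),
        mmap: &Mmap,
        tensors: &HashMap<String, ggml::Tensor>,
    ) -> std::io::Result<()> {
        while let Some(entry) = read_tensor_entry(reader)? {
            let tensor = tensors
                .get(&entry.name)
                .expect("tensor listed in the file but missing from the model");

            // SAFETY: the mapping is stored in Model::mmap, so it outlives the tensor.
            unsafe {
                tensor.set_data(mmap.as_ptr().add(entry.data_offset as usize) as *mut c_void);
            }

            // Skip past the raw data so the next header can be read.
            reader.seek(SeekFrom::Current(entry.data_len as i64))?;
        }
        Ok(())
    }

This also mirrors the call-site changes above: the GGMF/unversioned path now takes the current file offset and drops the buffered reader, while the ggjt path keeps both the reader (for headers) and the mapping (for data), storing the Mmap on the model so the pointers installed with set_data stay valid.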
