From d738b1757bb375c8f7150cf566fc5a9d2c88008a Mon Sep 17 00:00:00 2001
From: "Carson M."
Date: Fri, 21 Feb 2025 23:31:14 -0600
Subject: [PATCH] chore: start moving stuff off parcel

---
 backends/tract/tests/session.rs                            | 2 +-
 examples/async-gpt2-api/examples/async-gpt2-api.rs         | 2 +-
 examples/cudarc/src/main.rs                                | 2 +-
 examples/gpt2/examples/gpt2.rs                             | 2 +-
 examples/modnet/examples/modnet.rs                         | 2 +-
 .../sentence-transformers/examples/semantic-similarity.rs  | 2 +-
 examples/training/README.md                                | 2 +-
 examples/training/examples/pretokenize.rs                  | 2 +-
 examples/training/examples/train-clm-simple.rs             | 2 +-
 examples/training/examples/train-clm.rs                    | 2 +-
 examples/yolov8/examples/yolov8.rs                         | 2 +-
 src/io_binding.rs                                          | 8 ++++----
 tests/mnist.rs                                             | 2 +-
 tests/squeezenet.rs                                        | 2 +-
 14 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/backends/tract/tests/session.rs b/backends/tract/tests/session.rs
index 5320482a..762431c7 100644
--- a/backends/tract/tests/session.rs
+++ b/backends/tract/tests/session.rs
@@ -16,7 +16,7 @@ fn mnist_5() -> ort::Result<()> {
 
 	let mut session = Session::builder()?
 		.with_optimization_level(GraphOptimizationLevel::Level3)?
-		.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/mnist.onnx")
+		.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/example-models@0.0.0/mnist.onnx")
 		.expect("Could not download model from file");
 
 	// Load image and resize to model's shape, converting to RGB format
diff --git a/examples/async-gpt2-api/examples/async-gpt2-api.rs b/examples/async-gpt2-api/examples/async-gpt2-api.rs
index cdb2a0ca..7aaf2869 100644
--- a/examples/async-gpt2-api/examples/async-gpt2-api.rs
+++ b/examples/async-gpt2-api/examples/async-gpt2-api.rs
@@ -38,7 +38,7 @@ async fn main() -> anyhow::Result<()> {
 	let session = Session::builder()?
 		.with_optimization_level(GraphOptimizationLevel::Level1)?
 		.with_intra_threads(4)?
-		.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/gpt2.onnx")?;
+		.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/example-models@0.0.0/gpt2.onnx")?;
 
 	// Load the tokenizer and encode the prompt into a sequence of tokens.
 	let tokenizer = Tokenizer::from_file(Path::new(env!("CARGO_MANIFEST_DIR")).join("data").join("tokenizer.json")).unwrap();
diff --git a/examples/cudarc/src/main.rs b/examples/cudarc/src/main.rs
index afc76964..b8f0236d 100644
--- a/examples/cudarc/src/main.rs
+++ b/examples/cudarc/src/main.rs
@@ -20,7 +20,7 @@ fn main() -> anyhow::Result<()> {
 		.commit()?;
 
 	let mut session =
-		Session::builder()?.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/modnet_photographic_portrait_matting.onnx")?;
+		Session::builder()?.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/example-models@0.0.0/modnet_photographic_portrait_matting.onnx")?;
 
 	let original_img = image::open(Path::new(env!("CARGO_MANIFEST_DIR")).join("data").join("photo.jpg")).unwrap();
 	let (img_width, img_height) = (original_img.width(), original_img.height());
diff --git a/examples/gpt2/examples/gpt2.rs b/examples/gpt2/examples/gpt2.rs
index 7366c4ca..d21a848e 100644
--- a/examples/gpt2/examples/gpt2.rs
+++ b/examples/gpt2/examples/gpt2.rs
@@ -38,7 +38,7 @@ fn main() -> ort::Result<()> {
 	let mut session = Session::builder()?
 		.with_optimization_level(GraphOptimizationLevel::Level1)?
 		.with_intra_threads(1)?
-		.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/gpt2.onnx")?;
+		.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/example-models@0.0.0/gpt2.onnx")?;
 
 	// Load the tokenizer and encode the prompt into a sequence of tokens.
 	let tokenizer = Tokenizer::from_file(Path::new(env!("CARGO_MANIFEST_DIR")).join("data").join("tokenizer.json")).unwrap();
diff --git a/examples/modnet/examples/modnet.rs b/examples/modnet/examples/modnet.rs
index 264811b8..1c8a9946 100644
--- a/examples/modnet/examples/modnet.rs
+++ b/examples/modnet/examples/modnet.rs
@@ -16,7 +16,7 @@ fn main() -> ort::Result<()> {
 		.commit()?;
 
 	let mut session =
-		Session::builder()?.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/modnet_photographic_portrait_matting.onnx")?;
+		Session::builder()?.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/example-models@0.0.0/modnet_photographic_portrait_matting.onnx")?;
 
 	let original_img = image::open(Path::new(env!("CARGO_MANIFEST_DIR")).join("data").join("photo.jpg")).unwrap();
 	let (img_width, img_height) = (original_img.width(), original_img.height());
diff --git a/examples/sentence-transformers/examples/semantic-similarity.rs b/examples/sentence-transformers/examples/semantic-similarity.rs
index 4f228f1f..dd0665b3 100644
--- a/examples/sentence-transformers/examples/semantic-similarity.rs
+++ b/examples/sentence-transformers/examples/semantic-similarity.rs
@@ -28,7 +28,7 @@ fn main() -> ort::Result<()> {
 	let mut session = Session::builder()?
 		.with_optimization_level(GraphOptimizationLevel::Level1)?
 		.with_intra_threads(1)?
-		.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/all-MiniLM-L6-v2.onnx")?;
+		.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/example-models@0.0.0/all-MiniLM-L6-v2.onnx")?;
 
 	// Load the tokenizer and encode the text.
 	let tokenizer = Tokenizer::from_file(Path::new(env!("CARGO_MANIFEST_DIR")).join("data").join("tokenizer.json")).unwrap();
diff --git a/examples/training/README.md b/examples/training/README.md
index 7c99d643..de0f533f 100644
--- a/examples/training/README.md
+++ b/examples/training/README.md
@@ -10,7 +10,7 @@ pip install -i https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/O
 
 We're installing the CPU version of the `onnxruntime-training` & `torch` packages because we only need to use Python to *create* the initial graph which will be used for training. Run `python tools/train-data/mini-clm.py` from the root directory of the `ort` repo to create the training artifacts.
 
-Next, we need to convert our dataset into tokens to feed the model. This can be achieved by downloading the `oshicats-v2.jsonl` file from the OshiChats v2 dataset and running `cargo run -p example-training --example pretokenize -- ~/oshichats-v2.jsonl`, or if you (rightfully) don't wish to waste 30 GB worth of disk space and bandwidth on brainrot, you may download a [1 MB pre-tokenized subset of the dataset](https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_data/dataset.bin). Make sure `dataset.bin` is in the root of the `ort` repo.
+Next, we need to convert our dataset into tokens to feed the model. This can be achieved by downloading the `oshichats-v2.jsonl` file from the OshiChats v2 dataset and running `cargo run -p example-training --example pretokenize -- ~/oshichats-v2.jsonl`, or if you (rightfully) don't wish to waste 30 GB worth of disk space and bandwidth on brainrot, you may download a [1 MB pre-tokenized subset of the dataset](https://cdn.pyke.io/0/pyke:ort-rs/example-auxiliary-data@0.0.0/train-clm-dataset.bin) (make sure `train-clm-dataset.bin` is in the root of the `ort` repo).
 
 Finally, we can train our model! Run `cargo run -p example-training --example train-clm` to start training. If you have an NVIDIA GPU, add `--features cuda` to enable CUDA, though it's not required and you can train directly on CPU instead. **This will use ~8 GB of (V)RAM!** You can lower the memory usage by adjusting the `BATCH_SIZE` and `SEQUENCE_LENGTH` constants in `train-clm.rs`, though note that changing the batch size may require adjustments to the learning rate.
 
diff --git a/examples/training/examples/pretokenize.rs b/examples/training/examples/pretokenize.rs
index 79eee195..be3ac51b 100644
--- a/examples/training/examples/pretokenize.rs
+++ b/examples/training/examples/pretokenize.rs
@@ -12,7 +12,7 @@ const MAX_TOKENS: usize = 500_000;
 
 fn main() {
 	let input = env::args().nth(1).expect("provide input jsonl");
-	let output = env::args().nth(2).unwrap_or_else(|| "dataset.bin".into());
+	let output = env::args().nth(2).unwrap_or_else(|| "train-clm-dataset.bin".into());
 
 	let input = BufReader::new(File::open(input).unwrap());
 	let mut output = BufWriter::new(File::create(output).unwrap());
diff --git a/examples/training/examples/train-clm-simple.rs b/examples/training/examples/train-clm-simple.rs
index b1bc3551..f14c9390 100644
--- a/examples/training/examples/train-clm-simple.rs
+++ b/examples/training/examples/train-clm-simple.rs
@@ -61,7 +61,7 @@ fn main() -> ort::Result<()> {
 	)
 	.unwrap();
 
-	let mut dataset = File::open("dataset.bin").unwrap();
+	let mut dataset = File::open("train-clm-dataset.bin").unwrap();
 	let file_size = dataset.metadata().unwrap().len();
 	let num_tokens = (file_size / 2) as usize; // 16-bit tokens
 	let mut rng = rand::thread_rng();
diff --git a/examples/training/examples/train-clm.rs b/examples/training/examples/train-clm.rs
index d11c2795..336350b6 100644
--- a/examples/training/examples/train-clm.rs
+++ b/examples/training/examples/train-clm.rs
@@ -48,7 +48,7 @@ fn main() -> ort::Result<()> {
 	let mut optimizer = trainer.optimizer();
 	optimizer.set_lr(7e-5)?;
 
-	let mut dataset = File::open("dataset.bin").unwrap();
+	let mut dataset = File::open("train-clm-dataset.bin").unwrap();
 	let file_size = dataset.metadata().unwrap().len();
 	let num_tokens = (file_size / 2) as usize; // 16-bit tokens
 	let mut rng = rand::thread_rng();
diff --git a/examples/yolov8/examples/yolov8.rs b/examples/yolov8/examples/yolov8.rs
index f7625982..b1421891 100644
--- a/examples/yolov8/examples/yolov8.rs
+++ b/examples/yolov8/examples/yolov8.rs
@@ -29,7 +29,7 @@ fn union(box1: &BoundingBox, box2: &BoundingBox) -> f32 {
 	((box1.x2 - box1.x1) * (box1.y2 - box1.y1)) + ((box2.x2 - box2.x1) * (box2.y2 - box2.y1)) - intersection(box1, box2)
 }
 
-const YOLOV8M_URL: &str = "https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/yolov8m.onnx";
+const YOLOV8M_URL: &str = "https://cdn.pyke.io/0/pyke:ort-rs/example-models@0.0.0/yolov8m.onnx";
 
 #[rustfmt::skip]
 const YOLOV8_CLASS_LABELS: [&str; 80] = [
diff --git a/src/io_binding.rs b/src/io_binding.rs
index 59989a0d..301013b8 100644
--- a/src/io_binding.rs
+++ b/src/io_binding.rs
@@ -257,7 +257,7 @@ mod tests {
 	#[test]
 	#[cfg(all(feature = "ndarray", feature = "fetch-models"))]
 	fn test_mnist_input_bound() -> Result<()> {
-		let mut session = Session::builder()?.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/mnist.onnx")?;
+		let mut session = Session::builder()?.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/example-models@0.0.0/mnist.onnx")?;
 
 		let array = get_image();
 
@@ -275,7 +275,7 @@ mod tests {
 	#[test]
 	#[cfg(all(feature = "ndarray", feature = "fetch-models"))]
 	fn test_mnist_input_output_bound() -> Result<()> {
-		let mut session = Session::builder()?.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/mnist.onnx")?;
+		let mut session = Session::builder()?.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/example-models@0.0.0/mnist.onnx")?;
 
 		let array = get_image();
 
@@ -295,7 +295,7 @@ mod tests {
 	#[test]
 	#[cfg(all(feature = "ndarray", feature = "fetch-models"))]
 	fn test_send_iobinding() -> Result<()> {
-		let mut session = Session::builder()?.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/mnist.onnx")?;
+		let mut session = Session::builder()?.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/example-models@0.0.0/mnist.onnx")?;
 
 		let array = get_image();
 
@@ -320,7 +320,7 @@ mod tests {
 	#[test]
 	#[cfg(all(feature = "ndarray", feature = "fetch-models"))]
 	fn test_mnist_clear_bounds() -> Result<()> {
-		let mut session = Session::builder()?.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/mnist.onnx")?;
+		let mut session = Session::builder()?.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/example-models@0.0.0/mnist.onnx")?;
 
 		let array = get_image();
 
diff --git a/tests/mnist.rs b/tests/mnist.rs
index b3813c1d..d2a55bfd 100644
--- a/tests/mnist.rs
+++ b/tests/mnist.rs
@@ -18,7 +18,7 @@ fn mnist_5() -> ort::Result<()> {
 	let mut session = Session::builder()?
 		.with_optimization_level(GraphOptimizationLevel::Level1)?
 		.with_intra_threads(1)?
-		.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/mnist.onnx")
+		.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/example-models@0.0.0/mnist.onnx")
 		.expect("Could not download model from file");
 
 	let input0_shape = {
diff --git a/tests/squeezenet.rs b/tests/squeezenet.rs
index 21891714..3150715d 100644
--- a/tests/squeezenet.rs
+++ b/tests/squeezenet.rs
@@ -23,7 +23,7 @@ fn squeezenet_mushroom() -> ort::Result<()> {
 	let mut session = Session::builder()?
 		.with_optimization_level(GraphOptimizationLevel::Level1)?
 		.with_intra_threads(1)?
-		.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/squeezenet.onnx")
+		.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/example-models@0.0.0/squeezenet.onnx")
 		.expect("Could not download model from file");
 
 	let class_labels = get_imagenet_labels()?;
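
Note for reviewers: every call touched above goes through `commit_from_url`, which is only available with ort's `fetch-models` feature. The following is a minimal standalone sketch (not part of the patch) exercising the same pattern against the new CDN URL for `mnist.onnx` introduced above; the import paths and the `inputs` metadata field assume a recent ort 2.0 release candidate and may differ between versions.

```rust
// Minimal sketch: download one of the example models from the new CDN location
// and build a session, mirroring the `commit_from_url` calls updated in this patch.
use ort::session::{builder::GraphOptimizationLevel, Session};

fn main() -> ort::Result<()> {
	let session = Session::builder()?
		.with_optimization_level(GraphOptimizationLevel::Level1)?
		.with_intra_threads(1)?
		// URL updated by this patch (previously hosted on parcel.pyke.io).
		.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/example-models@0.0.0/mnist.onnx")?;

	// Print the model's input names to confirm the download and session creation worked.
	for input in &session.inputs {
		println!("input: {}", input.name);
	}
	Ok(())
}
```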
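
The training examples also rename `dataset.bin` to `train-clm-dataset.bin`, still read as a flat array of 16-bit token IDs (`file_size / 2` in `train-clm.rs`). Below is a small self-contained sanity check for the renamed file, assuming little-endian token storage (the byte order is not spelled out in this patch).

```rust
// Sketch: inspect the renamed pre-tokenized dataset. The token count mirrors the
// `file_size / 2` logic in train-clm.rs; little-endian u16 storage is assumed.
use std::fs::File;
use std::io::Read;

fn main() -> std::io::Result<()> {
	let mut dataset = File::open("train-clm-dataset.bin")?;
	let file_size = dataset.metadata()?.len();
	let num_tokens = (file_size / 2) as usize; // 16-bit tokens
	println!("dataset holds {num_tokens} tokens");

	// Read the first few tokens as u16s (assumed little-endian).
	let window = 16.min(num_tokens);
	let mut buf = vec![0u8; window * 2];
	dataset.read_exact(&mut buf)?;
	let tokens: Vec<u16> = buf.chunks_exact(2).map(|c| u16::from_le_bytes([c[0], c[1]])).collect();
	println!("first tokens: {tokens:?}");
	Ok(())
}
```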