chore: start moving stuff off parcel
decahedron1 committed Feb 22, 2025
1 parent 8aab523 commit d738b17
Showing 14 changed files with 17 additions and 17 deletions.
2 changes: 1 addition & 1 deletion backends/tract/tests/session.rs
@@ -16,7 +16,7 @@ fn mnist_5() -> ort::Result<()> {

let mut session = Session::builder()?
.with_optimization_level(GraphOptimizationLevel::Level3)?
.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/mnist.onnx")
.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/[email protected]/mnist.onnx")
.expect("Could not download model from file");

// Load image and resize to model's shape, converting to RGB format
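Note: every file in this commit touches the same `commit_from_url` builder pattern. A minimal sketch of that pattern, assuming ort 2.0-rc module paths and the `fetch-models` cargo feature (which the io_binding tests further down gate on); the URL parameter is a placeholder, not one of the CDN URLs in this diff:

use ort::session::{builder::GraphOptimizationLevel, Session};

// Download an ONNX model over HTTP and build a session from it.
// Requires the `fetch-models` feature; `commit_from_url` returns
// an ort::Result<Session>.
fn load(url: &str) -> ort::Result<Session> {
    Session::builder()?
        .with_optimization_level(GraphOptimizationLevel::Level3)?
        .commit_from_url(url)
}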
2 changes: 1 addition & 1 deletion examples/async-gpt2-api/examples/async-gpt2-api.rs
@@ -38,7 +38,7 @@ async fn main() -> anyhow::Result<()> {
let session = Session::builder()?
.with_optimization_level(GraphOptimizationLevel::Level1)?
.with_intra_threads(4)?
.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/gpt2.onnx")?;
.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/[email protected]/gpt2.onnx")?;

// Load the tokenizer and encode the prompt into a sequence of tokens.
let tokenizer = Tokenizer::from_file(Path::new(env!("CARGO_MANIFEST_DIR")).join("data").join("tokenizer.json")).unwrap();
2 changes: 1 addition & 1 deletion examples/cudarc/src/main.rs
@@ -20,7 +20,7 @@ fn main() -> anyhow::Result<()> {
.commit()?;

let mut session =
- Session::builder()?.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/modnet_photographic_portrait_matting.onnx")?;
+ Session::builder()?.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/[email protected]/modnet_photographic_portrait_matting.onnx")?;

let original_img = image::open(Path::new(env!("CARGO_MANIFEST_DIR")).join("data").join("photo.jpg")).unwrap();
let (img_width, img_height) = (original_img.width(), original_img.height());
2 changes: 1 addition & 1 deletion examples/gpt2/examples/gpt2.rs
@@ -38,7 +38,7 @@ fn main() -> ort::Result<()> {
let mut session = Session::builder()?
.with_optimization_level(GraphOptimizationLevel::Level1)?
.with_intra_threads(1)?
.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/gpt2.onnx")?;
.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/[email protected]/gpt2.onnx")?;

// Load the tokenizer and encode the prompt into a sequence of tokens.
let tokenizer = Tokenizer::from_file(Path::new(env!("CARGO_MANIFEST_DIR")).join("data").join("tokenizer.json")).unwrap();
2 changes: 1 addition & 1 deletion examples/modnet/examples/modnet.rs
@@ -16,7 +16,7 @@ fn main() -> ort::Result<()> {
.commit()?;

let mut session =
- Session::builder()?.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/modnet_photographic_portrait_matting.onnx")?;
+ Session::builder()?.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/[email protected]/modnet_photographic_portrait_matting.onnx")?;

let original_img = image::open(Path::new(env!("CARGO_MANIFEST_DIR")).join("data").join("photo.jpg")).unwrap();
let (img_width, img_height) = (original_img.width(), original_img.height());
@@ -28,7 +28,7 @@ fn main() -> ort::Result<()> {
let mut session = Session::builder()?
.with_optimization_level(GraphOptimizationLevel::Level1)?
.with_intra_threads(1)?
.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/all-MiniLM-L6-v2.onnx")?;
.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/[email protected]/all-MiniLM-L6-v2.onnx")?;

// Load the tokenizer and encode the text.
let tokenizer = Tokenizer::from_file(Path::new(env!("CARGO_MANIFEST_DIR")).join("data").join("tokenizer.json")).unwrap();
2 changes: 1 addition & 1 deletion examples/training/README.md
@@ -10,7 +10,7 @@ pip install -i https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/O

We're installing the CPU version of the `onnxruntime-training` & `torch` packages because we only need to use Python to *create* the initial graph which will be used for training. Run `python tools/train-data/mini-clm.py` from the root directory of the `ort` repo to create the training artifacts.

- Next, we need to convert our dataset into tokens to feed the model. This can be achieved by downloading the `oshicats-v2.jsonl` file from the OshiChats v2 dataset and running `cargo run -p example-training --example pretokenize -- ~/oshichats-v2.jsonl`, or if you (rightfully) don't wish to waste 30 GB worth of disk space and bandwidth on brainrot, you may download a [1 MB pre-tokenized subset of the dataset](https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_data/dataset.bin). Make sure `dataset.bin` is in the root of the `ort` repo.
+ Next, we need to convert our dataset into tokens to feed the model. This can be achieved by downloading the `oshicats-v2.jsonl` file from the OshiChats v2 dataset and running `cargo run -p example-training --example pretokenize -- ~/oshichats-v2.jsonl`, or if you (rightfully) don't wish to waste 30 GB worth of disk space and bandwidth on brainrot, you may download a [1 MB pre-tokenized subset of the dataset](https://cdn.pyke.io/0/pyke:ort-rs/[email protected]/train-clm-dataset.bin) (make sure `train-clm-dataset.bin` is in the root of the `ort` repo).

Finally, we can train our model! Run `cargo run -p example-training --example train-clm` to start training. If you have an NVIDIA GPU, add `--features cuda` to enable CUDA, though it's not required and you can train directly on CPU instead. **This will use ~8 GB of (V)RAM!** You can lower the memory usage by adjusting the `BATCH_SIZE` and `SEQUENCE_LENGTH` constants in `train-clm.rs`, though note that changing the batch size may require adjustments to the learning rate.

2 changes: 1 addition & 1 deletion examples/training/examples/pretokenize.rs
@@ -12,7 +12,7 @@ const MAX_TOKENS: usize = 500_000;

fn main() {
let input = env::args().nth(1).expect("provide input jsonl");
- let output = env::args().nth(2).unwrap_or_else(|| "dataset.bin".into());
+ let output = env::args().nth(2).unwrap_or_else(|| "train-clm-dataset.bin".into());

let input = BufReader::new(File::open(input).unwrap());
let mut output = BufWriter::new(File::create(output).unwrap());
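Note: the dataset file being renamed here is a flat stream of 16-bit token IDs (the training examples below derive the token count as file_size / 2). A hedged sketch of the writer side; little-endian byte order is an assumption not confirmed by this diff, so check pretokenize.rs for the authoritative format:

use std::fs::File;
use std::io::{BufWriter, Write};

// Append token IDs to the dataset as raw 16-bit values.
// Little-endian is assumed here for illustration.
fn write_tokens(out: &mut BufWriter<File>, tokens: &[u16]) -> std::io::Result<()> {
    for &t in tokens {
        out.write_all(&t.to_le_bytes())?;
    }
    Ok(())
}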
2 changes: 1 addition & 1 deletion examples/training/examples/train-clm-simple.rs
@@ -61,7 +61,7 @@ fn main() -> ort::Result<()> {
)
.unwrap();

- let mut dataset = File::open("dataset.bin").unwrap();
+ let mut dataset = File::open("train-clm-dataset.bin").unwrap();
let file_size = dataset.metadata().unwrap().len();
let num_tokens = (file_size / 2) as usize; // 16-bit tokens
let mut rng = rand::thread_rng();
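Note: the hunk above opens the dataset and derives num_tokens from the file size; a plausible continuation is to seek to a random token offset and read one fixed-length window per training step. The SEQUENCE_LENGTH value, the rand 0.8 API, and little-endian decoding are assumptions here, not code from this commit; the real loop lives in train-clm-simple.rs:

use std::fs::File;
use std::io::{Read, Seek, SeekFrom};
use rand::Rng;

const SEQUENCE_LENGTH: usize = 256; // assumed value; see the constants in train-clm.rs

// Seek to a random token offset (2 bytes per token) and decode one window.
fn sample_window(dataset: &mut File, num_tokens: usize, rng: &mut impl Rng) -> std::io::Result<Vec<u16>> {
    let start = rng.gen_range(0..num_tokens - SEQUENCE_LENGTH);
    dataset.seek(SeekFrom::Start((start * 2) as u64))?;
    let mut buf = vec![0u8; SEQUENCE_LENGTH * 2];
    dataset.read_exact(&mut buf)?;
    Ok(buf.chunks_exact(2).map(|c| u16::from_le_bytes([c[0], c[1]])).collect())
}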
2 changes: 1 addition & 1 deletion examples/training/examples/train-clm.rs
@@ -48,7 +48,7 @@ fn main() -> ort::Result<()> {
let mut optimizer = trainer.optimizer();
optimizer.set_lr(7e-5)?;

- let mut dataset = File::open("dataset.bin").unwrap();
+ let mut dataset = File::open("train-clm-dataset.bin").unwrap();
let file_size = dataset.metadata().unwrap().len();
let num_tokens = (file_size / 2) as usize; // 16-bit tokens
let mut rng = rand::thread_rng();
2 changes: 1 addition & 1 deletion examples/yolov8/examples/yolov8.rs
@@ -29,7 +29,7 @@ fn union(box1: &BoundingBox, box2: &BoundingBox) -> f32 {
((box1.x2 - box1.x1) * (box1.y2 - box1.y1)) + ((box2.x2 - box2.x1) * (box2.y2 - box2.y1)) - intersection(box1, box2)
}

- const YOLOV8M_URL: &str = "https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/yolov8m.onnx";
+ const YOLOV8M_URL: &str = "https://cdn.pyke.io/0/pyke:ort-rs/[email protected]/yolov8m.onnx";

#[rustfmt::skip]
const YOLOV8_CLASS_LABELS: [&str; 80] = [
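Note: the union helper in this hunk is half of the standard intersection-over-union computation used for non-maximum suppression in YOLO postprocessing. A one-line sketch of how it typically combines with the intersection helper (which exists in the full file, per the call inside union above):

// Intersection-over-union: ratio of overlap area to combined area.
fn iou(box1: &BoundingBox, box2: &BoundingBox) -> f32 {
    intersection(box1, box2) / union(box1, box2)
}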
8 changes: 4 additions & 4 deletions src/io_binding.rs
@@ -257,7 +257,7 @@ mod tests {
#[test]
#[cfg(all(feature = "ndarray", feature = "fetch-models"))]
fn test_mnist_input_bound() -> Result<()> {
- let mut session = Session::builder()?.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/mnist.onnx")?;
+ let mut session = Session::builder()?.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/[email protected]/mnist.onnx")?;

let array = get_image();

@@ -275,7 +275,7 @@
#[test]
#[cfg(all(feature = "ndarray", feature = "fetch-models"))]
fn test_mnist_input_output_bound() -> Result<()> {
- let mut session = Session::builder()?.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/mnist.onnx")?;
+ let mut session = Session::builder()?.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/[email protected]/mnist.onnx")?;

let array = get_image();

@@ -295,7 +295,7 @@
#[test]
#[cfg(all(feature = "ndarray", feature = "fetch-models"))]
fn test_send_iobinding() -> Result<()> {
- let mut session = Session::builder()?.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/mnist.onnx")?;
+ let mut session = Session::builder()?.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/[email protected]/mnist.onnx")?;

let array = get_image();

@@ -320,7 +320,7 @@
#[test]
#[cfg(all(feature = "ndarray", feature = "fetch-models"))]
fn test_mnist_clear_bounds() -> Result<()> {
- let mut session = Session::builder()?.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/mnist.onnx")?;
+ let mut session = Session::builder()?.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/[email protected]/mnist.onnx")?;

let array = get_image();

2 changes: 1 addition & 1 deletion tests/mnist.rs
@@ -18,7 +18,7 @@ fn mnist_5() -> ort::Result<()> {
let mut session = Session::builder()?
.with_optimization_level(GraphOptimizationLevel::Level1)?
.with_intra_threads(1)?
.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/mnist.onnx")
.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/[email protected]/mnist.onnx")
.expect("Could not download model from file");

let input0_shape = {
2 changes: 1 addition & 1 deletion tests/squeezenet.rs
@@ -23,7 +23,7 @@ fn squeezenet_mushroom() -> ort::Result<()> {
let mut session = Session::builder()?
.with_optimization_level(GraphOptimizationLevel::Level1)?
.with_intra_threads(1)?
.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/squeezenet.onnx")
.commit_from_url("https://cdn.pyke.io/0/pyke:ort-rs/[email protected]/squeezenet.onnx")
.expect("Could not download model from file");

let class_labels = get_imagenet_labels()?;
