Skip to content
44 changes: 44 additions & 0 deletions avro/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -748,6 +748,50 @@ registered and used!

<!-- cargo-rdme end -->


### Deserializing Avro Byte Arrays

When using Serde to deserialize Avro files, byte array fields sometimes need a special `#[serde(with = "...")]` attribute in addition to the usual derives.

Here is an example of deserializing an Avro file containing a nullable byte array.


```rust
use apache_avro::{from_value, Reader};
use serde::{Serialize,Deserialize};
use std::fs::File;
use std::io::BufReader;

/// Record with an optional byte array field and an optional string field.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct ExampleByteArray {
    // The byte array field needs this attribute for deserialization to work.
    #[serde(with = "apache_avro::serde_avro_bytes_opt")]
    data_bytes: Option<Vec<u8>>,
    description: Option<String>,
}

/// Reads every record of `somefile.avro` and prints it as an `ExampleByteArray`.
///
/// # Panics
///
/// Panics if the file cannot be opened, the Avro reader cannot be created,
/// a record cannot be read, or a record cannot be deserialized.
fn deserialize_byte_array() {
    // Load the example file into a reader. `File::open` accepts a `&str`
    // path directly, so no owned `String` is needed.
    let file = File::open("somefile.avro").unwrap();
    let reader = BufReader::new(file);
    let avro_reader = Reader::new(reader).unwrap();

    // Deserialize each record into the struct with the byte array field
    for value in avro_reader {
        let value = value.unwrap();
        let deserialized = from_value::<ExampleByteArray>(&value).unwrap();
        println!("{:?}", deserialized);
    }
}
```

The full implementation, along with other options for things like fixed byte arrays, can be found in `src/bytes.rs`.



## License

This project is licensed under [Apache License 2.0](https://github.com/apache/avro/blob/main/LICENSE.txt).
Expand Down
Binary file added avro/tests/avro-rs-285-bytes_deserialization.avro
Binary file not shown.
76 changes: 76 additions & 0 deletions avro/tests/avro-rs-285-bytes_deserialization.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
use apache_avro::{from_value, Reader};
use serde::{Serialize,Deserialize};
use std::fs::File;
use std::io::BufReader;


// NOTE: For byte deserialization to work in this case, the field needs the serde attribute
// #[serde(with = "apache_avro::serde_avro_bytes_opt")]. There are a lot of other options as well, documented in bytes.rs.


// This is the schema that was used to write
// schema = {
// "type": "record",
// "name": "SimpleRecord",
// "fields": [
// {"name": "data_bytes", "type": ["null", "bytes"], "default": None},
// {"name": "description", "type": ["null", "string"], "default": None}
// ]
// }


// Here is an example struct that matches the schema, and another with the byte array field filtered out.
// This is very useful because, for extremely large, deeply nested Avro files, mapping structs that grab only
// the fields of interest during deserialization is really efficient and effective. The issue was that trying to
// deserialize a byte array field produced an error no matter the approach.
// The Bytes variant of the Value enum doesn't implement Deserialize in that way, so the field can't just be made a Value::Bytes.

/// Struct matching the full writer schema, including the byte array field.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct ExampleByteArray {
    // This attribute is the fix that lets the byte array field deserialize.
    #[serde(with = "apache_avro::serde_avro_bytes_opt")]
    data_bytes: Option<Vec<u8>>,
    description: Option<String>,
}


/// Variant of the record that leaves out the byte array field and keeps only `description`.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct ExampleByteArrayFiltered {
    description: Option<String>,
}

/// Deserializes every record of the fixture file into `ExampleByteArray`,
/// the struct that includes the byte array field.
///
/// The test passes when no step panics; each record is printed for inspection.
// NOTE(review): the `_failure` suffix appears to date from the original bug
// report; the body unwraps every result, so it now asserts success. Consider renaming.
#[test]
fn avro_rs_285_bytes_deserialization_failure() {
    // Load the example file into a reader. `File::open` accepts a `&str`
    // path directly, so no owned `String` is needed.
    let file = File::open("./tests/avro-rs-285-bytes_deserialization.avro").unwrap();
    let reader = BufReader::new(file);
    let avro_reader = Reader::new(reader).unwrap();

    // Attempt to deserialize into the struct with the byte array field
    for value in avro_reader {
        let value = value.unwrap();
        let deserialized = from_value::<ExampleByteArray>(&value).unwrap();
        println!("{:?}", deserialized);
    }
}

/// Deserializes every record of the fixture file into
/// `ExampleByteArrayFiltered`, the struct without the byte array field.
///
/// The test passes when no step panics; each record is printed for inspection.
#[test]
fn avro_rs_285_bytes_deserialization_pass_when_filtered() {
    // Load the example file into a reader. `File::open` accepts a `&str`
    // path directly, so no owned `String` is needed.
    let file = File::open("./tests/avro-rs-285-bytes_deserialization.avro").unwrap();
    let reader = BufReader::new(file);
    let avro_reader = Reader::new(reader).unwrap();

    // Attempt to deserialize into the struct with the byte array field
    // filtered out; this is expected to succeed
    for value in avro_reader {
        let value = value.unwrap();
        let deserialized = from_value::<ExampleByteArrayFiltered>(&value).unwrap();
        println!("{:?}", deserialized);
    }
}
Loading