Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement automatic detection of media type #13

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions src/files.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Table of known file headers, grouped by media category.
// Each entry is a pair of byte strings: [header bytes, media type].
//
// NOTE(review): the `....` runs presumably stand for four bytes that differ
// from file to file (e.g. the size field that follows `RIFF`); confirm that
// whatever consumes this table treats them as wildcards, not literal dots.
const MEDIA_TYPE_HEADERS: [[&[u8]; 2]; 18] = [
// Image
[b"GIF87a", b"image/gif"],
[b"GIF89a", b"image/gif"],
[b"\xFF\xD8\xFF", b"image/jpeg"],
[b"\x89PNG\x0D\x0A\x1A\x0A", b"image/png"],
[b"<svg ", b"image/svg+xml"],
[b"RIFF....WEBPVP8 ", b"image/webp"],
[b"\x00\x00\x01\x00", b"image/x-icon"],
// Audio
[b"ID3", b"audio/mpeg"],
// NOTE(review): MPEG audio frames usually start with 0xFF followed by a byte
// whose high bits are set (e.g. 0xFB); 0x0E / 0x0F may be a mistranscription
// of the 0xFFE / 0xFFF sync patterns — confirm against real files.
[b"\xFF\x0E", b"audio/mpeg"],
[b"\xFF\x0F", b"audio/mpeg"],
[b"OggS", b"audio/ogg"],
[b"RIFF....WAVEfmt ", b"audio/wav"],
[b"fLaC", b"audio/x-flac"],
// Video
[b"RIFF....AVI LIST", b"video/avi"],
[b"....ftyp", b"video/mp4"],
// NOTE(review): MPEG start codes are typically 00 00 01 Bx (e.g. 0xBA, 0xB3);
// the trailing 0x0B looks suspicious — confirm.
[b"\x00\x00\x01\x0B", b"video/mpeg"],
[b"....moov", b"video/quicktime"],
[b"\x1A\x45\xDF\xA3", b"video/webm"],
];

// "avi" => "video/avi",
// "bmp" => "image/bmp",
// "css" => "text/css",
// "flac" => "audio/flac",
// "gif" => "image/gif",
// "htm" | "html" => "text/html",
// "ico" => "image/x-icon",
// "jpeg" | "jpg" => "image/jpeg",
// "js" => "application/javascript",
// "json" => "application/json",
// "mp3" => "audio/mpeg",
// "mp4" | "m4v" => "video/mp4",
// "ogg" => "audio/ogg",
// "ogv" => "video/ogg",
// "pdf" => "application/pdf",
// "png" => "image/png",
// "svg" => "image/svg+xml",
// "swf" => "application/x-shockwave-flash",
// "tif" | "tiff" => "image/tiff",
// "txt" => "text/plain",
// "wav" => "audio/wav",
// "webp" => "image/webp",
// "woff" => "font/woff",
// "woff2" => "font/woff2",
// "xml" => "text/xml",
89 changes: 82 additions & 7 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,30 @@ use url::Url;

const DEFAULT_MEDIA_TYPE: &'static str = "text/plain";
const DEFAULT_CHARSET: &'static str = "US-ASCII";
const TEXTUAL_MEDIA_TYPES: &'static [&str] = &[
// Known file signatures ("magic numbers"), grouped by media category.
// Each entry is a pair of byte strings: [signature, media type].
// detect_media_type() walks this table in order and returns the media type of
// the first entry whose signature the data begins with, so order matters when
// signatures share a prefix.
//
// NOTE(review): the `....` runs presumably stand for four bytes that differ
// from file to file (e.g. the size field that follows `RIFF`); verify that the
// matcher treats them as wildcards rather than literal dots.
const FILE_SIGNATURES: [[&[u8]; 2]; 18] = [
// Image
[b"GIF87a", b"image/gif"],
[b"GIF89a", b"image/gif"],
[b"\xFF\xD8\xFF", b"image/jpeg"],
[b"\x89PNG\x0D\x0A\x1A\x0A", b"image/png"],
[b"<svg ", b"image/svg+xml"],
[b"RIFF....WEBPVP8 ", b"image/webp"],
[b"\x00\x00\x01\x00", b"image/x-icon"],
// Audio
[b"ID3", b"audio/mpeg"],
// NOTE(review): MPEG audio frames usually start with 0xFF followed by a byte
// whose high bits are set (e.g. 0xFB); 0x0E / 0x0F may be a mistranscription
// of the 0xFFE / 0xFFF sync patterns — confirm against real files.
[b"\xFF\x0E", b"audio/mpeg"],
[b"\xFF\x0F", b"audio/mpeg"],
[b"OggS", b"audio/ogg"],
[b"RIFF....WAVEfmt ", b"audio/wav"],
[b"fLaC", b"audio/x-flac"],
// Video
[b"RIFF....AVI LIST", b"video/avi"],
[b"....ftyp", b"video/mp4"],
// NOTE(review): MPEG start codes are typically 00 00 01 Bx (e.g. 0xBA, 0xB3);
// the trailing 0x0B looks suspicious — confirm.
[b"\x00\x00\x01\x0B", b"video/mpeg"],
[b"....moov", b"video/quicktime"],
[b"\x1A\x45\xDF\xA3", b"video/webm"],
];
const PLAINTEXT_MEDIA_TYPES: &'static [&str] = &[
"application/atom+xml",
"application/dart",
"application/ecmascript",
Expand Down Expand Up @@ -34,7 +57,7 @@ pub struct DataUrl {
charset: Option<String>, // US-ASCII is default, according to the spec
is_base64_encoded: bool, // Indicates if it's a base64-encoded data URL
data: Vec<u8>, // Data, bytes, UTF-8 if text
fragment: Option<String>, // #something-at-the-end, None by default
fragment: Option<String>, // #something at the end, None by default
}

pub enum DataUrlParseError {
Expand All @@ -49,6 +72,58 @@ impl fmt::Debug for DataUrlParseError {
}
}

/// Guesses the media type (MIME type) of `data`, falling back to `filename`.
///
/// The leading bytes of `data` are compared against every entry of
/// `FILE_SIGNATURES`, in table order; the media type of the first matching
/// entry is returned. A `.` byte inside a signature acts as a wildcard that
/// matches any single byte, so container signatures such as
/// `RIFF....WEBPVP8 ` match regardless of the four bytes that follow `RIFF`.
///
/// When no signature matches, the result of
/// `detect_media_type_by_file_name(filename)` is returned.
///
/// # Panics
///
/// Panics if a media type stored in `FILE_SIGNATURES` is not valid UTF-8.
pub fn detect_media_type(data: &[u8], filename: &str) -> String {
    // True when `data` begins with `signature`; `.` in the signature matches any byte
    fn starts_with_signature(data: &[u8], signature: &[u8]) -> bool {
        data.len() >= signature.len()
            && signature
                .iter()
                .zip(data)
                .all(|(expected, actual)| *expected == b'.' || expected == actual)
    }

    // At first attempt to recognize the file by its header
    for file_signature in FILE_SIGNATURES.iter() {
        if starts_with_signature(data, file_signature[0]) {
            return String::from_utf8(file_signature[1].to_vec())
                .expect("media types in FILE_SIGNATURES are valid UTF-8");
        }
    }

    // If the header didn't match any known magic signature,
    // try to guess the media type from the file name
    detect_media_type_by_file_name(filename)
}

/// Guesses a media type (MIME type) from the extension of `filename`.
///
/// The extension is the text after the last `.` in `filename` and is compared
/// case-insensitively against a fixed list of known extensions.
///
/// Returns an empty string when `filename` contains no `.` at all (a bare
/// name such as `png` is not treated as an extension) or when the extension
/// is not a known one.
pub fn detect_media_type_by_file_name(filename: &str) -> String {
    let filename_lowercased: String = filename.to_lowercase();

    // rsplitn(2, '.') yields the text after the last dot first; a second item
    // exists only when the name actually contained a dot
    let mut pieces = filename_lowercased.rsplitn(2, '.');
    let extension: &str = match (pieces.next(), pieces.next()) {
        (Some(extension), Some(_)) => extension,
        _ => "",
    };

    let mime: &str = match extension {
        "avi" => "video/avi",
        "bmp" => "image/bmp",
        "css" => "text/css",
        "flac" => "audio/flac",
        "gif" => "image/gif",
        "htm" | "html" => "text/html",
        "ico" => "image/x-icon",
        "jpeg" | "jpg" => "image/jpeg",
        "js" => "application/javascript",
        "json" => "application/json",
        "mp3" => "audio/mpeg",
        "mp4" | "m4v" => "video/mp4",
        "ogg" => "audio/ogg",
        "ogv" => "video/ogg",
        "pdf" => "application/pdf",
        "png" => "image/png",
        "svg" => "image/svg+xml",
        "swf" => "application/x-shockwave-flash",
        "tif" | "tiff" => "image/tiff",
        "txt" => "text/plain",
        "wav" => "audio/wav",
        "webp" => "image/webp",
        "woff" => "font/woff",
        "woff2" => "font/woff2",
        "xml" => "text/xml",
        _ => "",
    };

    mime.to_string()
}

pub(crate) fn parse_data_url_meta_data(
meta_data_string: String,
) -> (Option<String>, Option<String>, bool) {
Expand Down Expand Up @@ -85,7 +160,7 @@ pub(crate) fn parse_data_url_meta_data(
}

/// Reports whether `media_type` contains exactly one forward slash,
/// i.e. whether splitting it on `/` produces exactly two parts.
pub(crate) fn validate_media_type(media_type: &str) -> bool {
    let slash_count: usize = media_type.matches('/').count();

    slash_count == 1
}

Expand Down Expand Up @@ -161,18 +236,18 @@ impl DataUrl {
}

let current_media_type: &str = &self.media_type.as_ref().unwrap();
let is_textual: bool = if current_media_type.split('/').collect::<Vec<&str>>()[0]
let is_plaintext: bool = if current_media_type.split('/').collect::<Vec<&str>>()[0]
.eq_ignore_ascii_case("text")
{
true
} else {
TEXTUAL_MEDIA_TYPES
PLAINTEXT_MEDIA_TYPES
.iter()
.find(|mt| current_media_type.eq_ignore_ascii_case(mt))
.is_some()
};

!is_textual
!is_plaintext
}

pub fn media_type(&self) -> &str {
Expand Down Expand Up @@ -316,7 +391,7 @@ impl DataUrl {
}

if let Some(c) = &self.charset {
// windows-1252 is another name for US-ASCII, the default charset for data URLs
// NOTE: windows-1252 is another name for US-ASCII, the default charset for data URLs
if c != "windows-1252" {
result += ";charset=";
result += &c;
Expand Down
60 changes: 39 additions & 21 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,48 +80,48 @@ fn main() {

//////////////////////////////////////////////////////////////////////////

let decode_mode_enabled: bool = app.is_present("decode");
let string_input_set: bool = app.is_present("INPUT");
let is_decode_mode_enabled: bool = app.is_present("decode");
let is_string_input_set: bool = app.is_present("INPUT");
// let stdin_is_a_tty: bool = !io::stdio::stdin_raw().isatty();
let stdout_is_a_tty: bool = atty::is(Stream::Stdout);
let mut file_input_set: bool = app.is_present("INPUT FILE");
let mut file_output_set: bool = app.is_present("OUTPUT FILE");
let input_file_path: &str = if file_input_set {
let is_stdout_a_tty: bool = atty::is(Stream::Stdout);
let mut is_file_input_set: bool = app.is_present("INPUT FILE");
let mut is_file_output_set: bool = app.is_present("OUTPUT FILE");
let input_file_path: &str = if is_file_input_set {
app.value_of("INPUT FILE").unwrap()
} else {
"-"
};
let output_file_path: &str = if file_output_set {
let output_file_path: &str = if is_file_output_set {
app.value_of("OUTPUT FILE").unwrap()
} else {
"-"
};
if file_input_set && input_file_path == "-" {
file_input_set = false;
if is_file_input_set && input_file_path == "-" {
is_file_input_set = false;
}
if file_output_set && output_file_path == "-" {
file_output_set = false;
if is_file_output_set && output_file_path == "-" {
is_file_output_set = false;
}
let file_input_set = file_input_set;
let file_output_set = file_output_set;
let is_file_input_set = is_file_input_set;
let is_file_output_set = is_file_output_set;

//////////////////////////////////////////////////////////////////////////

if string_input_set && file_input_set {
if is_string_input_set && is_file_input_set {
eprintln!("error: Both file and argument inputs provided");
std::process::exit(1);
}

if !stdout_is_a_tty && file_output_set {
if !is_stdout_a_tty && is_file_output_set {
eprintln!("error: Both stdout and argument output provided");
std::process::exit(1);
}

//////////////////////////////////////////////////////////////////////////

let input: Vec<u8> = if string_input_set {
let input: Vec<u8> = if is_string_input_set {
app.value_of("INPUT").unwrap().as_bytes().to_vec()
} else if file_input_set {
} else if is_file_input_set {
match fs::read(input_file_path) {
Ok(input_file_data) => input_file_data,
Err(_) => {
Expand All @@ -136,15 +136,19 @@ fn main() {

//////////////////////////////////////////////////////////////////////////

if decode_mode_enabled {
if is_decode_mode_enabled {
////////////
// Decode //
////////////

// TODO: ideally the program needs to check the current terminal locale (encoding), and not just assume it's UTF-8
let input_as_string: String = String::from_utf8_lossy(&input).to_string();

std::process::exit(match DataUrl::parse(&input_as_string) {
Ok(data_url) => {
if !stdout_is_a_tty || file_output_set || data_url.is_binary() {
if !is_stdout_a_tty || is_file_output_set || data_url.is_binary() {
// Write raw bytes if the output is a file, or if the contents of this data URL has binary format
if file_output_set {
if is_file_output_set {
let mut handle = fs::File::create(output_file_path).unwrap();
handle.write_all(data_url.data()).unwrap();
} else {
Expand All @@ -164,6 +168,10 @@ fn main() {
}
});
} else {
////////////
// Encode //
////////////

let mut data_url = DataUrl::new();

data_url.set_data(&input);
Expand All @@ -186,7 +194,7 @@ fn main() {
// TODO: ideally the program needs to check the current terminal locale (encoding), and not just assume it's UTF-8

// Automatically enforce ;charset=UTF-8 for non-ascii argument inputs
if string_input_set && !String::from_utf8_lossy(&input).to_string().is_ascii() {
if is_string_input_set && !String::from_utf8_lossy(&input).to_string().is_ascii() {
data_url.set_charset(Some("UTF-8".to_string()));
}
}
Expand All @@ -199,6 +207,16 @@ fn main() {
eprintln!("error: Invalid media type '{}'", media_type);
std::process::exit(1);
}
} else {
if is_file_input_set {
if input_file_path.ends_with(".png") {
data_url.set_media_type(Some("image/png".to_string()));
} else {
data_url.set_media_type(Some("text/plain".to_string()));
}
}
// TODO: try to automatically detect file type from file name / header
// data_url.set_media_type(Some("text/TODO".to_string()));
}

if app.is_present("FRAGMENT") {
Expand Down
Binary file added tests/_data_/pixel.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 4 additions & 0 deletions tests/cli/basic.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
//
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
//

#[cfg(test)]
mod passing {
Expand Down Expand Up @@ -73,12 +75,14 @@ ARGS:
}
}

//
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
//

#[cfg(test)]
mod failing {
Expand Down
4 changes: 4 additions & 0 deletions tests/cli/decode.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
//
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
//

#[cfg(test)]
mod passing {
Expand Down Expand Up @@ -47,12 +49,14 @@ mod passing {
}
}

//
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
//

#[cfg(test)]
mod failing {
Expand Down
4 changes: 4 additions & 0 deletions tests/cli/encode.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
//
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
//

#[cfg(test)]
mod passing {
Expand Down Expand Up @@ -138,12 +140,14 @@ mod passing {
}
}

//
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
//

#[cfg(test)]
mod failing {
Expand Down
Loading
Loading