Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First swing at ZipFile support. (Issue #16) #17

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 64 additions & 23 deletions src/crawler.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@

use anyhow::{Result, anyhow};
use crossbeam::channel::{Receiver, Sender, unbounded};
use glob::glob;
use std::ffi::OsStr;
use std::fs::File;
use std::io::{BufReader, BufRead, Read};
use std::path::PathBuf;

use crate::indexed_image::{IndexedImage, stringify_filepath};

/// Image file extensions the crawler will index, written in lowercase and without the leading dot.
// `'static` is implied for references in a `const`, so it is not spelled out.
const SUPPORTED_IMAGE_EXTENSIONS: &[&str; 12] = &["png", "bmp", "jpg", "jpeg", "jfif", "gif", "tiff", "pnm", "webp", "ico", "tga", "exr"];

/// Given a vec of directory globs and a set of valid extensions,
/// crawl the disk and index images.
/// Returns a Channel with Images as they're created.
Expand All @@ -29,7 +34,7 @@ pub fn crawl_globs_async(globs:Vec<String>, parallel_file_loaders:usize) -> (Rec
match maybe_fname {
Ok(path) => {
println!("Checking {}", stringify_filepath(&path));
if path.is_file() && is_supported_extension(&path) {
if path.is_file() {
if let Err(e) = tx.send(path) {
eprintln!("Failed to submit image for processing: {}", e);
}
Expand All @@ -48,31 +53,67 @@ pub fn crawl_globs_async(globs:Vec<String>, parallel_file_loaders:usize) -> (Rec
let rx = file_rx.clone();
let tx = image_tx.clone();
std::thread::spawn(move || {
while let Ok(image_path) = rx.recv() {
// Calculate the bare minimum that needs calculating and insert it.
match IndexedImage::from_file_path(&image_path.as_path()) {
Ok(img) => {
tx.send(img);
},
Err(e) => {
println!("Error processing {}: {}", image_path.display(), e);
while let Ok(file_path) = rx.recv() {
// File path is any generic file, not necessarily an image file.
// We need to check if it's an image, a zip file, or something else.
if let Some(extension) = file_path.extension().and_then(OsStr::to_str) {
// Figure out the kind of file.
let is_zipfile = extension.eq_ignore_ascii_case("zip");
let mut is_image_file = false;

if !is_zipfile { // Save ourselves some compute by skipping the extension check for zipfiles.
for &ext in SUPPORTED_IMAGE_EXTENSIONS {
if extension.eq_ignore_ascii_case(ext) {
is_image_file = true;
}
}
}
}

// Send one or more images to the image_tx queue.
if is_zipfile {
// Iterate over the zip entries by name: collect the entry names up front, then open each one with by_name.
if let Ok(fin) = File::open(&file_path) {
let mut bufreader = BufReader::new(fin);
if let Ok(mut zipfile) = zip::ZipArchive::new(bufreader) {
let filenames = zipfile.file_names().map(String::from).collect::<Vec<String>>();
for filename in &filenames {
// Try to pull and check the extension:
if let Ok(mut compressed_file) = zipfile.by_name(filename) {
if !compressed_file.is_file() { continue; }

let mut valid_image = false;
for &ext in SUPPORTED_IMAGE_EXTENSIONS {
if filename.ends_with(ext) {
valid_image = true;
break;
}
}
if !valid_image { continue; }

let mut data:Vec<u8> = vec![];
compressed_file.read(&mut data);

if let Ok(img) = IndexedImage::from_memory(&mut data, filename.to_string(), format!("{}/{}", &file_path.display(), filename)) {
tx.send(img);
}
}
}
}
}
} else if is_image_file {
match IndexedImage::from_file_path(&file_path.as_path()) {
Ok(img) => {
tx.send(img);
},
Err(e) => {
println!("Error processing {}: {}", file_path.display(), e);
}
}
}
} // Else we have to skip it. No extension.
}
});
}

(file_rx, image_rx)
}

/// Reports whether `path` carries one of the image extensions this crawler indexes.
///
/// The extension is lower-cased before comparison, so `photo.PNG` and `photo.png`
/// are treated alike. Paths without an extension, or whose extension is not valid
/// UTF-8, yield `false`.
fn is_supported_extension(path:&PathBuf) -> bool {
    const KNOWN_EXTENSIONS: [&str; 12] = ["png", "bmp", "jpg", "jpeg", "jfif", "gif", "tiff", "pnm", "webp", "ico", "tga", "exr"];

    path.extension()
        .and_then(|raw| raw.to_str())
        .map(|raw| raw.to_lowercase())
        .map_or(false, |lowered| KNOWN_EXTENSIONS.iter().any(|known| *known == lowered))
}
25 changes: 14 additions & 11 deletions src/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ const IMAGE_SCHEMA_V1: &'static str = "CREATE TABLE images (
id INTEGER PRIMARY KEY,
filename TEXT NOT NULL,
path TEXT NOT NULL,
container_path TEXT NOT NULL,
image_width INTEGER,
image_height INTEGER,
thumbnail BLOB,
Expand All @@ -56,6 +57,7 @@ const SELECT_FIELDS: &'static str = "
images.id,
images.filename,
images.path,
images.container_path,
images.image_width,
images.image_height,
images.thumbnail
Expand All @@ -69,10 +71,11 @@ fn indexed_image_from_row(row: &Row) -> SQLResult<IndexedImage> {
id: row.get(0)?,
filename: row.get(1)?,
path: row.get(2)?,
resolution: (row.get(3)?, row.get(4)?),
thumbnail: row.get(5)?,
created: Instant::now(), //row.get(6)?
indexed: Instant::now(), //row.get(7)?
container_path: row.get(3)?,
resolution: (row.get(4)?, row.get(5)?),
thumbnail: row.get(6)?,
created: Instant::now(), //row.get(7)?
indexed: Instant::now(), //row.get(8)?
tags: HashMap::new(),
phash: None,
visual_hash: None,
Expand Down Expand Up @@ -250,8 +253,8 @@ impl Engine {
fn insert_image(conn: &mut Connection, mut img:IndexedImage) -> Result<()> {
// Update the images table first...
conn.execute(
"INSERT INTO images (filename, path, image_width, image_height, thumbnail) VALUES (?, ?, ?, ?, ?)",
params![img.filename, img.path, img.resolution.0, img.resolution.1, img.thumbnail,]
"INSERT INTO images (filename, path, container_path, image_width, image_height, thumbnail) VALUES (?, ?, ?, ?, ?, ?)",
params![img.filename, img.path, img.container_path, img.resolution.0, img.resolution.1, img.thumbnail,]
)?;
img.id = conn.last_insert_rowid();

Expand Down Expand Up @@ -346,17 +349,17 @@ impl Engine {
// Parse and process results.
let result_cursor = prepared_statement.query_map(params![], |row| {
let mut img = indexed_image_from_row(row).expect("Unable to decode image in database.");
img.visual_hash = row.get(6).ok();
img.visual_hash = row.get(7).ok();
img.tags = HashMap::new();
let maybe_tag_data: SQLResult<JSONValue> = row.get(7);
let maybe_tag_data: SQLResult<JSONValue> = row.get(8);
if let Ok(tag_data) = maybe_tag_data {
if let Some(map_obj) = tag_data.as_object() {
for (k, v) in map_obj.iter() {
img.tags.insert(k.to_string(), v.to_string());
}
}
}
img.distance_from_query = row.get(8).ok();
img.distance_from_query = row.get(9).ok();
Ok(img)
})?;

Expand Down Expand Up @@ -402,8 +405,8 @@ impl Engine {
)).expect("The query for query_by_image_hash_from_image is wrong! The developer messed up!");
let img_cursor = stmt.query_map(params![indexed_image.visual_hash, self.max_distance_from_query], |row|{
let mut img = indexed_image_from_row(row).expect("Unable to unwrap result from database");
img.visual_hash = Some(row.get(6)?);
img.distance_from_query = Some(row.get(7)?);
img.visual_hash = Some(row.get(7)?);
img.distance_from_query = Some(row.get(8)?);
Ok(img)
}).unwrap();

Expand Down
36 changes: 26 additions & 10 deletions src/indexed_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ use anyhow::Result;
use std::collections::HashMap;
use std::error::Error;
use std::fs::File;
use std::io::BufReader;
use std::io::{BufReader, Cursor, Read, BufRead, Seek};
use std::time::Instant;
use std::path::Path;
//use exif::{Field, Exif, };
use image::{ImageError, GenericImageView};
use image::{ImageError, GenericImageView, DynamicImage, ImageFormat};
use tract_onnx::prelude::tract_itertools::Itertools;

use crate::image_hashes::phash;
use crate::image_hashes::mlhash;
Expand All @@ -17,8 +18,9 @@ pub const THUMBNAIL_SIZE: (u32, u32) = (256, 256);
#[derive(Clone, Debug)]
pub struct IndexedImage {
pub id: i64,
pub filename: String,
pub filename: String, // This is for display and search, not for actually opening the file.
pub path: String,
pub container_path: String, // "" by default, but if the image is inside of a zipfile, this will be the path to the zipfile.
pub resolution: (u32, u32),
pub thumbnail: Vec<u8>,
pub created: Instant,
Expand All @@ -35,21 +37,34 @@ pub struct IndexedImage {

impl IndexedImage {
pub fn from_file_path(path:&Path) -> Result<Self> {
let mut img = image::open(path)?;
let mut file = File::open(path)?;
let mut bytes = vec![];
let _bytes_read = file.read_to_end(&mut bytes)?;
//let mut img = image::io::Reader::new(&mut image_buffer).decode()?;

let filename:String = path.file_name().unwrap().to_str().unwrap().to_string();
let pathstring:String = stringify_filepath(path);

IndexedImage::from_memory(&mut bytes, filename, pathstring)
}

pub fn from_memory(bytes:&mut Vec<u8>, filename:String, path:String) -> Result<Self> {
let mut cursor = Cursor::new(bytes);

//let mut img = image::open(path)?;
//let mut img:DynamicImage = image::load_from_memory(bytes)?;
//let mut img:DynamicImage = image::load_from_memory_with_format(bytes.as_slice(), ImageFormat::from_path(&path)?)?;
let mut img:DynamicImage = image::io::Reader::new(&mut cursor).with_guessed_format()?.decode()?;
let thumb = img.thumbnail(THUMBNAIL_SIZE.0, THUMBNAIL_SIZE.1).to_rgb8();
let thumbnail_width = thumb.width();
let thumbnail_height = thumb.height();
let qoi_thumb = qoi::encode_to_vec(&thumb.into_raw(), thumbnail_width, thumbnail_height).expect("Unable to generate compressed thumbnail.");

// Also parse the EXIF data.
// TODO: I wish we didn't need to re-read the file. :|
let fin = File::open(path)?;
let mut image_buffer = std::io::BufReader::new(fin);
cursor.seek(std::io::SeekFrom::Start(0));
let mut tags = HashMap::<String, String>::new();
let mut exifreader = exif::Reader::new();
if let Ok(exif) = exifreader.read_from_container(&mut image_buffer) {
if let Ok(exif) = exifreader.read_from_container(&mut cursor) {
for field in exif.fields() {
tags.insert(field.tag.to_string(), field.display_value().to_string());
}
Expand All @@ -61,8 +76,9 @@ impl IndexedImage {
Ok(
IndexedImage {
id: 0,
filename: path.file_name().unwrap().to_str().unwrap().to_string(),
path: stringify_filepath(path),
filename: filename,
path: path,
container_path: "".to_owned(),
resolution: (img.width(), img.height()),
thumbnail: qoi_thumb,
created: Instant::now(),
Expand Down