Skip to content

Commit

Permalink
Incorporate HIP1 and TYC2 supplement 1 in cross match
Browse files Browse the repository at this point in the history
  • Loading branch information
ajtribick committed Dec 20, 2021
1 parent 5543ecc commit 088cbe9
Show file tree
Hide file tree
Showing 8 changed files with 107 additions and 32 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "celestia_gaia"
authors = ["Andrew Tribick <[email protected]>"]
version = "0.1.0"
version = "0.2.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand Down
10 changes: 10 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use std::error;
use std::fmt;
use std::io;
use std::num::{ParseFloatError, ParseIntError};
use std::str::Utf8Error;

use pyo3::exceptions::PyRuntimeError;
use pyo3::PyErr;
Expand All @@ -38,6 +39,7 @@ pub enum AppError {
MissingId(String),
InvalidFloat(ParseFloatError),
InvalidInt(ParseIntError),
InvalidUtf8(Utf8Error),
Io(io::Error),
Xml(quick_xml::Error),
Capacity(arrayvec::CapacityError),
Expand Down Expand Up @@ -76,6 +78,7 @@ impl fmt::Display for AppError {
Self::MissingId(s) => write!(f, "Missing ID ({})", s),
Self::InvalidFloat(_) => f.write_str("Failed to parse float"),
Self::InvalidInt(_) => f.write_str("Failed to parse int"),
Self::InvalidUtf8(_) => f.write_str("Invalid UTF-8"),
Self::Io(_) => f.write_str("IO Error"),
Self::Xml(_) => f.write_str("XML Error"),
Self::Capacity(_) => f.write_str("Capacity error"),
Expand All @@ -90,6 +93,7 @@ impl error::Error for AppError {
match self {
Self::InvalidFloat(e) => Some(e),
Self::InvalidInt(e) => Some(e),
Self::InvalidUtf8(e) => Some(e),
Self::Io(e) => Some(e),
Self::Xml(e) => Some(e),
Self::Capacity(e) => Some(e),
Expand Down Expand Up @@ -123,6 +127,12 @@ impl From<ParseIntError> for AppError {
}
}

/// Wraps a UTF-8 decoding failure in the `InvalidUtf8` variant, so that `?`
/// can propagate a `Utf8Error` from functions returning `AppError`.
impl From<Utf8Error> for AppError {
    fn from(source: Utf8Error) -> Self {
        Self::InvalidUtf8(source)
    }
}

impl From<quick_xml::Error> for AppError {
fn from(e: quick_xml::Error) -> Self {
Self::Xml(e)
Expand Down
24 changes: 22 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,10 @@ use crate::tychip::load_tyc2hip;
use crate::votable::VotableReader;
use crate::xmatch::Crossmatcher;

const HIP1_PATTERN: &str = "**/gaiaedr3-hip1.vot.gz";
const HIP2_PATTERN: &str = "**/gaiaedr3-hip2-*.vot.gz";
const TYC2TDSC_PATTERN: &str = "**/gaiaedr3-tyctdsc-*.vot.gz";
const TYC2_SUPPL1_PATTERN: &str = "**/gaiaedr3-tyc2suppl1.vot.gz";
const XMATCH_PATTERN: &str = "**/xmatch-*.vot.gz";
const DISTANCE_PATTERN: &str = "**/gaiaedr3-distance-*.vot.gz";

Expand All @@ -55,21 +57,39 @@ fn full_crossmatch(
let tyc2hip = load_tyc2hip(gaia_path, vizier_path)?;
let mut crossmatcher = Crossmatcher::new(tyc2hip);

let hip1_pattern = Glob::new(HIP1_PATTERN)?.compile_matcher();
let hip2_pattern = Glob::new(HIP2_PATTERN)?.compile_matcher();
let tyc2tdsc_pattern = Glob::new(TYC2TDSC_PATTERN)?.compile_matcher();
let tyc2_suppl1_pattern = Glob::new(TYC2_SUPPL1_PATTERN)?.compile_matcher();
for entry in read_dir(gaia_path)? {
let entry = entry?;
if !entry.metadata()?.is_file() {
continue;
}
let entry_path = entry.path();
if hip2_pattern.is_match(&entry_path) {
if hip1_pattern.is_match(&entry_path) {
println!("Processing HIP1 entry: {}", entry_path.to_string_lossy());
let file = File::open(entry_path)?;
let reader = VotableReader::new(file)?;
crossmatcher.add_hip(reader)?;
} else if hip2_pattern.is_match(&entry_path) {
println!("Processing HIP2 entry: {}", entry_path.to_string_lossy());
let file = File::open(entry_path)?;
let reader = VotableReader::new(file)?;
crossmatcher.add_hip(reader)?;
} else if tyc2tdsc_pattern.is_match(&entry_path) {
println!("Processing TYC2TDSC entry: {}", entry_path.to_string_lossy());
println!(
"Processing TYC2TDSC entry: {}",
entry_path.to_string_lossy()
);
let file = File::open(entry_path)?;
let reader = VotableReader::new(file)?;
crossmatcher.add_tyc(reader)?;
} else if tyc2_suppl1_pattern.is_match(&entry_path) {
println!(
"Processing TYC2 supplement 1 entry: {}",
entry_path.to_string_lossy()
);
let file = File::open(entry_path)?;
let reader = VotableReader::new(file)?;
crossmatcher.add_tyc(reader)?;
Expand Down
2 changes: 1 addition & 1 deletion src/tychip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ fn load_tyc2tdsc_hip(path: &Path, hip2tyc: &mut TycHipMap) -> Result<(), AppErro
.read_i32(hip_col)?
.ok_or_else(|| AppError::missing_id("hip"))?,
);
let cmp = accessor.read_char::<2>(comp_col)?;
let cmp = accessor.read_string::<2>(comp_col)?;

hip2tyc.add(hip, id_tycho, cmp);
}
Expand Down
6 changes: 5 additions & 1 deletion src/votable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ pub enum DataType {
Float,
Double,
Char,
String(Option<NonZeroUsize>),
}

impl DataType {
Expand All @@ -60,7 +61,9 @@ impl DataType {
Self::Long => NonZeroUsize::new(8),
Self::Float => NonZeroUsize::new(4),
Self::Double => NonZeroUsize::new(8),
Self::Char => None,
Self::Char => NonZeroUsize::new(1),
Self::String(Some(s)) => Some(*s),
Self::String(None) => None,
}
}
}
Expand All @@ -74,6 +77,7 @@ impl fmt::Display for DataType {
Self::Float => f.write_str("float"),
Self::Double => f.write_str("double"),
Self::Char => f.write_str("char"),
Self::String(_) => f.write_str("char array"),
}
}
}
Expand Down
65 changes: 48 additions & 17 deletions src/votable/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ use std::cmp;
use std::collections::HashMap;
use std::io::{self, BufRead, BufReader, ErrorKind, Read};
use std::mem;
use std::num::NonZeroUsize;
use std::str;

use arrayvec::ArrayVec;
use bitvec::prelude::*;
Expand All @@ -34,34 +36,44 @@ use super::{DataType, VOTABLE_NS};

use crate::error::AppError;

/// Array size declared for a field through its `arraysize` attribute.
enum ArraySize {
/// No `arraysize` attribute was present on the field.
None,
/// The attribute held a non-zero numeric size.
Fixed(NonZeroUsize),
/// The attribute value was `*`.
Variable,
}

fn parse_field(attributes: Attributes) -> Result<(Vec<u8>, DataType), AppError> {
let mut name = None;
let mut datatype = None;
let mut is_variable_length_array = false;
let mut array_size = ArraySize::None;
for attribute_result in attributes {
let attribute = attribute_result?;
match attribute.key {
b"name" => name = Some(attribute.value.into_owned()),
b"datatype" => datatype = Some(DataType::parse_bytes(&attribute.value)?),
b"arraysize" => {
if attribute.value.as_ref() == b"*" {
is_variable_length_array = true;
array_size = ArraySize::Variable;
} else {
return Err(AppError::parse("Fixed size arrays not supported"));
let size = str::from_utf8(&attribute.value)?.parse()?;
array_size = ArraySize::Fixed(
NonZeroUsize::new(size)
.ok_or(AppError::parse("Zero-length arrays not supported"))?,
);
}
}
_ => (),
}
}

match (name, datatype) {
(Some(n), Some(DataType::Char)) if is_variable_length_array => Ok((n, DataType::Char)),
(Some(_), Some(DataType::Char)) => Err(AppError::parse("Char fields not supported")),
(Some(_), Some(_)) if is_variable_length_array => {
Err(AppError::parse("Non-string arrays not supported"))
}
(Some(n), Some(dt)) => Ok((n, dt)),
_ => Err(AppError::parse("Field must have name and datatype")),
let name = name.ok_or(AppError::parse("Field name missing"))?;
let datatype = datatype.ok_or(AppError::parse("Field datatype missing"))?;

match (datatype, array_size) {
(DataType::Char, ArraySize::Variable) => Ok((name, DataType::String(None))),
(DataType::Char, ArraySize::Fixed(n)) => Ok((name, DataType::String(Some(n)))),
(_, ArraySize::None) => Ok((name, datatype)),
_ => Err(AppError::parse("Non-string arrays not supported")),
}
}

Expand Down Expand Up @@ -307,23 +319,42 @@ impl<'a> RecordAccessor<'a> {
Ok((&self.data[offset..offset + mem::size_of::<f64>()]).read_f64::<BigEndian>()?)
}

pub fn read_char<const CAP: usize>(
pub fn read_string<const CAP: usize>(
&self,
ordinal: usize,
) -> Result<ArrayVec<u8, CAP>, AppError> {
let field_type = self.field_types[ordinal];
if field_type != DataType::Char {
return Err(AppError::field_type(ordinal, DataType::Char, field_type));
if !matches!(field_type, DataType::Char | DataType::String(_)) {
return Err(AppError::field_type(
ordinal,
DataType::String(None),
field_type,
));
}

if self.mask[ordinal] {
return Ok(ArrayVec::new());
}

let offset = self.field_offsets[ordinal];
let data_offset = offset + mem::size_of::<u32>();
let length = (&self.data[offset..data_offset]).read_u32::<BigEndian>()? as usize;
Ok(self.data[data_offset..data_offset + length].try_into()?)
match field_type {
DataType::Char => Ok([self.data[offset]].as_slice().try_into()?),
DataType::String(Some(n)) => {
let slice = &self.data[offset..offset + n.get()];
let length = slice.iter().position(|&b| b == 0).unwrap_or(slice.len());
if length <= CAP {
Ok(slice[..length].try_into()?)
} else {
Err(AppError::Parse("String field too long"))
}
}
DataType::String(None) => {
let data_offset = offset + mem::size_of::<u32>();
let length = (&self.data[offset..data_offset]).read_u32::<BigEndian>()? as usize;
Ok(self.data[data_offset..data_offset + length].try_into()?)
}
_ => unreachable!(),
}
}
}

Expand Down
28 changes: 19 additions & 9 deletions src/xmatch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ struct TycOrdinals {
pub de_deg: usize,
pub bt_mag: usize,
pub vt_mag: usize,
pub ep_ra1990: usize,
pub ep_de1990: usize,
pub ep_ra1990: Option<usize>,
pub ep_de1990: Option<usize>,
}

impl TycOrdinals {
Expand All @@ -106,8 +106,8 @@ impl TycOrdinals {
de_deg: reader.ordinal(b"tyc_dec")?,
bt_mag: reader.ordinal(b"bt_mag")?,
vt_mag: reader.ordinal(b"vt_mag")?,
ep_ra1990: reader.ordinal(b"ep_ra1990")?,
ep_de1990: reader.ordinal(b"ep_de1990")?,
ep_ra1990: reader.ordinal(b"ep_ra1990").ok(),
ep_de1990: reader.ordinal(b"ep_de1990").ok(),
})
}
}
Expand Down Expand Up @@ -378,7 +378,9 @@ impl CrossmatchStar {
ra: accessor.read_f64(ordinals.hip_ra)?,
dec: accessor.read_f64(ordinals.hip_dec)?,
},
hp_mag: accessor.read_f64(ordinals.hp_mag)?,
hp_mag: accessor
.read_f64(ordinals.hp_mag)
.or_else(|_| accessor.read_f32(ordinals.hp_mag).map(|h| h as f64))?,
bt_mag: f32::NAN,
vt_mag: f32::NAN,
epoch_ra: 1.25,
Expand All @@ -399,10 +401,18 @@ impl CrossmatchStar {
dec: accessor.read_f64(ordinals.de_deg)?,
},
hp_mag: f64::NAN,
bt_mag: accessor.read_f32(ordinals.bt_mag)?,
vt_mag: accessor.read_f32(ordinals.vt_mag)?,
epoch_ra: accessor.read_f32(ordinals.ep_ra1990)?,
epoch_dec: accessor.read_f32(ordinals.ep_de1990)?,
bt_mag: accessor
.read_f32(ordinals.bt_mag)
.or_else(|_| accessor.read_f64(ordinals.bt_mag).map(|x| x as f32))?,
vt_mag: accessor
.read_f32(ordinals.vt_mag)
.or_else(|_| accessor.read_f64(ordinals.vt_mag).map(|x| x as f32))?,
epoch_ra: ordinals
.ep_ra1990
.map_or(Ok(1.25), |ord| accessor.read_f32(ord))?,
epoch_dec: ordinals
.ep_de1990
.map_or(Ok(1.25), |ord| accessor.read_f32(ord))?,
})
}

Expand Down

0 comments on commit 088cbe9

Please sign in to comment.