Skip to content

Commit

Permalink
Incorporate TYC2 supplement 1 in ID crossmatch
Browse files Browse the repository at this point in the history
  • Loading branch information
ajtribick committed Dec 20, 2021
1 parent db04260 commit 5543ecc
Show file tree
Hide file tree
Showing 6 changed files with 289 additions and 110 deletions.
8 changes: 4 additions & 4 deletions celestia_gaia/gaia_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def download_tyc2tdsc_xmatch():
tyc2tdsc_xmatch_file = GAIA_EDR3_DIR/'tyc2tdsc_hip_xmatch.vot.gz'
if (
tyc2tdsc_xmatch_file.exists()
and not confirm_action('TYC2TDSC-based TYC-HIP crossmatch exists, replace?')
and not confirm_action('Re-download TYC2TDSC-HIP identifier map?')
):
return

Expand Down Expand Up @@ -311,15 +311,15 @@ def download_gaia() -> None:

hip_ranges = _getranges(1, _HIP_MAX, GAIA_EDR3_DIR, 'gaiaedr3-hip2-*.vot.gz')
if not hip_ranges:
if confirm_action('HIP2 cross-match data already downloaded, replace?'):
if confirm_action('HIP2-Gaia cross-match data already downloaded, replace?'):
hip_ranges = MultiRange(1, _HIP_MAX)
download_gaia_hip2(hip_ranges)

download_gaia_hip1()

tyc_ranges = _getranges(1, _TYC_MAX, GAIA_EDR3_DIR, 'gaiaedr3-tyctdsc-*.vot.gz')
if not tyc_ranges:
if confirm_action('TYC TDSC cross-match data already downloaded, replace?'):
if confirm_action('TYC2TDSC-Gaia cross-match data already downloaded, replace?'):
tyc_ranges = MultiRange(1, _TYC_MAX)
download_gaia_tyctdsc(tyc_ranges)

Expand All @@ -332,7 +332,7 @@ def build_xmatches() -> None:
not (GAIA_EDR3_DIR/'xmatch-gaia-hiptyc.vot.gz').exists()
or confirm_action('Re-generate HIP/TYC cross-match?')
):
build_xmatch(GAIA_EDR3_DIR, 'xmatch-gaia-hiptyc.vot.gz')
build_xmatch(GAIA_EDR3_DIR, VIZIER_DIR, 'xmatch-gaia-hiptyc.vot.gz')


def download_gaia_distances(chunk_size: int = 250000) -> None:
Expand Down
27 changes: 23 additions & 4 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use std::borrow::Cow;
use std::error;
use std::fmt;
use std::io;
use std::num::{ParseFloatError, ParseIntError};

use pyo3::exceptions::PyRuntimeError;
use pyo3::PyErr;
Expand All @@ -35,6 +36,8 @@ pub enum AppError {
FieldType(usize, DataType, DataType),
MissingField(Cow<'static, [u8]>),
MissingId(String),
InvalidFloat(ParseFloatError),
InvalidInt(ParseIntError),
Io(io::Error),
Xml(quick_xml::Error),
Capacity(arrayvec::CapacityError),
Expand Down Expand Up @@ -71,10 +74,12 @@ impl fmt::Display for AppError {
),
Self::MissingField(s) => write!(f, "Missing field {}", String::from_utf8_lossy(s)),
Self::MissingId(s) => write!(f, "Missing ID ({})", s),
Self::Io(e) => write!(f, "Io error: {}", e),
Self::Xml(e) => write!(f, "XML error: {}", e),
Self::Capacity(e) => write!(f, "Capacity error: {}", e),
Self::Other(e) => write!(f, "Error: {}", e),
Self::InvalidFloat(_) => f.write_str("Failed to parse float"),
Self::InvalidInt(_) => f.write_str("Failed to parse int"),
Self::Io(_) => f.write_str("IO Error"),
Self::Xml(_) => f.write_str("XML Error"),
Self::Capacity(_) => f.write_str("Capacity error"),
Self::Other(_) => f.write_str("Error occurred"),
Self::Thread(e) => write!(f, "Thread error {:?}", e),
}
}
Expand All @@ -83,6 +88,8 @@ impl fmt::Display for AppError {
impl error::Error for AppError {
fn source(&self) -> Option<&(dyn error::Error + 'static)> {
match self {
Self::InvalidFloat(e) => Some(e),
Self::InvalidInt(e) => Some(e),
Self::Io(e) => Some(e),
Self::Xml(e) => Some(e),
Self::Capacity(e) => Some(e),
Expand All @@ -104,6 +111,18 @@ impl From<io::ErrorKind> for AppError {
}
}

impl From<ParseFloatError> for AppError {
fn from(e: ParseFloatError) -> Self {
Self::InvalidFloat(e)
}
}

impl From<ParseIntError> for AppError {
fn from(e: ParseIntError) -> Self {
Self::InvalidInt(e)
}
}

impl From<quick_xml::Error> for AppError {
fn from(e: quick_xml::Error) -> Self {
Self::Xml(e)
Expand Down
40 changes: 14 additions & 26 deletions src/hip2dist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/

use std::borrow::Cow;
use std::fs::File;
use std::io::{self, BufReader, BufWriter, ErrorKind, Write};
use std::path::Path;
Expand Down Expand Up @@ -52,24 +53,24 @@ struct DistanceInfo {
upper: f64,
}

fn load_priors(path: impl AsRef<Path>) -> io::Result<Vec<PriorInfo>> {
fn load_priors(path: impl AsRef<Path>) -> Result<Vec<PriorInfo>, AppError> {
let file = File::open(path)?;
let mut reader = CsvReader::new(BufReader::new(file))?;
let healpix_col = reader
.index("healpix")
.ok_or_else(|| io::Error::new(ErrorKind::InvalidData, "Missing healpix field"))?;
.ok_or(AppError::MissingField(Cow::Borrowed(b"healpix")))?;
let ggd_l_col = reader
.index("GGDrlen")
.ok_or_else(|| io::Error::new(ErrorKind::InvalidData, "Missing GGDrlen field"))?;
.ok_or(AppError::MissingField(Cow::Borrowed(b"GGDrlen")))?;
let ggd_alpha_col = reader
.index("GGDalpha")
.ok_or_else(|| io::Error::new(ErrorKind::InvalidData, "Missing GGDalpha field"))?;
.ok_or(AppError::MissingField(Cow::Borrowed(b"GGDalpha")))?;
let ggd_beta_col = reader
.index("GGDbeta")
.ok_or_else(|| io::Error::new(ErrorKind::InvalidData, "Missing field GGDbeta"))?;
.ok_or(AppError::MissingField(Cow::Borrowed(b"GGDbeta")))?;
let edsd_length_col = reader
.index("EDSDrlen")
.ok_or_else(|| io::Error::new(ErrorKind::InvalidData, "Missing EDSDrlen field"))?;
.ok_or(AppError::MissingField(Cow::Borrowed(b"EDSDrlen")))?;

let mut result = Vec::with_capacity(12288);
while reader.next()?.is_some() {
Expand All @@ -78,28 +79,15 @@ fn load_priors(path: impl AsRef<Path>) -> io::Result<Vec<PriorInfo>> {
.parse()
.map_err(|e| io::Error::new(ErrorKind::InvalidData, e))?;
if healpix != result.len() {
return Err(io::Error::new(
ErrorKind::InvalidData,
"Prior file is not sequential",
));
return Err(
io::Error::new(ErrorKind::InvalidData, "Prior file is not sequential").into(),
);
}
let prior_info = PriorInfo {
ggd_l: reader
.field(ggd_l_col)
.parse()
.map_err(|e| io::Error::new(ErrorKind::InvalidData, e))?,
ggd_alpha: reader
.field(ggd_alpha_col)
.parse()
.map_err(|e| io::Error::new(ErrorKind::InvalidData, e))?,
ggd_beta: reader
.field(ggd_beta_col)
.parse()
.map_err(|e| io::Error::new(ErrorKind::InvalidData, e))?,
edsd_length: reader
.field(edsd_length_col)
.parse()
.map_err(|e| io::Error::new(ErrorKind::InvalidData, e))?,
ggd_l: reader.field(ggd_l_col).parse()?,
ggd_alpha: reader.field(ggd_alpha_col).parse()?,
ggd_beta: reader.field(ggd_beta_col).parse()?,
edsd_length: reader.field(edsd_length_col).parse()?,
};
result.push(prior_info);
}
Expand Down
25 changes: 4 additions & 21 deletions src/hip2dist/estimate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,27 +95,10 @@ impl<B: BufRead + Send> Parser<B> {
let mut processed = 0;
while self.reader.next()?.is_some() {
let hip_info = HipInfo {
hip: HipId(
self.reader
.field(self.hip_col)
.parse()
.map_err(|e| io::Error::new(ErrorKind::InvalidData, e))?,
),
plx: self
.reader
.field(self.plx_col)
.parse()
.map_err(|e| io::Error::new(ErrorKind::InvalidData, e))?,
e_plx: self
.reader
.field(self.e_plx_col)
.parse()
.map_err(|e| io::Error::new(ErrorKind::InvalidData, e))?,
healpix: self
.reader
.field(self.healpix_col)
.parse()
.map_err(|e| io::Error::new(ErrorKind::InvalidData, e))?,
hip: HipId(self.reader.field(self.hip_col).parse()?),
plx: self.reader.field(self.plx_col).parse()?,
e_plx: self.reader.field(self.e_plx_col).parse()?,
healpix: self.reader.field(self.healpix_col).parse()?,
};

self.sender
Expand Down
85 changes: 30 additions & 55 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ mod astro;
mod csv;
mod error;
mod hip2dist;
mod tychip;
mod votable;
mod xmatch;

Expand All @@ -41,77 +42,41 @@ use crate::tychip::load_tyc2hip;
use crate::votable::VotableReader;
use crate::xmatch::Crossmatcher;

const HIP_PATTERN: &str = "**/gaiaedr3-hip2-*.vot.gz";
const TYC_PATTERN: &str = "**/gaiaedr3-tyctdsc-*.vot.gz";
const HIP2_PATTERN: &str = "**/gaiaedr3-hip2-*.vot.gz";
const TYC2TDSC_PATTERN: &str = "**/gaiaedr3-tyctdsc-*.vot.gz";
const XMATCH_PATTERN: &str = "**/xmatch-*.vot.gz";
const DISTANCE_PATTERN: &str = "**/gaiaedr3-distance-*.vot.gz";

/// Builds the TYC -> HIP identifier map from the `tyc2tdsc_hip_xmatch.vot.gz`
/// VOTable located in `path`.
///
/// Every row supplies a Tycho identifier (`id_tycho`), a Hipparcos number
/// (`hip`) and a component field (`cmp`). Rows are first grouped by HIP
/// number: a later row replaces the stored one only when its `cmp` value
/// compares strictly lower. The surviving (HIP, TYC) pairs are then inverted
/// into a map keyed by TYC.
///
/// # Errors
///
/// Propagates failures from opening the file, from constructing or reading
/// the VOTable, and from looking up the three columns. A row without an
/// `id_tycho` or `hip` value produces the error built by
/// `AppError::missing_id`.
fn load_tyc2hip(path: &Path) -> Result<HashMap<TycId, HipId>, AppError> {
    let xmatch_path = path.join("tyc2tdsc_hip_xmatch.vot.gz");
    let mut reader = VotableReader::new(File::open(xmatch_path)?)?;

    let tyc_ordinal = reader.ordinal(b"id_tycho")?;
    let hip_ordinal = reader.ordinal(b"hip")?;
    let cmp_ordinal = reader.ordinal(b"cmp")?;

    // Keyed by HIP so that competing rows for the same HIP number can be compared.
    let mut best_by_hip = HashMap::new();
    while let Some(row) = reader.read()? {
        let raw_tyc = row
            .read_i64(tyc_ordinal)?
            .ok_or_else(|| AppError::missing_id("id_tycho"))?;
        let raw_hip = row
            .read_i32(hip_ordinal)?
            .ok_or_else(|| AppError::missing_id("hip"))?;
        let component = row.read_char::<2>(cmp_ordinal)?;
        let candidate = (TycId(raw_tyc), component);

        match best_by_hip.entry(HipId(raw_hip)) {
            Entry::Occupied(mut current) => {
                // Strict comparison: on a tie the earlier row is retained.
                if candidate.1 < current.get().1 {
                    current.insert(candidate);
                }
            }
            Entry::Vacant(slot) => {
                slot.insert(candidate);
            }
        }
    }

    // Flip the orientation: callers look up HIP numbers by TYC identifier.
    let tyc2hip = best_by_hip
        .into_iter()
        .map(|(hip, (tyc, _))| (tyc, hip))
        .collect();
    Ok(tyc2hip)
}

fn full_crossmatch(path: &Path, output_name: &str) -> Result<(), AppError> {
let tyc2hip = load_tyc2hip(path)?;
fn full_crossmatch(
gaia_path: &Path,
vizier_path: &Path,
output_name: &str,
) -> Result<(), AppError> {
let tyc2hip = load_tyc2hip(gaia_path, vizier_path)?;
let mut crossmatcher = Crossmatcher::new(tyc2hip);

let hip_pattern = Glob::new(HIP_PATTERN)?.compile_matcher();
let tyc_pattern = Glob::new(TYC_PATTERN)?.compile_matcher();
for entry in read_dir(path)? {
let hip2_pattern = Glob::new(HIP2_PATTERN)?.compile_matcher();
let tyc2tdsc_pattern = Glob::new(TYC2TDSC_PATTERN)?.compile_matcher();
for entry in read_dir(gaia_path)? {
let entry = entry?;
if !entry.metadata()?.is_file() {
continue;
}
let entry_path = entry.path();
if hip_pattern.is_match(&entry_path) {
println!("Processing HIP entry: {}", entry_path.to_string_lossy());
if hip2_pattern.is_match(&entry_path) {
println!("Processing HIP2 entry: {}", entry_path.to_string_lossy());
let file = File::open(entry_path)?;
let reader = VotableReader::new(file)?;
crossmatcher.add_hip(reader)?;
} else if tyc_pattern.is_match(&entry_path) {
println!("Processing TYC entry: {}", entry_path.to_string_lossy());
} else if tyc2tdsc_pattern.is_match(&entry_path) {
println!("Processing TYC2TDSC entry: {}", entry_path.to_string_lossy());
let file = File::open(entry_path)?;
let reader = VotableReader::new(file)?;
crossmatcher.add_tyc(reader)?;
}
}

let mut output_path = path.to_path_buf();
let mut output_path = gaia_path.to_path_buf();
output_path.push(output_name);

let file = File::create(output_path)?;
Expand Down Expand Up @@ -210,9 +175,19 @@ fn apply_distances(gaia_dir: &Path, source_ids: &[i64]) -> Result<Vec<f32>, AppE
#[pymodule]
fn celestia_gaia(_py: Python, m: &PyModule) -> PyResult<()> {
#[pyfn(m)]
#[pyo3(name = "build_xmatch", text_signature = "()")]
fn build_xmatch_py<'py>(_py: Python<'py>, gaia_dir: &PyAny, output_name: &str) -> PyResult<()> {
full_crossmatch(gaia_dir.str()?.to_str()?.as_ref(), output_name)?;
#[pyo3(
name = "build_xmatch",
text_signature = "(gaia_dir, vizier_dir, output_name, /)"
)]
fn build_xmatch_py<'py>(
_py: Python<'py>,
gaia_dir: &PyAny,
vizier_dir: &PyAny,
output_name: &str,
) -> PyResult<()> {
let gaia_dir = gaia_dir.str()?.to_str()?.as_ref();
let vizier_dir = vizier_dir.str()?.to_str()?.as_ref();
full_crossmatch(gaia_dir, vizier_dir, output_name)?;
Ok(())
}

Expand Down
Loading

0 comments on commit 5543ecc

Please sign in to comment.