Skip to content

Commit

Permalink
Generate auxiliary files
Browse files Browse the repository at this point in the history
  • Loading branch information
ajtribick committed Aug 4, 2021
1 parent 8671370 commit f8e3adb
Show file tree
Hide file tree
Showing 9 changed files with 150 additions and 19 deletions.
33 changes: 26 additions & 7 deletions celestia_gaia/gaia_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,13 @@
from astroquery.gaia import Gaia
import numpy as np

from .directories import GAIA_EDR3_DIR
from .directories import AUXFILES_DIR, GAIA_EDR3_DIR
from .ranges import MultiRange
from .utils import confirm_action
from .celestia_gaia import build_hip_xmatch, build_tyc_xmatch, get_source_ids
from .celestia_gaia import (
build_hip_xmatch, build_tyc_xmatch, create_hip_aux_xmatch, create_tyc_aux_xmatch,
get_required_dist_source_ids,
)


_HIP_MAX = 120404
Expand Down Expand Up @@ -134,7 +137,7 @@ def download_gaia_tyc(ranges: MultiRange, chunk_size: int = 200) -> None:
"""Download TYC/TDSC data from the Gaia archive."""
for section in ranges.chunk_ranges(chunk_size):
tyc_file = (
GAIA_EDR3_DIR/f'gaiaedr3-tyctdsc-part{section.begin:04}-{section.end:04}.vot.gz'
GAIA_EDR3_DIR/f'gaiaedr3-tyctdsc-{section.begin:04}-{section.end:04}.vot.gz'
)

query = _tyc_query(section.begin, section.end)
Expand Down Expand Up @@ -179,13 +182,13 @@ def download_gaia() -> None:
def build_xmatches() -> None:
"""Build the cross-matches"""
if (
not (GAIA_EDR3_DIR / HIP_XMATCH).exists()
not (GAIA_EDR3_DIR/HIP_XMATCH).exists()
or confirm_action('Re-generate Hipparcos cross-match?')
):
build_hip_xmatch(GAIA_EDR3_DIR, HIP_XMATCH)

if (
not (GAIA_EDR3_DIR / TYC_XMATCH).exists()
not (GAIA_EDR3_DIR/TYC_XMATCH).exists()
or confirm_action('Re-generate Tycho cross-match?')
):
build_tyc_xmatch(GAIA_EDR3_DIR, TYC_XMATCH)
Expand All @@ -200,11 +203,11 @@ def download_gaia_distances(chunk_size: int = 250000) -> None:
LEFT JOIN external.gaiaedr3_distance d ON s.source_id = d.source_id
"""

source_ids: np.ndarray = get_source_ids(GAIA_EDR3_DIR)
source_ids: np.ndarray = get_required_dist_source_ids(GAIA_EDR3_DIR)
if len(source_ids) == 0 and confirm_action('Re-download distances?'):
for f in GAIA_EDR3_DIR.glob('gaiaedr3-distance-*.vot.gz'):
f.unlink()
source_ids = get_source_ids(GAIA_EDR3_DIR)
source_ids = get_required_dist_source_ids(GAIA_EDR3_DIR)
source_ids = source_ids.astype('int64') # https://github.com/numpy/numpy/issues/12264
position = 0
part = 1
Expand All @@ -226,3 +229,19 @@ def download_gaia_distances(chunk_size: int = 250000) -> None:
position = next_position
part += 1
p += 1

def create_aux_xmatch_files() -> None:
    """Write the HIP-Gaia and TYC-Gaia cross-match CSV files into AUXFILES_DIR.

    Each CSV is generated from the matching cross-match file in GAIA_EDR3_DIR.
    When the CSV is already present the user is asked whether to overwrite it;
    declining leaves the existing file untouched.
    """
    # (output CSV name, cross-match input name, overwrite prompt, converter)
    jobs = (
        (
            'hip-gaia-xmatch.csv',
            HIP_XMATCH,
            'HIP-Gaia xmatch csv exists, re-create it?',
            create_hip_aux_xmatch,
        ),
        (
            'tyc-gaia-xmatch.csv',
            TYC_XMATCH,
            'TYC-Gaia xmatch csv exists, re-create it?',
            create_tyc_aux_xmatch,
        ),
    )

    for csv_name, xmatch_name, prompt, convert in jobs:
        target = AUXFILES_DIR/csv_name
        # Only prompt when there is something to overwrite.
        if target.exists() and not confirm_action(prompt):
            continue
        convert(GAIA_EDR3_DIR/xmatch_name, target)
1 change: 1 addition & 0 deletions celestia_gaia/hip_dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,5 @@ def build_hip2_distances() -> None:
):
return

print('Estimating HIP2 distances from parallaxes')
estimate_distances(_PRIORS_FILE, _HEALPIX_FILE, _DIST_FILE)
11 changes: 10 additions & 1 deletion celestia_gaia/make_stardb.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import numpy as np
from astropy.table import MaskedColumn, Table, join, unique, vstack

from .directories import OUTPUT_DIR, VIZIER_DIR
from .directories import AUXFILES_DIR, OUTPUT_DIR, VIZIER_DIR
from .parse_hip import process_hip
from .parse_tyc import process_tyc
from .spparse import CEL_UNKNOWN_STAR, parse_spectrum
Expand Down Expand Up @@ -408,3 +408,12 @@ def make_stardb() -> None:
contents = ['stars.dat', 'hdxindex.dat', 'saoxindex.dat', 'LICENSE.txt', 'CREDITS.md']
for f in contents:
zf.write(OUTPUT_DIR/f, arcname=Path(archivename)/f)

archivename = f'celestia-gaia-auxiliary-{VERSION}'
with ZipFile(f'{archivename}.zip', 'w', compression=ZIP_DEFLATED, compresslevel=9) as zf:
contents = [
'hip2dist.csv', 'hip-gaia-xmatch.csv', 'tyc-gaia-xmatch.csv',
'LICENSE.txt', 'CREDITS.md',
]
for f in contents:
zf.write(AUXFILES_DIR/f, arcname=Path(archivename)/f)
5 changes: 4 additions & 1 deletion download_data
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
"""Entry point for downloading the data files."""

from celestia_gaia.download_data import download_vizier, download_sao_xmatch
from celestia_gaia.gaia_data import build_xmatches, download_gaia, download_gaia_distances
from celestia_gaia.gaia_data import (
build_xmatches, download_gaia, download_gaia_distances, create_aux_xmatch_files,
)
from celestia_gaia.hip_dist import download_dist_prior, build_hip2_distances

download_vizier()
Expand All @@ -29,4 +31,5 @@ build_hip2_distances()
download_sao_xmatch()
download_gaia()
build_xmatches()
create_aux_xmatch_files()
download_gaia_distances()
51 changes: 46 additions & 5 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use std::{
borrow::Cow,
collections::{HashMap, HashSet},
fs::{read_dir, File},
io::{BufWriter, Write},
iter::FromIterator,
path::Path,
};
Expand All @@ -41,7 +42,10 @@ use crate::{
error::AppError,
hip2dist::estimate_distances,
votable::{VotableReader, VotableRecord},
xmatch::{Crossmatchable, Crossmatcher, GaiaStar, HipStar, TycStar},
xmatch::{
hip_csv_crossmatch, tyc_csv_crossmatch, Crossmatchable, Crossmatcher, GaiaStar, HipStar,
TycStar,
},
};

const HIP_PATTERN: &str = "**/gaiaedr3-hip2-*.vot.gz";
Expand Down Expand Up @@ -80,7 +84,7 @@ where
Ok(())
}

fn get_xmatch_source_ids(path: &Path) -> Result<Vec<i64>, AppError> {
fn get_required_dist_source_ids(path: &Path) -> Result<Vec<i64>, AppError> {
let pattern = Glob::new(XMATCH_PATTERN)?.compile_matcher();
let mut source_ids = HashSet::new();
for entry_result in read_dir(path)? {
Expand All @@ -92,6 +96,7 @@ fn get_xmatch_source_ids(path: &Path) -> Result<Vec<i64>, AppError> {

let file = File::open(entry_path)?;
let mut reader = VotableReader::new(file)?;

let ordinal = reader.ordinal(b"source_id")?;
while let Some(accessor) = reader.read()? {
let source_id = accessor
Expand All @@ -111,6 +116,7 @@ fn get_xmatch_source_ids(path: &Path) -> Result<Vec<i64>, AppError> {

let file = File::open(entry_path)?;
let mut reader = VotableReader::new(file)?;

let ordinal = reader.ordinal(b"source_id")?;
while let Some(accessor) = reader.read()? {
if let Some(source_id) = accessor.read_i64(ordinal)? {
Expand Down Expand Up @@ -195,10 +201,13 @@ fn celestia_gaia(_py: Python, m: &PyModule) -> PyResult<()> {
.map_err(Into::into)
}

#[pyfn(m, "get_source_ids")]
#[pyfn(m, "get_required_dist_source_ids")]
#[text_signature = "(gaia_dir, /)"]
fn get_source_ids_py<'py>(py: Python<'py>, gaia_dir: &PyAny) -> PyResult<&'py PyArray1<i64>> {
Ok(get_xmatch_source_ids(gaia_dir.str()?.to_str()?.as_ref())?.into_pyarray(py))
fn get_required_dist_source_ids_py<'py>(
py: Python<'py>,
gaia_dir: &PyAny,
) -> PyResult<&'py PyArray1<i64>> {
Ok(get_required_dist_source_ids(gaia_dir.str()?.to_str()?.as_ref())?.into_pyarray(py))
}

#[pyfn(m, "apply_distances")]
Expand Down Expand Up @@ -231,5 +240,37 @@ fn celestia_gaia(_py: Python, m: &PyModule) -> PyResult<()> {
.map_err(Into::into)
}

// Python binding: converts the HIP cross-match VOTable at `crossmatch_file`
// into a CSV file at `output_file`. Both arguments are converted to paths via
// their Python `str()` representation.
#[pyfn(m, "create_hip_aux_xmatch")]
#[text_signature = "(crossmatch_file, output_file, /)"]
fn create_hip_aux_xmatch_py<'py>(
    _py: Python<'py>,
    crossmatch_file: &'py PyAny,
    output_file: &'py PyAny,
) -> PyResult<()> {
    // The input is opened and parsed before the output is created, so a
    // missing or unreadable cross-match file does not leave an empty CSV behind.
    let votable = VotableReader::new(File::open(crossmatch_file.str()?.to_str()?)?)?;
    let mut csv_out = BufWriter::new(File::create(output_file.str()?.to_str()?)?);
    hip_csv_crossmatch(votable, &mut csv_out)?;
    // Flush explicitly: dropping a BufWriter discards any flush error.
    csv_out.flush().map_err(Into::into)
}

// Python binding: converts the TYC cross-match VOTable at `crossmatch_file`
// into a CSV file at `output_file`. Both arguments are converted to paths via
// their Python `str()` representation.
#[pyfn(m, "create_tyc_aux_xmatch")]
#[text_signature = "(crossmatch_file, output_file, /)"]
fn create_tyc_aux_xmatch_py<'py>(
    _py: Python<'py>,
    crossmatch_file: &'py PyAny,
    output_file: &'py PyAny,
) -> PyResult<()> {
    // The input is opened and parsed before the output is created, so a
    // missing or unreadable cross-match file does not leave an empty CSV behind.
    let votable = VotableReader::new(File::open(crossmatch_file.str()?.to_str()?)?)?;
    let mut csv_out = BufWriter::new(File::create(output_file.str()?.to_str()?)?);
    tyc_csv_crossmatch(votable, &mut csv_out)?;
    // Flush explicitly: dropping a BufWriter discards any flush error.
    csv_out.flush().map_err(Into::into)
}

Ok(())
}
4 changes: 3 additions & 1 deletion src/votable/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ impl<R: Read> VotableReader<R> {
let mut is_binary2 = false;
loop {
match xml_reader.read_namespaced_event(&mut buf, &mut ns_buf)? {
(Some(VOTABLE_NS), Event::Start(ref e)) => match e.name() {
(Some(VOTABLE_NS), Event::Start(ref e))
| (Some(VOTABLE_NS), Event::Empty(ref e)) => match e.name() {
b"FIELD" => {
let (name, data_type) = parse_field(e.attributes())?;
field_names.insert(name, field_names.len());
Expand All @@ -109,6 +110,7 @@ impl<R: Read> VotableReader<R> {
b"STREAM" => break,
_ => (),
},

(_, Event::Eof) => return Err(ErrorKind::UnexpectedEof.into()),
_ => (),
}
Expand Down
4 changes: 2 additions & 2 deletions src/xmatch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ use super::{
mod hip;
mod tyc;

pub use hip::HipStar;
pub use tyc::TycStar;
pub use hip::{hip_csv_crossmatch, HipStar};
pub use tyc::{tyc_csv_crossmatch, TycStar};

pub trait Crossmatchable<C> {
fn score(&self, gaia_star: &C) -> f64;
Expand Down
28 changes: 27 additions & 1 deletion src/xmatch/hip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/

use std::io::Read;
use std::io::{Read, Write};

use lazy_static::lazy_static;

Expand Down Expand Up @@ -172,3 +172,29 @@ impl Crossmatchable<GaiaStar> for HipStar {
pm_diff + (mag_diff / 0.1).sqr() + (dist / MAS_TO_DEG).sqr()
}
}

/// Writes a HIP–Gaia cross-match table as CSV, ordered by HIP number.
///
/// Reads the `hip` and `source_id` fields of every record in `reader`, then
/// writes a `hip,source_id` header followed by one `hip,source_id` line per
/// record to `writer`. The header is written before any record is read.
///
/// # Errors
///
/// Returns an error if either field is absent from the table, if a record has
/// no value for `hip` or `source_id`, or if reading or writing fails.
pub fn hip_csv_crossmatch(
    mut reader: VotableReader<impl Read>,
    writer: &mut impl Write,
) -> Result<(), AppError> {
    // Capacity hint only; presumably the expected number of cross-matched
    // HIP entries — TODO confirm. The vector grows if the table is larger.
    let mut records = Vec::with_capacity(117955);
    let hip_ordinal = reader.ordinal(b"hip")?;
    let source_id_ordinal = reader.ordinal(b"source_id")?;
    writeln!(writer, "hip,source_id")?;
    while let Some(record) = reader.read()? {
        // `ok_or_else` builds the error only when the value is actually
        // missing, instead of once per record.
        let hip = record
            .read_i32(hip_ordinal)?
            .ok_or_else(|| AppError::missing_id("hip"))?;
        let source_id = record
            .read_i64(source_id_ordinal)?
            .ok_or_else(|| AppError::missing_id("source_id"))?;
        records.push((hip, source_id));
    }

    // Sort on the whole (hip, source_id) pair so the output is fully
    // deterministic even if a HIP number appears more than once.
    records.sort_unstable();
    for (hip, source_id) in records {
        writeln!(writer, "{},{}", hip, source_id)?;
    }

    Ok(())
}
32 changes: 31 additions & 1 deletion src/xmatch/tyc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/

use std::io::Read;
use std::io::{Read, Write};

use lazy_static::lazy_static;

Expand Down Expand Up @@ -257,3 +257,33 @@ impl Crossmatchable<GaiaStar> for TycStar {
pm_diff + (idx_diff as f64 / 0.06).sqr() + (dist / MAS_TO_DEG).sqr()
}
}

/// Writes a TYC–Gaia cross-match table as CSV, ordered by Tycho identifier.
///
/// Reads the `id_tycho` and `source_id` fields of every record in `reader`,
/// then writes a `tyc,source_id` header followed by one line per record to
/// `writer`. Each line holds the quoted designation `"TYC1-TYC2-TYC3"` and the
/// source id. The header is written before any record is read.
///
/// # Errors
///
/// Returns an error if either field is absent from the table, if a record has
/// no value for `id_tycho` or `source_id`, or if reading or writing fails.
pub fn tyc_csv_crossmatch(
    mut reader: VotableReader<impl Read>,
    writer: &mut impl Write,
) -> Result<(), AppError> {
    // Capacity hint only; presumably the expected number of cross-matched
    // Tycho entries — TODO confirm. The vector grows if the table is larger.
    let mut results = Vec::with_capacity(2561887);
    let tyc_ordinal = reader.ordinal(b"id_tycho")?;
    let source_id_ordinal = reader.ordinal(b"source_id")?;
    writeln!(writer, "tyc,source_id")?;
    while let Some(record) = reader.read()? {
        // `ok_or_else` builds the error only when the value is actually
        // missing, instead of once per record.
        let tyc = record
            .read_i64(tyc_ordinal)?
            .ok_or_else(|| AppError::missing_id("id_tycho"))?;
        let source_id = record
            .read_i64(source_id_ordinal)?
            .ok_or_else(|| AppError::missing_id("source_id"))?;
        results.push((tyc, source_id));
    }

    // Sort on the whole (id_tycho, source_id) pair so the output is fully
    // deterministic even if an identifier appears more than once.
    results.sort_unstable();

    for (tyc, source_id) in results {
        // Split the packed identifier into its three components.
        // NOTE(review): assumes id_tycho = TYC1 * 1_000_000 + TYC2 * 10 + TYC3;
        // confirm against the Gaia archive table schema.
        let tyc1 = tyc / 1_000_000;
        let tyc2 = (tyc / 10) % 100_000;
        let tyc3 = tyc % 10;
        writeln!(writer, "\"{}-{}-{}\",{}", tyc1, tyc2, tyc3, source_id)?;
    }

    Ok(())
}

0 comments on commit f8e3adb

Please sign in to comment.