Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement FromPyObject and IntoPy Traits for AnyValue #37

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
186 changes: 184 additions & 2 deletions pyo3-polars/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! This crate offers a [`PySeries`] and a [`PyDataFrame`] which are simple wrapper around `Series` and `DataFrame`. The
//! This crate offers [`PySeries`], [`PyDataFrame`] and [`PyAnyValue`], which are simple wrappers around `Series`, `DataFrame` and `AnyValue`. The
//! advantage of these wrappers is that they can be converted to and from python as they implement `FromPyObject` and `IntoPy`.
//!
//! # Example
Expand Down Expand Up @@ -51,7 +51,7 @@ mod ffi;
use crate::error::PyPolarsErr;
use crate::ffi::to_py::to_py_array;
use polars::prelude::*;
use pyo3::{FromPyObject, IntoPy, PyAny, PyObject, PyResult, Python, ToPyObject};
use pyo3::{FromPyObject, IntoPy, PyAny, PyObject, PyResult, Python, ToPyObject, types::{PyDelta, PyNone, PyDate, PyDateTime, PyTime}, exceptions::PyTypeError};

#[cfg(feature = "lazy")]
use {polars_lazy::frame::LazyFrame, polars_plan::logical_plan::LogicalPlan};
Expand All @@ -66,6 +66,11 @@ pub struct PySeries(pub Series);
/// A wrapper around a [`DataFrame`] that can be converted to and from python with `pyo3`.
pub struct PyDataFrame(pub DataFrame);

/// Newtype over a polars [`AnyValue`] that `pyo3` can convert to and from Python objects.
///
/// The wrapped value is public, so it can be reached directly through `.0`.
#[derive(Clone, Debug)]
#[repr(transparent)]
pub struct PyAnyValue<'a>(pub AnyValue<'a>);

#[cfg(feature = "lazy")]
#[repr(transparent)]
#[derive(Clone)]
Expand All @@ -90,6 +95,12 @@ impl From<PySeries> for Series {
}
}

impl<'a> From<PyAnyValue<'a>> for AnyValue<'a> {
fn from(value: PyAnyValue<'a>) -> Self {
value.0
}
}

#[cfg(feature = "lazy")]
impl From<PyLazyFrame> for LazyFrame {
fn from(value: PyLazyFrame) -> Self {
Expand All @@ -109,6 +120,12 @@ impl AsRef<DataFrame> for PyDataFrame {
}
}

impl<'a> AsRef<AnyValue<'a>> for PyAnyValue<'a> {
fn as_ref(&self) -> &AnyValue<'a> {
&self.0
}
}

#[cfg(feature = "lazy")]
impl AsRef<LazyFrame> for PyLazyFrame {
fn as_ref(&self) -> &LazyFrame {
Expand Down Expand Up @@ -145,6 +162,73 @@ impl<'a> FromPyObject<'a> for PyDataFrame {
}
}

impl<'a> FromPyObject<'a> for PyAnyValue<'a> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fn extract(ob: &'a PyAny) -> PyResult<Self> {
let object_type = ob
.getattr("__class__")?
.getattr("__name__")?
.extract::<&str>()?;

match object_type {
"float" => Ok(PyAnyValue(AnyValue::Float64(ob.extract::<f64>()?))),
"int" => Ok(PyAnyValue(AnyValue::Int64(ob.extract::<i64>()?))),
"str" => Ok(PyAnyValue(AnyValue::Utf8(ob.extract::<&str>()?))),
"bool" => Ok(PyAnyValue(AnyValue::Boolean(ob.extract::<bool>()?))),
"datetime" => {
let timestamp = (ob.call_method0("timestamp")?.extract::<f64>()? * 1_000.0) as i64;
Ok(PyAnyValue(AnyValue::Datetime(
timestamp,
TimeUnit::Milliseconds,
&None,
)))
}
"date" => {
let days: Result<i32, PyErr> = Python::with_gil(|py| {
let datetime = py.import("datetime")?;

let epoch = datetime.call_method1("date", (1970, 1, 1))?;

let days = ob
.call_method1("__sub__", (epoch,))?
.getattr("days")?
.extract::<i32>()?;

Ok(days)
});
Ok(PyAnyValue(AnyValue::Date(days?)))
}
"timedelta" => {
let seconds =
(ob.call_method0("total_seconds")?.extract::<f64>()? * 1_000.0) as i64;
Ok(PyAnyValue(AnyValue::Duration(
seconds,
TimeUnit::Milliseconds,
)))
}
"time" => {
let hours = ob.getattr("hour")?.extract::<i64>()?;
let minutes = ob.getattr("minute")?.extract::<i64>()?;
let seconds = ob.getattr("second")?.extract::<i64>()?;
let microseconds = ob.getattr("microsecond")?.extract::<i64>()?;

Ok(PyAnyValue(AnyValue::Time(
(hours * 3_600_000_000_000)
+ (minutes * 60_000_000_000)
+ (seconds * 1_000_000_000)
+ (microseconds * 1_000),
)))
}
"Series" => Ok(PyAnyValue(AnyValue::List(ob.extract::<PySeries>()?.0))),
"bytes" => Ok(PyAnyValue(AnyValue::Binary(ob.extract::<&[u8]>()?))),
"NoneType" => Ok(PyAnyValue(AnyValue::Null)),
_ => Err(PyTypeError::new_err(format!(
"'{}' object cannot be interpreted",
object_type
))),
}
}
}

#[cfg(feature = "lazy")]
impl<'a> FromPyObject<'a> for PyLazyFrame {
fn extract(ob: &'a PyAny) -> PyResult<Self> {
Expand Down Expand Up @@ -188,6 +272,104 @@ impl IntoPy<PyObject> for PyDataFrame {
}
}

/// Splits a signed tick count into normalized `(days, seconds, microseconds)`.
///
/// `ticks_per_second` is the resolution of `difference` (1_000 for milliseconds,
/// 1_000_000 for microseconds, 1_000_000_000 for nanoseconds).
///
/// Euclidean division is used so that `seconds` is always in `0..86_400` and
/// `microseconds` in `0..1_000_000`, with the sign carried entirely by `days`.
/// Ticks finer than a microsecond are floored.
fn split_duration_ticks(difference: i64, ticks_per_second: i64) -> (i64, i64, i64) {
    let ticks_per_day = ticks_per_second * 86_400;
    let days = difference.div_euclid(ticks_per_day);
    let ticks_into_day = difference.rem_euclid(ticks_per_day);
    let seconds = ticks_into_day / ticks_per_second;
    let sub_second_ticks = ticks_into_day % ticks_per_second;
    // Multiply before dividing: `1_000_000 / ticks_per_second` would be 0 for
    // nanosecond input and silently drop the sub-second part.
    let microseconds = sub_second_ticks * 1_000_000 / ticks_per_second;
    (days, seconds, microseconds)
}

/// Builds a Python `timedelta` object from a tick count.
///
/// Arguments: the `Python` token, the signed tick count (`i64`), and the number
/// of ticks per second as an integer literal. Panics if the day count does not
/// fit in an `i32` or if `PyDelta::new` fails.
macro_rules! convert_duration (
    ($py:expr, $difference:expr, $second_factor:literal) => {
        {
            let (days, seconds, microseconds) =
                split_duration_ticks($difference, $second_factor);

            PyDelta::new(
                $py,
                i32::try_from(days).expect("duration day count does not fit in an i32"),
                i32::try_from(seconds).expect("seconds are always below 86_400"),
                i32::try_from(microseconds).expect("microseconds are always below 1_000_000"),
                // The fields are already in their canonical ranges.
                false,
            )
            .expect("failed to construct a Python timedelta")
            .into_py($py)
        }
    }
);

impl IntoPy<PyObject> for PyAnyValue<'_> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

idem, I think this should copy from py-polars.

fn into_py(self, py: Python<'_>) -> PyObject {
match self.0 {
AnyValue::Binary(val) => val.into_py(py),
AnyValue::Null => PyNone::get(py).into_py(py),
AnyValue::Boolean(val) => val.into_py(py),
AnyValue::Utf8(val) => val.into_py(py),
AnyValue::UInt8(val) => val.into_py(py),
AnyValue::UInt16(val) => val.into_py(py),
AnyValue::UInt32(val) => val.into_py(py),
AnyValue::UInt64(val) => val.into_py(py),
AnyValue::Int8(val) => val.into_py(py),
AnyValue::Int16(val) => val.into_py(py),
AnyValue::Int32(val) => val.into_py(py),
AnyValue::Int64(val) => val.into_py(py),
AnyValue::Float32(val) => val.into_py(py),
AnyValue::Float64(val) => val.into_py(py),
AnyValue::Date(days) => PyDate::from_timestamp(py, (days * 86_400).into())
.unwrap()
.into_py(py),
// The timezone is ignored - This may lead to wrong conversions
AnyValue::Datetime(time, unit, _timezone) => match unit {
polars::prelude::TimeUnit::Milliseconds => {
PyDateTime::from_timestamp(py, time as f64 / 1_000.0, None)
.unwrap()
.into_py(py)
}
polars::prelude::TimeUnit::Microseconds => {
PyDateTime::from_timestamp(py, time as f64 / 1_000_000.0, None)
.unwrap()
.into_py(py)
}
polars::prelude::TimeUnit::Nanoseconds => {
PyDateTime::from_timestamp(py, time as f64 / 1_000_000_000.0, None)
.unwrap()
.into_py(py)
}
},
AnyValue::Duration(difference, unit) => match unit {
polars::prelude::TimeUnit::Milliseconds => {
convert_duration!(py, difference, 1_000)
}
polars::prelude::TimeUnit::Microseconds => {
convert_duration!(py, difference, 1_000_000)
}
polars::prelude::TimeUnit::Nanoseconds => {
convert_duration!(py, difference, 1_000_000_000)
}
},
AnyValue::Time(nanoseconds) => {
let hours = nanoseconds / 3_600_000_000_000;
let remaining_after_hours = nanoseconds % 3_600_000_000_000;
let minutes = remaining_after_hours / 60_000_000_000;
let remaining_after_minutes = remaining_after_hours % 60_000_000_000;
let seconds = remaining_after_minutes / 1_000_000_000;
let remaining_after_seconds = remaining_after_minutes % 1_000_000_000;
let microseconds = remaining_after_seconds / 1_000;

PyTime::new(
py,
u8::try_from(hours).unwrap(),
u8::try_from(minutes).unwrap(),
u8::try_from(seconds).unwrap(),
u32::try_from(microseconds).unwrap(),
None,
)
.unwrap()
.into_py(py)
}
AnyValue::List(val) => PySeries(val).into_py(py),
AnyValue::Utf8Owned(val) => val.into_py(py),
AnyValue::BinaryOwned(val) => val.into_py(py),
}
}
}

#[cfg(feature = "lazy")]
impl IntoPy<PyObject> for PyLazyFrame {
fn into_py(self, py: Python<'_>) -> PyObject {
Expand Down