-
Notifications
You must be signed in to change notification settings - Fork 50
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement FromPyObject and IntoPy Traits for AnyValue #37
Open
JabobKrauskopf
wants to merge
11
commits into
pola-rs:main
Choose a base branch
from
JabobKrauskopf:main
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 8 commits
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
4500d8d
Implement FromPyObject and IntoPy for AnyValue
JabobKrauskopf b77b8b0
Merge branch 'pola-rs:main' into main
JabobKrauskopf 038fe47
#37 Remove println
JabobKrauskopf ea36a64
#37 Fix conversion error and improve number readability
JabobKrauskopf 8dafa58
#37 Improve error handling
JabobKrauskopf 256bc04
#37 Spelling and grammar
JabobKrauskopf 8adcb61
#37 Improve datetime conversion accuracy
JabobKrauskopf a5afc25
#37 Undo part of last commit which decreased conversion accuracy
JabobKrauskopf b61fbde
Merge branch 'pola-rs:main' into main
JabobKrauskopf 1e145e8
Merge remote-tracking branch 'upstream/main'
JabobKrauskopf 6b946ef
refactor: IntoPy and FromPyObject now work the same as in py-polars
JabobKrauskopf File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
//! This crate offers a [`PySeries`] and a [`PyDataFrame`] which are simple wrapper around `Series` and `DataFrame`. The | ||
//! This crate offers [`PySeries`], [`PyDataFrame`] and [`PyAnyValue`], which are simple wrappers around `Series`, `DataFrame` and `AnyValue`. The | ||
//! advantage of these wrappers is that they can be converted to and from python as they implement `FromPyObject` and `IntoPy`. | ||
//! | ||
//! # Example | ||
|
@@ -51,7 +51,7 @@ mod ffi; | |
use crate::error::PyPolarsErr; | ||
use crate::ffi::to_py::to_py_array; | ||
use polars::prelude::*; | ||
use pyo3::{FromPyObject, IntoPy, PyAny, PyObject, PyResult, Python, ToPyObject}; | ||
use pyo3::{FromPyObject, IntoPy, PyAny, PyObject, PyResult, Python, ToPyObject, types::{PyDelta, PyNone, PyDate, PyDateTime, PyTime}, exceptions::PyTypeError}; | ||
|
||
#[cfg(feature = "lazy")] | ||
use {polars_lazy::frame::LazyFrame, polars_plan::logical_plan::LogicalPlan}; | ||
|
@@ -66,6 +66,11 @@ pub struct PySeries(pub Series); | |
/// A wrapper around a [`DataFrame`] that can be converted to and from python with `pyo3`. | ||
pub struct PyDataFrame(pub DataFrame); | ||
|
||
#[repr(transparent)] | ||
#[derive(Debug, Clone)] | ||
/// A wrapper around [`AnyValue`] that can be converted to and from python with `pyo3`. | ||
pub struct PyAnyValue<'a>(pub AnyValue<'a>); | ||
|
||
#[cfg(feature = "lazy")] | ||
#[repr(transparent)] | ||
#[derive(Clone)] | ||
|
@@ -90,6 +95,12 @@ impl From<PySeries> for Series { | |
} | ||
} | ||
|
||
impl<'a> From<PyAnyValue<'a>> for AnyValue<'a> { | ||
fn from(value: PyAnyValue<'a>) -> Self { | ||
value.0 | ||
} | ||
} | ||
|
||
#[cfg(feature = "lazy")] | ||
impl From<PyLazyFrame> for LazyFrame { | ||
fn from(value: PyLazyFrame) -> Self { | ||
|
@@ -109,6 +120,12 @@ impl AsRef<DataFrame> for PyDataFrame { | |
} | ||
} | ||
|
||
impl<'a> AsRef<AnyValue<'a>> for PyAnyValue<'a> { | ||
fn as_ref(&self) -> &AnyValue<'a> { | ||
&self.0 | ||
} | ||
} | ||
|
||
#[cfg(feature = "lazy")] | ||
impl AsRef<LazyFrame> for PyLazyFrame { | ||
fn as_ref(&self) -> &LazyFrame { | ||
|
@@ -145,6 +162,73 @@ impl<'a> FromPyObject<'a> for PyDataFrame { | |
} | ||
} | ||
|
||
impl<'a> FromPyObject<'a> for PyAnyValue<'a> { | ||
fn extract(ob: &'a PyAny) -> PyResult<Self> { | ||
let object_type = ob | ||
.getattr("__class__")? | ||
.getattr("__name__")? | ||
.extract::<&str>()?; | ||
|
||
match object_type { | ||
"float" => Ok(PyAnyValue(AnyValue::Float64(ob.extract::<f64>()?))), | ||
"int" => Ok(PyAnyValue(AnyValue::Int64(ob.extract::<i64>()?))), | ||
"str" => Ok(PyAnyValue(AnyValue::Utf8(ob.extract::<&str>()?))), | ||
"bool" => Ok(PyAnyValue(AnyValue::Boolean(ob.extract::<bool>()?))), | ||
"datetime" => { | ||
let timestamp = (ob.call_method0("timestamp")?.extract::<f64>()? * 1_000.0) as i64; | ||
Ok(PyAnyValue(AnyValue::Datetime( | ||
timestamp, | ||
TimeUnit::Milliseconds, | ||
&None, | ||
))) | ||
} | ||
"date" => { | ||
let days: Result<i32, PyErr> = Python::with_gil(|py| { | ||
let datetime = py.import("datetime")?; | ||
|
||
let epoch = datetime.call_method1("date", (1970, 1, 1))?; | ||
|
||
let days = ob | ||
.call_method1("__sub__", (epoch,))? | ||
.getattr("days")? | ||
.extract::<i32>()?; | ||
|
||
Ok(days) | ||
}); | ||
Ok(PyAnyValue(AnyValue::Date(days?))) | ||
} | ||
"timedelta" => { | ||
let seconds = | ||
(ob.call_method0("total_seconds")?.extract::<f64>()? * 1_000.0) as i64; | ||
Ok(PyAnyValue(AnyValue::Duration( | ||
seconds, | ||
TimeUnit::Milliseconds, | ||
))) | ||
} | ||
"time" => { | ||
let hours = ob.getattr("hour")?.extract::<i64>()?; | ||
let minutes = ob.getattr("minute")?.extract::<i64>()?; | ||
let seconds = ob.getattr("second")?.extract::<i64>()?; | ||
let microseconds = ob.getattr("microsecond")?.extract::<i64>()?; | ||
|
||
Ok(PyAnyValue(AnyValue::Time( | ||
(hours * 3_600_000_000_000) | ||
+ (minutes * 60_000_000_000) | ||
+ (seconds * 1_000_000_000) | ||
+ (microseconds * 1_000), | ||
))) | ||
} | ||
"Series" => Ok(PyAnyValue(AnyValue::List(ob.extract::<PySeries>()?.0))), | ||
"bytes" => Ok(PyAnyValue(AnyValue::Binary(ob.extract::<&[u8]>()?))), | ||
"NoneType" => Ok(PyAnyValue(AnyValue::Null)), | ||
_ => Err(PyTypeError::new_err(format!( | ||
"'{}' object cannot be interpreted", | ||
object_type | ||
))), | ||
} | ||
} | ||
} | ||
|
||
#[cfg(feature = "lazy")] | ||
impl<'a> FromPyObject<'a> for PyLazyFrame { | ||
fn extract(ob: &'a PyAny) -> PyResult<Self> { | ||
|
@@ -188,6 +272,104 @@ impl IntoPy<PyObject> for PyDataFrame { | |
} | ||
} | ||
|
||
/// Splits a signed tick count into the `(days, seconds, microseconds)` triple
/// of a normalized Python `timedelta`.
///
/// `ticks_per_second` is the number of ticks in one second (1_000 for
/// milliseconds, 1_000_000 for microseconds, 1_000_000_000 for nanoseconds).
///
/// Euclidean division keeps `seconds` in `0..86_400` and `microseconds` in
/// `0..1_000_000` even for negative inputs; only `days` carries the sign.
/// Sub-microsecond precision is truncated.
fn split_duration(difference: i64, ticks_per_second: i64) -> (i64, i64, i64) {
    let ticks_per_day = ticks_per_second * 86_400;
    let days = difference.div_euclid(ticks_per_day);
    let ticks_into_day = difference.rem_euclid(ticks_per_day);
    let seconds = ticks_into_day / ticks_per_second;
    let sub_second_ticks = ticks_into_day % ticks_per_second;
    // Multiply before dividing: `1_000_000 / ticks_per_second` is 0 in integer
    // arithmetic for nanoseconds, which would drop every sub-second part.
    let microseconds = sub_second_ticks * 1_000_000 / ticks_per_second;
    (days, seconds, microseconds)
}

/// Builds a Python `timedelta` object from a tick count.
///
/// * `$py` - the GIL token.
/// * `$difference` - the signed duration as an `i64` tick count.
/// * `$second_factor` - literal number of ticks per second.
///
/// Panics if a component does not fit into `i32` or if Python rejects the
/// resulting `timedelta`.
macro_rules! convert_duration (
    ($py:expr, $difference:expr, $second_factor:literal) => {
        {
            let (days, seconds, microseconds) = split_duration($difference, $second_factor);

            // The components are already normalized, so no further
            // normalization is requested from Python.
            PyDelta::new(
                $py,
                i32::try_from(days).unwrap(),
                i32::try_from(seconds).unwrap(),
                i32::try_from(microseconds).unwrap(),
                false,
            )
            .unwrap()
            .into_py($py)
        }
    }
);
|
||
impl IntoPy<PyObject> for PyAnyValue<'_> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. idem, I think this should copy from py-polars. |
||
fn into_py(self, py: Python<'_>) -> PyObject { | ||
match self.0 { | ||
AnyValue::Binary(val) => val.into_py(py), | ||
AnyValue::Null => PyNone::get(py).into_py(py), | ||
AnyValue::Boolean(val) => val.into_py(py), | ||
AnyValue::Utf8(val) => val.into_py(py), | ||
AnyValue::UInt8(val) => val.into_py(py), | ||
AnyValue::UInt16(val) => val.into_py(py), | ||
AnyValue::UInt32(val) => val.into_py(py), | ||
AnyValue::UInt64(val) => val.into_py(py), | ||
AnyValue::Int8(val) => val.into_py(py), | ||
AnyValue::Int16(val) => val.into_py(py), | ||
AnyValue::Int32(val) => val.into_py(py), | ||
AnyValue::Int64(val) => val.into_py(py), | ||
AnyValue::Float32(val) => val.into_py(py), | ||
AnyValue::Float64(val) => val.into_py(py), | ||
AnyValue::Date(days) => PyDate::from_timestamp(py, (days * 86_400).into()) | ||
.unwrap() | ||
.into_py(py), | ||
// The timezone is ignored - This may lead to wrong conversions | ||
AnyValue::Datetime(time, unit, _timezone) => match unit { | ||
polars::prelude::TimeUnit::Milliseconds => { | ||
PyDateTime::from_timestamp(py, time as f64 / 1_000.0, None) | ||
.unwrap() | ||
.into_py(py) | ||
} | ||
polars::prelude::TimeUnit::Microseconds => { | ||
PyDateTime::from_timestamp(py, time as f64 / 1_000_000.0, None) | ||
.unwrap() | ||
.into_py(py) | ||
} | ||
polars::prelude::TimeUnit::Nanoseconds => { | ||
PyDateTime::from_timestamp(py, time as f64 / 1_000_000_000.0, None) | ||
.unwrap() | ||
.into_py(py) | ||
} | ||
}, | ||
AnyValue::Duration(difference, unit) => match unit { | ||
polars::prelude::TimeUnit::Milliseconds => { | ||
convert_duration!(py, difference, 1_000) | ||
} | ||
polars::prelude::TimeUnit::Microseconds => { | ||
convert_duration!(py, difference, 1_000_000) | ||
} | ||
polars::prelude::TimeUnit::Nanoseconds => { | ||
convert_duration!(py, difference, 1_000_000_000) | ||
} | ||
}, | ||
AnyValue::Time(nanoseconds) => { | ||
let hours = nanoseconds / 3_600_000_000_000; | ||
let remaining_after_hours = nanoseconds % 3_600_000_000_000; | ||
let minutes = remaining_after_hours / 60_000_000_000; | ||
let remaining_after_minutes = remaining_after_hours % 60_000_000_000; | ||
let seconds = remaining_after_minutes / 1_000_000_000; | ||
let remaining_after_seconds = remaining_after_minutes % 1_000_000_000; | ||
let microseconds = remaining_after_seconds / 1_000; | ||
|
||
PyTime::new( | ||
py, | ||
u8::try_from(hours).unwrap(), | ||
u8::try_from(minutes).unwrap(), | ||
u8::try_from(seconds).unwrap(), | ||
u32::try_from(microseconds).unwrap(), | ||
None, | ||
) | ||
.unwrap() | ||
.into_py(py) | ||
} | ||
AnyValue::List(val) => PySeries(val).into_py(py), | ||
AnyValue::Utf8Owned(val) => val.into_py(py), | ||
AnyValue::BinaryOwned(val) => val.into_py(py), | ||
} | ||
} | ||
} | ||
|
||
#[cfg(feature = "lazy")] | ||
impl IntoPy<PyObject> for PyLazyFrame { | ||
fn into_py(self, py: Python<'_>) -> PyObject { | ||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this should do exactly what py-polars does. See: https://github.com/pola-rs/polars/blob/149297acec860aaba9eb6b33277cbad489cab7f8/py-polars/src/conversion.rs#L780