Skip to content

Commit 482232b

Browse files
committed
0.0.9 update upsert
1 parent 709f379 commit 482232b

File tree

3 files changed

+50
-10
lines changed

3 files changed

+50
-10
lines changed
Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,23 @@
1-
def upsert(df, primary_key:str, record:dict):
2-
df.loc[record[primary_key]] = record
3-
return df
1+
import pandas as pd
2+
def upsert(df: pd.DataFrame, *primary_keys: str, record: dict) -> pd.DataFrame:
    """Update the rows of ``df`` that match ``record``, or insert it as a new row.

    A row matches when, for every name in ``primary_keys``, its value equals
    ``record[name]``. Each key is looked up among the columns first and then
    among the index level names.

    Args:
        df: Frame to update. On the update path it is modified in place.
        *primary_keys: Column or index-level names that identify a row.
        record: Mapping of field name to value; must contain every primary key.

    Returns:
        ``df`` itself when at least one row matched (the matching rows are
        overwritten in place with the record's values for existing columns),
        otherwise a new frame with the record appended. Always use the
        returned frame.

    Raises:
        ValueError: If no primary key is given.
        KeyError: If a primary key is neither a column nor an index level,
            or is missing from ``record``.
    """
    if not primary_keys:
        raise ValueError("at least one primary key is required")

    index_names = list(df.index.names)

    # Positional boolean mask: unlike a label-based lookup it stays correct
    # when the index contains duplicate labels.
    mask = None
    for key in primary_keys:
        if key in df.columns:
            hits = (df[key] == record[key]).to_numpy()
        elif key in index_names:
            hits = df.index.get_level_values(key) == record[key]
        else:
            raise KeyError(f"'{key}' not found in either columns or index")
        mask = hits if mask is None else mask & hits

    if mask.any():
        # Only existing columns are written; index-level keys already match.
        for col, value in record.items():
            if col in df.columns:
                df.loc[mask, col] = value
        return df

    new_row = pd.DataFrame([record])
    if all(name is not None and name in new_row.columns for name in index_names):
        # The frame is keyed by a named index that the record can fill:
        # keep the existing labels instead of resetting them to 0..n-1.
        return pd.concat([df, new_row.set_index(index_names)])
    return pd.concat([df, new_row], ignore_index=True)

frame/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "davidkhala.data.frame"
3-
version = "0.0.8"
3+
version = "0.0.9"
44
description = ""
55
authors = [{ name = "David Liu", email = "[email protected]" }]
66
requires-python = ">=3.10"

frame/tests/pandas_test.py

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,20 +15,40 @@ def test_duckDB(self):
1515
conn.register("airports", df_airports)
1616
conn.close()
1717

18+
from davidkhala.data.frame.pandas import upsert
1819
class SyntaxTestCase(unittest.TestCase):
    """Exercises ``upsert`` against frames keyed by index, MultiIndex and columns."""

    def test_upsert(self):
        """An existing row addressed through a single named index is overwritten."""
        prim_key = 'id'
        df = pandas.DataFrame({
            'id': [1, 2],
            'name': ['Alice', 'Bob'],
            'score': [85, 90],
        }).set_index(prim_key)
        new_record = {'name': 'Charlie', 'score': 95, prim_key: 2}
        # Rebind to the returned frame, as the sibling tests do, so the test
        # does not depend on upsert mutating its argument in place.
        df = upsert(df, prim_key, record=new_record)

        self.assertEqual(95, df.at[2, 'score'])
        self.assertEqual('Charlie', df.at[2, 'name'])

    def test_upsert2(self):
        """An existing row addressed through a two-level MultiIndex is overwritten."""
        df = pandas.DataFrame([
            {'School': 'Oxford', 'Country': 'UK', 'Students': 1000},
            {'School': 'Harvard', 'Country': 'US', 'Students': 1500},
        ]).set_index(['School', 'Country'])
        new_students = 2000
        record = {'School': 'Oxford', 'Country': 'UK', 'Students': new_students}
        df = upsert(df, 'School', 'Country', record=record)
        self.assertEqual(new_students, df.loc[('Oxford', 'UK')].Students)

    def test_upsert3(self):
        """An existing row addressed through two plain columns is overwritten."""
        df = pandas.DataFrame([
            {'School': 'Oxford', 'Country': 'UK', 'Students': 1000},
            {'School': 'Harvard', 'Country': 'US', 'Students': 1500},
        ])
        new_students = 2000
        record = {'School': 'Oxford', 'Country': 'UK', 'Students': new_students}
        df = upsert(df, 'School', 'Country', record=record)
        matched = df[(df['School'] == 'Oxford') & (df['Country'] == 'UK')]
        self.assertEqual(new_students, matched.iloc[0].Students)
3252

3353

3454

0 commit comments

Comments
 (0)