Skip to content

Commit 905e26a

Browse files
committed
backup
1 parent f832c42 commit 905e26a

File tree

2 files changed

+58
-36
lines changed

2 files changed

+58
-36
lines changed
Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,39 @@
11
import pandas as pd
2-
def upsert(df: pd.DataFrame, *primary_keys: str, record: dict) -> pd.DataFrame:
    """Update the rows of ``df`` that match ``record``, or append it as a new row.

    Each primary key is resolved on its own: a key that is a column is
    compared against that column, otherwise a key that names an index level
    is compared against that level.  Keys may therefore be spread over both
    columns and index levels.

    Args:
        df: Frame to update.  Matching rows are modified **in place**.
        *primary_keys: Names of the columns / index levels identifying a row.
        record: Field values; must contain every primary key.

    Returns:
        ``df`` itself when at least one row matched, otherwise a new frame
        made of ``df`` plus one appended row.

    Raises:
        KeyError: If ``df`` is non-empty and a primary key is neither a
            column nor a named index level, or if ``record`` lacks a key.
    """
    # Unnamed levels (e.g. a default RangeIndex) can never be addressed by key.
    index_names = [name for name in df.index.names if name is not None]

    # Update path: only attempted when there is something to match against.
    if not df.empty:
        matches = pd.Series(True, index=df.index)
        for key in primary_keys:
            if key in df.columns:
                matches &= (df[key] == record[key])
            elif key in index_names:
                matches &= (df.index.get_level_values(key) == record[key])
            else:
                raise KeyError(f"'{key}' not found in columns or index")

        if matches.any():
            # Fields that are not existing columns (e.g. index keys) are skipped.
            for col, val in record.items():
                if col in df.columns:
                    df.loc[matches, col] = val
            return df

    # Insert path.  Keys that live in the index become the new row's index,
    # listed in the frame's own level order so the concatenated index lines up.
    row_levels = [name for name in index_names if name in primary_keys]
    data = {k: v for k, v in record.items() if k not in row_levels}

    # A frame without columns adopts the record's fields; otherwise the new
    # row is restricted to the existing columns.
    new_row = pd.DataFrame([data], columns=None if df.columns.empty else df.columns)

    if not row_levels:
        # Keys are plain columns: the positional index carries no meaning.
        return pd.concat([df, new_row], ignore_index=True)

    if len(row_levels) == 1:
        level = row_levels[0]
        new_row.index = pd.Index([record[level]], name=level)
    else:
        new_row.index = pd.MultiIndex.from_tuples(
            [tuple(record[level] for level in row_levels)],
            names=row_levels,
        )
    return pd.concat([df, new_row])

frame/tests/pandas_test.py

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55

66
class AirlineTestCase(unittest.TestCase):
77
def test_import(self):
    """Smoke test: both CSV fixtures can be parsed by pandas."""
    for fixture in ("fixtures/airports.csv", "fixtures/airlines.csv"):
        pandas.read_csv(fixture)
1010

1111
def test_duckDB(self):
1212
conn = duckdb.connect()
@@ -15,31 +15,37 @@ def test_duckDB(self):
1515
conn.register("airports", df_airports)
1616
conn.close()
1717

18+
1819
from davidkhala.data.frame.pandas import upsert
19-
class SyntaxTestCase(unittest.TestCase):
2020

21-
def test_upsert(self):
2221

23-
df = pandas.DataFrame({
24-
'id': [1, 2],
25-
'name': ['Alice', 'Bob'],
26-
'score': [85, 90]
27-
})
28-
prim_key ='id'
29-
df = df.set_index(prim_key)
30-
new_record = {'name': 'Charlie', 'score': 95, prim_key:2}
31-
upsert(df, prim_key, record = new_record)
22+
class SyntaxTestCase(unittest.TestCase):
23+
single_index_df = pandas.DataFrame({
24+
'id': [1, 2],
25+
'name': ['Alice', 'Bob'],
26+
'score': [85, 90]
27+
}).set_index('id')
3228

29+
def test_upsert(self):
    """Upserting an existing index key overwrites that row's columns."""
    # upsert() modifies matching rows in place, so work on a copy to keep
    # the class-level fixture untouched.
    df = self.single_index_df.copy()
    prim_key = df.index.name
    new_record = {'name': 'Charlie', 'score': 95, prim_key: 2}
    df = upsert(df, prim_key, record=new_record)
    self.assertEqual(95, df.at[2, 'score'])
    self.assertEqual('Charlie', df.at[2, 'name'])
    # The key lives in the index, so no extra column and no extra row appear.
    self.assertEqual(2, len(df.columns))
    self.assertEqual(2, len(df))
36+
3437
def test_upsert_empty(self):
    """Upsert into row-less frames, with and without a named index."""
    key = 'id'
    row = {'name': 'Charlie', 'score': 95, key: 2}

    # No columns and no named index: all three record fields become columns.
    bare = upsert(pandas.DataFrame(), key, record=row)
    self.assertEqual(3, len(bare.columns))
    self.assertEqual(1, len(bare))

    # Index named after the key: only the two remaining fields are columns.
    indexed = upsert(pandas.DataFrame(columns=[key]).set_index(key), key, record=row)
    self.assertEqual(2, len(indexed.columns))
    self.assertEqual(1, len(indexed))
4349

4450
def test_upsert2(self):
4551
df = pandas.DataFrame([
@@ -51,6 +57,7 @@ def test_upsert2(self):
5157
df = upsert(df, 'School', 'Country', record=record)
5258
self.assertEqual(new_students, df.loc[('Oxford', 'UK')].Students)
5359
print(df)
60+
5461
def test_upsert3(self):
5562
df = pandas.DataFrame([
5663
{'School': 'Oxford', 'Country': 'UK', 'Students': 1000},
@@ -62,6 +69,5 @@ def test_upsert3(self):
6269
self.assertEqual(new_students, df[(df['School'] == 'Oxford') & (df['Country'] == 'UK')].iloc[0].Students)
6370

6471

65-
6672
if __name__ == '__main__':
6773
unittest.main()

0 commit comments

Comments
 (0)