Skip to content

Commit 1144ef6

Browse files
authored
Merge pull request #11 from PyCampES/interactive_category_skeleton
Interactive category skeleton
2 parents 5707091 + 86b5990 commit 1144ef6

File tree

7 files changed

+200
-24
lines changed

7 files changed

+200
-24
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,3 +167,7 @@ cython_debug/
167167

168168
samples/
169169
.ruff_cache/
170+
categories_database.json
171+
ficamp.db
172+
gcache.json
173+

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ requests = "^2.31.0"
1616
python-dotenv = "^1.0.1"
1717
numpy = "^1.26.4"
1818
scikit-learn = "^1.4.1.post1"
19+
sqlmodel = "^0.0.16"
20+
questionary = "^2.0.1"
1921

2022
[tool.poetry.group.dev.dependencies]
2123
mypy = "^1.9.0"

src/ficamp/__main__.py

Lines changed: 152 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,177 @@
11
import argparse
2+
import json
3+
import os
4+
import shutil
5+
from enum import StrEnum
26

7+
import questionary
38
from dotenv import load_dotenv
9+
from sqlmodel import Session, SQLModel, create_engine, select
410

11+
from ficamp.classifier.infer import infer_tx_category
12+
from ficamp.datastructures import Tx
513
from ficamp.parsers.abn import AbnParser
614

715

816
def cli(argv: list[str] | None = None) -> argparse.Namespace:
    """Build the ``ficamp`` command line interface and parse the arguments.

    Two subcommands are registered and exactly one of them is required:

    * ``import``: load a bank export file (``--bank`` selects the parser).
    * ``categorize``: assign categories to stored transactions.

    Each subcommand stores its handler in ``args.func``.

    Args:
        argv: Argument list to parse. ``None`` (the default) lets argparse
            read the process command line.

    Returns:
        The parsed arguments, including ``command`` and ``func``.
    """
    parser = argparse.ArgumentParser(
        prog="ficamp", description="Parse and categorize your expenses."
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    # Subparser for the import command
    import_parser = subparsers.add_parser("import", help="Import transactions")
    import_parser.add_argument(
        "--bank", choices=["abn"], default="abn", help="Specify the bank for the import"
    )
    import_parser.add_argument("filename", help="File to load")
    import_parser.set_defaults(func=import_data)

    # Subparser for the categorize command
    categorize_parser = subparsers.add_parser(
        "categorize", help="Categorize transactions"
    )
    categorize_parser.add_argument(
        "--infer-category",
        action="store_true",
        help="Try to guess a category and pre-select it in the prompt",
    )
    categorize_parser.set_defaults(func=categorize)

    return parser.parse_args(argv)
2445

2546

26-
def main():
27-
args = cli()
28-
args.filename
29-
args.bank
30-
47+
def import_data(args, engine):
    """Parse a bank export file and store its new transactions.

    Args:
        args: Parsed CLI arguments; ``args.filename`` and ``args.bank`` are read.
        engine: Database engine used to open the session.

    Raises:
        ValueError: If no parser is available for ``args.bank``.
    """
    print(f"Importing data from {args.filename} for bank {args.bank}.")
    # TODO: Build enum for banks
    if args.bank != "abn":
        # Fail with a clear message instead of an unbound ``transactions`` below.
        raise ValueError(f"Unsupported bank: {args.bank}")
    parser = AbnParser()
    parser.load(args.filename)
    transactions = parser.parse()

    # One session for the whole import instead of one per transaction.
    with Session(engine) as session:
        for tx in transactions:
            # A transaction counts as already imported when date, amount and
            # concept all match an existing row.
            statement = select(Tx).where(
                Tx.date == tx.date, Tx.amount == tx.amount, Tx.concept == tx.concept
            )
            if session.exec(statement).first() is None:
                session.add(tx)
                # Commit per row so earlier rows persist if a later one fails.
                session.commit()
            else:
                print(f"Transaction already exists in the database. {tx}")
67+
68+
69+
def get_category_dict(categories_database_path="categories_database.json"):
    """Load the categories file and return a string -> category lookup.

    The JSON file maps each category to a list of strings; the result is the
    inverted mapping, with one entry per string.

    Args:
        categories_database_path: Path of the JSON categories file.

    Returns:
        A dict mapping each string to its category, or an empty dict when the
        file does not exist.
    """
    # FIXME: move categories to SQLITE instead of json file.
    try:
        # Open directly rather than checking existence first: no window in
        # which the file can disappear between the check and the read.
        with open(categories_database_path, "r", encoding="utf-8") as file:
            category_dict = json.load(file)
    except FileNotFoundError:
        return {}
    return {
        string: category
        for category, strings in category_dict.items()
        for string in strings
    }
81+
82+
83+
def revert_and_save_dict(string_to_category, filename="categories_database.json"):
    """Group strings by category and write the result to a JSON file.

    If ``filename`` already exists it is first moved to a backup file named
    ``categories_db_bkp.json`` in the system temporary directory.

    Args:
        string_to_category: Mapping of string -> category.
        filename: Path of the JSON file to write.
    """
    import tempfile  # local import: only needed for the backup location

    # Reverting the dictionary: category -> list of strings
    category_to_strings = {}
    for string, category in string_to_category.items():
        category_to_strings.setdefault(category, []).append(string)

    # Keep the previous version as a backup. The temp dir is resolved at
    # runtime so this also works where "/tmp" does not exist.
    if os.path.exists(filename):
        backup_path = os.path.join(tempfile.gettempdir(), "categories_db_bkp.json")
        shutil.move(filename, backup_path)

    # Saving to a JSON file
    with open(filename, "w", encoding="utf-8") as file:
        json.dump(category_to_strings, file, indent=4)
94+
95+
96+
class DefaultAnswers(StrEnum):
97+
SKIP = "Skip this Tx"
98+
NEW = "Type a new category"
99+
100+
101+
def query_business_category(tx, categories_dict, infer_category=False):
    """Return the category for ``tx``, asking the user when it is unknown.

    A category already stored for ``tx.concept`` is returned without
    prompting. Otherwise the user picks from the known categories, types a
    new one, or skips. A chosen category is recorded in ``categories_dict``
    under ``tx.concept``.

    Args:
        tx: Transaction; ``concept``, ``date`` and ``amount`` are read.
        categories_dict: Mapping of concept -> category. Updated in place.
        infer_category: When true, a guessed category is offered and
            pre-selected as the default choice.

    Returns:
        The category, or ``None`` when the transaction is skipped or the
        typed category is empty.

    Raises:
        KeyboardInterrupt: If the prompt is cancelled.
    """
    # first try to get from the category_dict
    category = categories_dict.get(tx.concept)
    if category:
        return category

    # ask the user if we don't know it; sorted so the menu order is stable
    categories_choices = sorted(set(categories_dict.values()))
    categories_choices.extend([DefaultAnswers.NEW, DefaultAnswers.SKIP])
    default_choice = DefaultAnswers.SKIP
    if infer_category:
        inferred_category = infer_tx_category(tx)
        if inferred_category:
            # Avoid listing the same category twice when the guess is
            # already a known category.
            if inferred_category not in categories_choices:
                categories_choices.append(inferred_category)
            default_choice = inferred_category

    print(f"{tx.date.isoformat()} {tx.amount} {tx.concept}")
    answer = questionary.select(
        "Please select the category for this TX",
        choices=categories_choices,
        default=default_choice,
        show_selected=True,
    ).ask()
    if answer == DefaultAnswers.NEW:
        answer = questionary.text("What's the category for the TX above").ask()
    if answer == DefaultAnswers.SKIP:
        return None
    if answer is None:
        # https://questionary.readthedocs.io/en/stable/pages/advanced.html#keyboard-interrupts
        raise KeyboardInterrupt
    if answer:
        categories_dict[tx.concept] = answer
        category = answer
    return category
133+
134+
135+
def categorize(args, engine):
    """Assign a category to every stored transaction that has none.

    For each uncategorized transaction the category is looked up or asked
    for interactively. A chosen category is written to the database and the
    categories file is saved. A keyboard interrupt stops the loop cleanly.

    Args:
        args: Parsed CLI arguments; ``args.infer_category`` is read.
        engine: Database engine used to open the session.
    """
    categories_dict = get_category_dict()
    try:
        with Session(engine) as session:
            statement = select(Tx).where(Tx.category.is_(None))
            results = session.exec(statement).all()
            for tx in results:
                print(f"Processing {tx}")
                tx_category = query_business_category(
                    tx, categories_dict, infer_category=args.infer_category
                )
                if not tx_category:
                    print("Not saving any category for this Tx")
                    continue
                print(f"Saving category for {tx.concept}: {tx_category}")
                tx.category = tx_category
                # update DB
                session.add(tx)
                session.commit()
                # Save after every answer so an interrupt does not lose work.
                revert_and_save_dict(categories_dict)
            revert_and_save_dict(categories_dict)
    except KeyboardInterrupt:
        print("Closing")
159+
160+
161+
def main():
    """Set up the SQLite database and run the selected subcommand."""
    # Open ficamp.db and make sure every declared table exists.
    engine = create_engine("sqlite:///ficamp.db")
    SQLModel.metadata.create_all(engine)

    try:
        args = cli()
        if not args.command:
            return
        # Each subcommand registered its handler as ``func``.
        args.func(args, engine)
    except KeyboardInterrupt:
        print("\nClosing")
38173

39174

40-
load_dotenv()
41-
main()
175+
# Run the CLI only when executed as a script/module, not on import.
if __name__ == "__main__":
    load_dotenv()
    main()

src/ficamp/classifier/features.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def extract_payment_method(d: dict) -> str | dict[str, Any]:
5656
res = "<UNK>"
5757
for method in payment_methods:
5858
if method in d["desc"]:
59-
return method
59+
res = method
6060
return d | {"payment_method": res}
6161

6262

src/ficamp/classifier/google_apis.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def search_google_maps(business_name, location=None, api_key=GOOGLE_API_KEY):
2020
response = requests.get(base_url, params=params)
2121
response.raise_for_status()
2222
if response.json()["status"] != "OK":
23-
raise GoogleException(response.json()["error_message"])
23+
raise GoogleException(response.json())
2424
results = response.json().get("results", [])
2525
if results:
2626
# Assuming the first result is the most relevant
@@ -40,7 +40,7 @@ def get_place_details(place_id):
4040
response = requests.get(url, headers=headers)
4141
response.raise_for_status()
4242
if response.json()["status"] != "OK":
43-
raise GoogleException(response.json()["error_message"])
43+
raise GoogleException(response.json())
4444
return response.json().get("types", [])
4545

4646

@@ -55,7 +55,7 @@ def query_google_places_new(query):
5555
response = requests.post(url, headers=headers, data=json.dumps(payload))
5656
response.raise_for_status()
5757
if response.json()["status"] != "OK":
58-
raise GoogleException(response.json()["error_message"])
58+
raise GoogleException(response.json())
5959
places = response.json().get("places", [])
6060
if places:
6161
categories = places[0].get("types", [])
@@ -65,6 +65,7 @@ def query_google_places_new(query):
6565

6666

6767
def find_business_category_in_google(field, location=None):
68+
"""Query Google Maps and try to get a category from it."""
6869
keys_to_remove = ["point_of_interest", "establishment", "store", "department_store"]
6970
# first try using google map places search
7071
place_id_gmaps, categories = search_google_maps(field, location)
@@ -84,3 +85,22 @@ def find_business_category_in_google(field, location=None):
8485
categories = list(set(categories) - set(keys_to_remove))
8586
return categories[0]
8687
raise GoogleException
88+
89+
90+
def query_gmaps_category(concept, cache_path="gcache.json"):
    """Return the Google Maps category for ``concept``, using a local cache.

    Pycamp internet is slow, so results are saved locally to go faster.
    Failed lookups are cached as ``"Unknown"``.

    Args:
        concept: Text to look up.
        cache_path: Path of the JSON cache file. A missing file is treated
            as an empty cache and is created on the first lookup.

    Returns:
        The cached or freshly fetched category, or ``"Unknown"`` when the
        lookup raised ``GoogleException``.
    """
    try:
        with open(cache_path, encoding="utf-8") as cache_file:
            cached = json.load(cache_file)
    except FileNotFoundError:
        # First run: there is no cache yet.
        cached = {}

    cached_category = cached.get(concept)
    if cached_category:
        return cached_category

    try:
        gmaps_category = find_business_category_in_google(concept)
    except GoogleException:
        gmaps_category = "Unknown"
    cached[concept] = gmaps_category
    with open(cache_path, "w", encoding="utf-8") as cache_file:
        json.dump(cached, cache_file)
    return gmaps_category

src/ficamp/classifier/infer.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from ficamp.classifier.google_apis import query_gmaps_category
2+
3+
4+
def infer_tx_category(tx):
    """Guess a category for ``tx`` from its concept.

    Returns the Google Maps category, or ``None`` when the lookup yields
    ``"Unknown"``.
    """
    gmap_category = query_gmaps_category(tx.concept)
    if gmap_category == "Unknown":
        return None
    print(f"Google Maps category is {gmap_category}")
    return gmap_category

src/ficamp/datastructures.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22
from datetime import datetime
33
from decimal import Decimal
44
from enum import StrEnum
5+
from typing import Optional
6+
7+
from sqlalchemy import JSON, Column
8+
from sqlmodel import Field, SQLModel
59

610

711
class Currency(StrEnum):
@@ -23,13 +27,14 @@ class Concept:
2327

2428

2529
# NOTE(review): @dataclass on top of a SQLModel table class is unusual;
# confirm the decorator is still wanted now that Tx is a SQLModel.
@dataclass
class Tx(SQLModel, table=True):
    """Represents a transaction extracted from a bank"""

    # Primary key; None until a value is assigned.
    id: Optional[int] = Field(default=None, primary_key=True)
    date: datetime
    amount: Decimal
    currency: Currency
    concept: str
    # None while the transaction has not been categorized.
    category: None | str
    # Stored as a JSON column.
    # NOTE(review): presumably renamed from `metadata` to avoid clashing
    # with SQLModel's own `metadata` attribute; confirm.
    tx_metadata: dict[str, str] = Field(sa_column=Column(JSON))
    # Stored as a JSON column.
    tags: list[str] = Field(sa_column=Column(JSON))

0 commit comments

Comments
 (0)