sakowicz · sakowicz · Mar 23, 2025 · Feb 26, 2025 · Feb 28, 2025 · Mar 1, 2025
diff --git a/.env.example b/.env.example
@@ -2,8 +2,15 @@ ACTUAL_SERVER_URL=http://actual_server:5006
 ACTUAL_PASSWORD=
 ACTUAL_BUDGET_ID=
 CLASSIFICATION_SCHEDULE_CRON="0 */4 * * *"
-CLASSIFY_ON_STARTUP=true
-SYNC_ACCOUNTS_BEFORE_CLASSIFY=true
+
+# Feature flags - specified as a JSON array of feature names
+FEATURES='["freeWebSearch", "suggestNewCategories", "rerunMissedTransactions", "classifyOnStartup", "syncAccountsBeforeClassify"]'
+
+# Tools and API keys
+# ENABLED_TOOLS=webSearch
+VALUESERP_API_KEY=
+
+# LLM configuration
 LLM_PROVIDER=openai
 OPENAI_API_KEY=
 OPENAI_MODEL=gpt-4o-mini

diff --git a/.eslintrc.json b/.eslintrc.json
@@ -31,7 +31,7 @@
       }
     ],
     "no-unused-vars": "off",
-    "@typescript-eslint/no-unused-vars": ["error"]
+    "@typescript-eslint/no-unused-vars": ["error", { "argsIgnorePattern": "^_" }]
   },
   "parserOptions": {
     "ecmaVersion": 2020,

diff --git a/.gitignore b/.gitignore
@@ -3,3 +3,4 @@ tmp/budgets/
 dist/
 
 .env
+*.log
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -16,6 +16,25 @@
             "sourceMaps": true,
             "console": "integratedTerminal",
             "cwd": "${workspaceFolder}"
+        },
+        {
+            "type": "node",
+            "request": "launch",
+            "name": "Debug Tests",
+            "runtimeExecutable": "npm",
+            "runtimeArgs": [
+                "run",
+                "test",
+                "--",
+                "--runInBand",
+                "--watchAll=false"
+            ],
+            "skipFiles": [
+                "<node_internals>/**"
+            ],
+            "sourceMaps": true,
+            "console": "integratedTerminal",
+            "cwd": "${workspaceFolder}"
         }
     ]
 } 
diff --git a/README.md b/README.md
@@ -31,6 +31,22 @@ The app sends requests to the LLM to classify transactions based on their descri
 
 #### ✅ Every guessed transaction is marked as guessed in notes, so you can review the classification.
 
+#### 🌱 Suggest and create new categories for transactions that don't fit existing ones
+
+When enabled, the LLM can suggest entirely new categories for transactions it cannot classify, and optionally create them automatically.
+
+#### 🌐 Web search for unfamiliar merchants
+
+Using the ValueSerp API, the system can search the web for information about unfamiliar merchants to help the LLM make better categorization decisions.
+
+#### 🔎 Free web search alternative
+
+A self-hosted alternative to ValueSerp that uses a free public search API (DuckDuckGo) to search for merchant information without requiring an API key.
+
+#### 🔄 Re-run missed transactions
+
+Re-process transactions previously marked as unclassified.
+
 ## 🚀 Usage
 
 Sample `docker-compose.yml` file:
@@ -53,9 +69,9 @@ services:
       ACTUAL_PASSWORD: your_actual_password
       ACTUAL_BUDGET_ID: your_actual_budget_id # This is the ID from Settings → Show advanced settings → Sync ID
       CLASSIFICATION_SCHEDULE_CRON: 0 */4 * * * # How often to run classification.
-      CLASSIFY_ON_STARTUP: true # Whether to classify transactions on startup (don't wait for cron schedule)
-      SYNC_ACCOUNTS_BEFORE_CLASSIFY: false # Whether to sync accounts before classification
       LLM_PROVIDER: openai # Can be "openai", "anthropic", "google-generative-ai", "ollama" or "groq"
+      FEATURES: '["classifyOnStartup", "syncAccountsBeforeClassify", "freeWebSearch", "suggestNewCategories"]'
+#      VALUESERP_API_KEY: your_valueserp_api_key # API key for ValueSerp, required if the webSearch tool is enabled
 #      OPENAI_API_KEY:  # optional. required if you want to use the OpenAI API
 #      OPENAI_MODEL:  # optional. required if you want to use a specific model, default is "gpt-4o-mini"
 #      OPENAI_BASE_URL:  # optional. required if you don't want to use the OpenAI API but OpenAI compatible API, ex: "http://ollama:11424/v1
@@ -95,6 +111,26 @@ services:
 #        ANSWER BY A CATEGORY ID - DO NOT CREATE ENTIRE SENTENCE - DO NOT WRITE CATEGORY NAME, JUST AN ID. Do not guess, if you don't know the answer, return "uncategorized".
 ```
 
+## Feature Configuration
+
+You can configure features using the `FEATURES` array (recommended).
+
+The `FEATURES` environment variable accepts a JSON array of feature names to enable:
+
+```
+FEATURES='["freeWebSearch", "suggestNewCategories", "classifyOnStartup", "syncAccountsBeforeClassify"]'
+```
+
+Available features:
+- `webSearch` - Enable web search for merchant information
+- `freeWebSearch` - Enable free web search for merchant information (self-hosted alternative to ValueSerp)
+- `suggestNewCategories` - Allow suggesting new categories for transactions
+- `classifyOnStartup` - Run classification when the application starts
+- `syncAccountsBeforeClassify` - Sync accounts before running classification
+- `dryRun` - Run in dry run mode (enabled by default)
+- `dryRunNewCategories` - Only log suggested categories without creating them (enabled by default)
+- `rerunMissedTransactions` - Re-process transactions previously marked as unclassified
+
 ## Customizing the Prompt
 
 To create a custom prompt, modify the `PROMPT_TEMPLATE` environment variable to include or exclude variables as needed.
@@ -120,3 +156,58 @@ loops.
 7. `date`: The date of the transaction. This is taken from `transaction.date`.
 8. `cleared`: A boolean indicating if the transaction is cleared. This is taken from `transaction.cleared`.
 9. `reconciled`: A boolean indicating if the transaction is reconciled. This is taken from `transaction.reconciled`.
+
+## New Category Suggestions
+
+When the `suggestNewCategories` feature is enabled, the system will:
+
+1. First try to classify transactions using existing categories
+2. For transactions that can't be classified, request a new category suggestion from the LLM
+3. Check if similar categories already exist
+4. If in dry run mode (`dryRunNewCategories` is enabled), just log the suggestions
+5. If not in dry run mode, create the new categories and assign transactions to them
+
+This feature is particularly useful when you have transactions that don't fit your current category structure and you want the LLM to help expand your categories intelligently.
+
+## Tools Integration
+
+The system supports various tools that can be enabled to enhance the LLM's capabilities:
+
+1. Enable tools by including them in the `FEATURES` array or by setting `ENABLED_TOOLS`
+2. Provide any required API keys for the tools you want to use
+
+Currently supported tools:
+
+### webSearch
+
+The webSearch tool uses the ValueSerp API to search for information about merchants that the LLM might not be familiar with, providing additional context for categorization decisions.
+
+To use this tool:
+1. Include `webSearch` in your `FEATURES` array or `ENABLED_TOOLS` list
+2. Provide your ValueSerp API key as `VALUESERP_API_KEY`
+
+This is especially helpful for:
+- New or uncommon merchants
+- Merchants with ambiguous names
+- Specialized services that might be difficult to categorize without additional information
+
+The search results are included in the prompts sent to the LLM, helping it make more accurate category assignments or suggestions.
+
+## Dry Run Mode
+
+The `dryRun` feature is enabled by default. In this mode:
+- No transactions will be modified
+- No categories will be created
+- All proposed changes will be logged to console
+- The system will show what would happen in a real (non-dry-run) execution
+
+To perform actual changes:
+1. Remove `dryRun` from your FEATURES array
+2. Ensure `suggestNewCategories` is enabled if you want new category creation
+3. Run the classification process
+
+Dry run messages will show:
+- Which transactions would be categorized
+- Which rules would be applied
+- What new categories would be created
+- How many transactions would be affected by each change
diff --git a/app.ts b/app.ts
@@ -1,5 +1,5 @@
 import cron from 'node-cron';
-import { cronSchedule, classifyOnStartup } from './src/config';
+import { cronSchedule, isFeatureEnabled } from './src/config';
 import actualAi from './src/container';
 
 if (!cron.validate(cronSchedule)) {
@@ -12,7 +12,7 @@ cron.schedule(cronSchedule, async () => {
 });
 
 console.log('Application started');
-if (classifyOnStartup) {
+if (isFeatureEnabled('classifyOnStartup')) {
   (async () => {
     await actualAi.classify();
   })();