Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
360e573
Fix command
ppinchuk Feb 12, 2026
1487cc3
Bump elm version
ppinchuk Feb 25, 2026
874561b
Minor prompt update
ppinchuk Mar 9, 2026
6fe3c86
Merge remote-tracking branch 'origin/main' into pp/ghp_start
ppinchuk Mar 10, 2026
0b0ea61
Update lockfile
ppinchuk Mar 10, 2026
7ceef99
Fix linter
ppinchuk Mar 10, 2026
9adf343
Merge remote-tracking branch 'origin/main' into pp/ghp_start
ppinchuk Mar 11, 2026
45ccb83
Merge remote-tracking branch 'origin/main' into pp/ghp_start
ppinchuk Mar 11, 2026
8f86ab7
Documentation updates
ppinchuk Mar 11, 2026
4ae511b
First pass of GHP schema
ppinchuk Mar 11, 2026
c8a0a8c
Add basic plugin config
ppinchuk Mar 12, 2026
0cee1ea
Wire up GHP plugin
ppinchuk Mar 12, 2026
1e1139e
Have function return created class
ppinchuk Mar 12, 2026
c9baf2e
Clarification for noise
ppinchuk Mar 12, 2026
d38d6c3
Clarification for setbacks
ppinchuk Mar 12, 2026
3a2f80a
Clarification
ppinchuk Mar 12, 2026
fdab685
Use general guidance instead
ppinchuk Mar 12, 2026
3a995c9
Add clarification to definitions
ppinchuk Mar 12, 2026
405c4b3
Single row instruction
ppinchuk Mar 12, 2026
7d7cc19
Add clarification
ppinchuk Mar 12, 2026
60527cd
Allow nulls
ppinchuk Mar 12, 2026
980b7c6
Add instruction
ppinchuk Mar 12, 2026
96d68a6
update instructions
ppinchuk Mar 12, 2026
ab80874
Update instructions around null
ppinchuk Mar 12, 2026
a70c030
Tighten schema
ppinchuk Mar 12, 2026
02f01d8
Updates to schema
ppinchuk Mar 12, 2026
d650176
Add debug statements
ppinchuk Mar 12, 2026
4b22349
Update prompt
ppinchuk Mar 12, 2026
135e49f
Add logging
ppinchuk Mar 12, 2026
d655335
More logging
ppinchuk Mar 12, 2026
3f2fb7e
Update descriptions
ppinchuk Mar 13, 2026
4e24cb1
Update instructions
ppinchuk Mar 13, 2026
71eaf70
Add clarification
ppinchuk Mar 13, 2026
f939b81
Add info
ppinchuk Mar 13, 2026
8e9e1e8
Add more info to logger
ppinchuk Mar 13, 2026
51284c1
Add task ids
ppinchuk Mar 13, 2026
b18d805
Trimmed
ppinchuk Mar 13, 2026
5cb0c75
Update schema
ppinchuk Mar 13, 2026
ab4ac12
Update prompt
ppinchuk Mar 13, 2026
a9e29ca
Generalize implementation of `_get_model_config` and use it
ppinchuk Mar 13, 2026
2729dcd
Update logging statement
ppinchuk Mar 13, 2026
7f7a8eb
Change logging level
ppinchuk Mar 13, 2026
6b44d19
Fix import
ppinchuk Mar 13, 2026
faa0baa
Align playwright versions
ppinchuk Mar 13, 2026
dc4d87a
Fix bug in llm config retrieval
ppinchuk Mar 13, 2026
84422f1
Provide additional context even if user submits prompt
ppinchuk Mar 15, 2026
ebff812
Fix pandas link
ppinchuk Mar 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions compass/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
# Temporarily import to register plugins
# Can drop once plugins register themselves
from .extraction import (
COMPASSWindExtractor,
COMPASSSolarExtractor,
COMPASSGeoHeatPumpExtractor,
COMPASSSmallWindExtractor,
COMPASSSolarExtractor,
COMPASSWindExtractor,
TexasWaterRightsExtractor,
)

Expand Down
12 changes: 6 additions & 6 deletions compass/common/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,9 +737,9 @@ def _add_value_and_units_clarification_nodes(G): # noqa: N803
"1) Extract only the explicit numerical value provided for "
"the restriction. Do not infer values from related "
"restrictions.\n"
"2) If multiple values are given, select the most restrictive "
"one (i.e., the smallest allowable limit, the lowest maximum, "
"etc.).\n"
"2) If multiple **applicable** values are given, select the most "
"restrictive one (i.e., the smallest allowable limit, the lowest "
"maximum, etc.).\n"
"3) Please focus only on {restriction} that would apply for "
"{system_size_reminder}\n"
"4) Pay close attention to clarifying details in parentheses, "
Expand Down Expand Up @@ -775,9 +775,9 @@ def _add_value_and_units_clarification_nodes(G): # noqa: N803
"2) The unit is expressed using standard, conventional unit "
"names (e.g., 'feet', 'meters', 'acres', 'dBA', etc.). "
"{unit_clarification}\n"
"3) If multiple values are mentioned, return only the units "
"for the most restrictive value that directly pertains to the "
"restriction.\n"
"3) If multiple **applicable** values are mentioned, return only "
"the units for the most restrictive value that directly pertains "
"to the restriction.\n"
"\nExample Inputs and Outputs:\n"
"Text: 'For all WES there is a limitation of overall height "
"of 200 feet (including blades).'\n"
Expand Down
5 changes: 3 additions & 2 deletions compass/extraction/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@

# Temporarily import to register plugins
# Can drop once plugins register themselves
from .wind import COMPASSWindExtractor
from .solar import COMPASSSolarExtractor
from .ghp import COMPASSGeoHeatPumpExtractor
from .small_wind import COMPASSSmallWindExtractor
from .solar import COMPASSSolarExtractor
from .water import TexasWaterRightsExtractor
from .wind import COMPASSWindExtractor
11 changes: 11 additions & 0 deletions compass/extraction/ghp/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""COMPASS Geothermal ground source heat pump plugin"""

import importlib.resources

from compass.plugin import create_schema_based_one_shot_extraction_plugin


# Build (and register) the GHP extraction plugin from the YAML config that
# ships inside this package; the factory returns the plugin class it creates.
COMPASSGeoHeatPumpExtractor = create_schema_based_one_shot_extraction_plugin(
    config=importlib.resources.files("compass.extraction.ghp").joinpath(
        "plugin_config.yaml"
    ),
    tech="ghp",
)
344 changes: 344 additions & 0 deletions compass/extraction/ghp/geothermal_heat_pump_schema.json5

Large diffs are not rendered by default.

103 changes: 103 additions & 0 deletions compass/extraction/ghp/plugin_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
schema: ./geothermal_heat_pump_schema.json5

data_type_short_desc: geothermal ground source heat pump ordinance

query_templates:
- "{jurisdiction} geothermal ground source heat pump code"
- "filetype:pdf {jurisdiction} geothermal heat pump ordinance"
- "{jurisdiction} geothermal energy system ordinance"
- "Where can I find the legal text for geothermal heat pump zoning ordinances in {jurisdiction}?"
- "What is the specific legal information regarding zoning ordinances for geothermal heat pumps in {jurisdiction}?"

website_keywords:
# TODO: These keyword weights are provisional and still need tuning
# - e.g., should terms like "open loop" or "closed loop" be included?
pdf: 92160
pump: 46080
geothermal: 23040
ordinance: 11520
zoning: 5760
regulation: 2880
code: 1440
heating: 1440
planning: 720
government: 180

heuristic_keywords:
good_tech_acronyms:
- "ghp"
- "gshp"
good_tech_keywords:
- "geoexchange"
- "geo-exchange"
- "wellfield"
- "direct-use"
- "closed-loop"
good_tech_phrases:
- "well field"
- "geothermal resource"
- "geothermal drilling"
- "geothermal well"
- "geothermal reservoir"
- "geothermal permit"
- "geothermal ordinance"
- "geothermal zoning"
- "closed loop"
- "open loop"
- "vertical loop"
- "horizontal loop"
- "heating and cooling"
- "space heating"
- "direct use"
- "district heating"
- "greenhouse heating"
- "residential geothermal"
- "heat pump"
- "geothermal heat pump"
- "ground source heat pump"
- "ground-source heat pump"
- "ground heat pump"
- "ground-coupled heat pump"
- "ground coupled heat pump"
- "earth-coupled heat pump"
- "earth-source heat pump"
- "closed loop ground source"
- "open loop ground source"
not_tech_words:
- "production well"
- "geothermal exploration"
- "geothermal generating"
- "geothermal generation"
- "geothermal power"
- "geothermal production"
- "geothermal project"
- "geothermal overlay zone"
- "geothermal power plant"
- "geothermal facility"
- "geothermal electric"
- "geothermal energy facility"
- "geothermal lease"
- "geothermal development"
- "steam turbine"
- "binary cycle"
- "flash steam"
- "dry steam"
- "enhanced geothermal"
- "reservoir temperature"
- "reinjection well"
- "production zone"
- "geothermal production project"
- "exploratory well"
- "injection well"

collection_prompts: True

extraction_system_prompt: |-
You are a legal scholar extracting structured data from geothermal ground source heat pump ordinances.
Be focused and literal: extract only enacted, explicit, in-scope requirements.
Be thorough and complete: review all relevant sections, including tables and
lists, so no explicitly stated feature is missed.
Before finalizing, perform a feature-coverage check against the schema enum and
ensure each explicitly supported feature is captured exactly once.
Pay extra attention to any descriptions, instructions, examples, and definitions in the schema,
and be sure to follow all schema instructions carefully.
6 changes: 6 additions & 0 deletions compass/extraction/small_wind/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,9 @@ class StructuredSmallWindOrdinanceParser(StructuredSmallWindParser):
OUT_LABEL = "ordinance_values"
"""Identifier for structured ordinance data output by this class"""

TASK_ID = LLMUsageCategory.ORDINANCE_VALUE_EXTRACTION
"""Identifier for this parser's specific LLM task category"""

async def parse(self, text):
"""Parse text and extract structured ordinance data
Expand Down Expand Up @@ -569,6 +572,9 @@ class StructuredSmallWindPermittedUseDistrictsParser(
OUT_LABEL = "permitted_district_values"
"""Identifier for structured ordinance data output by this class"""

TASK_ID = LLMUsageCategory.PERMITTED_USE_VALUE_EXTRACTION
"""Identifier for this parser's specific LLM task category"""

_SMALL_WES_CLARIFICATION = (
"Small wind energy systems (AWES) may also be referred to as "
"non-commercial wind energy systems, on-site wind energy systems, "
Expand Down
6 changes: 6 additions & 0 deletions compass/extraction/solar/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,9 @@ class StructuredSolarOrdinanceParser(StructuredSolarParser):
OUT_LABEL = "ordinance_values"
"""Identifier for structured ordinance data output by this class"""

TASK_ID = LLMUsageCategory.ORDINANCE_VALUE_EXTRACTION
"""Identifier for this parser's specific LLM task category"""

async def parse(self, text):
"""Parse text and extract structured ordinance data
Expand Down Expand Up @@ -505,6 +508,9 @@ class StructuredSolarPermittedUseDistrictsParser(StructuredSolarParser):
OUT_LABEL = "permitted_district_values"
"""Identifier for structured ordinance data output by this class"""

TASK_ID = LLMUsageCategory.PERMITTED_USE_VALUE_EXTRACTION
"""Identifier for this parser's specific LLM task category"""

_LARGE_SEF_CLARIFICATION = (
"Large solar energy systems (SES) may also be referred to as solar "
"panels, solar energy conversion systems (SECS), solar energy "
Expand Down
6 changes: 6 additions & 0 deletions compass/extraction/wind/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,9 @@ class StructuredWindOrdinanceParser(StructuredWindParser):
OUT_LABEL = "ordinance_values"
"""Identifier for structured ordinance data output by this class"""

TASK_ID = LLMUsageCategory.ORDINANCE_VALUE_EXTRACTION
"""Identifier for this parser's specific LLM task category"""

async def parse(self, text):
"""Parse text and extract structured ordinance data

Expand Down Expand Up @@ -508,6 +511,9 @@ class StructuredWindPermittedUseDistrictsParser(StructuredWindParser):
OUT_LABEL = "permitted_district_values"
"""Identifier for structured ordinance data output by this class"""

TASK_ID = LLMUsageCategory.PERMITTED_USE_VALUE_EXTRACTION
"""Identifier for this parser's specific LLM task category"""

_LARGE_WES_CLARIFICATION = (
"Large wind energy systems (WES) may also be referred to as wind "
"turbines, wind energy conversion systems (WECS), wind energy "
Expand Down
7 changes: 7 additions & 0 deletions compass/plugin/one_shot/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,12 @@ def create_schema_based_one_shot_extraction_plugin(config, tech): # noqa: C901
Technology identifier to use for the plugin (e.g., "wind",
"solar"). Must be unique from the identifiers of any existing
plugins.

Returns
-------
callable
A `SchemaBasedExtractionPlugin` subclass configured according to
the input configuration.
"""
if not isinstance(config, dict):
config = load_config(config)
Expand Down Expand Up @@ -401,6 +407,7 @@ def _validate_website_keywords(self):
"""

register_plugin(SchemaBasedExtractionPlugin)
return SchemaBasedExtractionPlugin


def _collectors_from_config(config):
Expand Down
50 changes: 42 additions & 8 deletions compass/plugin/one_shot/components.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""COMPASS extraction schema-based plugin component implementations"""

import json
import asyncio
import logging
from datetime import datetime
from abc import ABC, abstractmethod

import pandas as pd
Expand Down Expand Up @@ -91,7 +93,7 @@

"""
_DATA_PARSER_MAIN_PROMPT = """\
Extract all {desc}features from the following text:
Extract all applicable {desc}features explicitly supported by following text:

{text}

Expand All @@ -101,6 +103,12 @@
You are a legal scholar extracting structured data from {desc}documents. \
Follow all instructions in the schema descriptions carefully.\
"""
_DATA_PARSER_ADDITIONAL_CONTEXT = """\
# ADDITIONAL CONTEXT #
- Today's date is {todays_date}. If you are extracting a moratorium or \
temporary restriction that includes an explicit end date that has already \
passed as of today, treat it as expired and omit that prohibition feature.\
"""


class SchemaBasedTextCollector(SchemaOutputLLMCaller, BaseTextCollector, ABC):
Expand Down Expand Up @@ -176,11 +184,17 @@ async def check_chunk(self, chunk_parser, ind):

async def _check_chunk_with_prompt(self, key, text_chunk):
"""Call LLM on a chunk of text to check for ordinance"""
main_prompt = _TEXT_COLLECTION_MAIN_PROMPT.format(
schema=self.SCHEMA, text=text_chunk
)
logger.debug("Checking text chunk with LLM: %s", text_chunk)
logger.debug_to_file(
"\t- System Message:\n%s", _TEXT_COLLECTION_SYSTEM_PROMPT
)
logger.debug_to_file("\t- Main prompt:\n%s", main_prompt)
content = await self.call(
sys_msg=_TEXT_COLLECTION_SYSTEM_PROMPT,
content=_TEXT_COLLECTION_MAIN_PROMPT.format(
schema=self.SCHEMA, text=text_chunk
),
content=main_prompt,
response_format={
"type": "json_schema",
"json_schema": {
Expand All @@ -191,7 +205,7 @@ async def _check_chunk_with_prompt(self, key, text_chunk):
},
usage_sub_label=LLMUsageCategory.DOCUMENT_CONTENT_VALIDATION,
)
logger.debug("LLM response: %s", content)
logger.debug("LLM response:\n%s", json.dumps(content, indent=4))
return content.get(key, False)

def _store_chunk(self, parser, chunk_ind):
Expand Down Expand Up @@ -239,8 +253,10 @@ async def _process(self, text_chunks):
"""Perform extraction processing"""

logger.info(
"Extracting summary text from %d text chunks asynchronously...",
"Extracting summary text from %d text chunks asynchronously "
"using LLM: %r...",
len(text_chunks),
self.llm_service.model_name,
)
outer_task_name = asyncio.current_task().get_name()
summaries = [
Expand Down Expand Up @@ -330,9 +346,24 @@ async def parse(self, text):
else ""
)

todays_date = datetime.now().strftime("%B %d, %Y")
sys_prompt = (
f"{self.SYSTEM_PROMPT}\n\n{_DATA_PARSER_ADDITIONAL_CONTEXT}"
)
sys_prompt = sys_prompt.format(
desc=desc, schema=self.SCHEMA, todays_date=todays_date
)

main_prompt = _DATA_PARSER_MAIN_PROMPT.format(desc=desc, text=text)
logger.debug(
"Extracting ordinances with LLM: %r", self.llm_service.model_name
)
logger.debug_to_file("\t- System Message:\n%s", sys_prompt)
logger.debug_to_file("\t- Main prompt:\n%s", main_prompt)

extraction = await self.call(
sys_msg=self.SYSTEM_PROMPT.format(desc=desc),
content=_DATA_PARSER_MAIN_PROMPT.format(desc=desc, text=text),
sys_msg=sys_prompt,
content=main_prompt,
response_format={
"type": "json_schema",
"json_schema": {
Expand All @@ -343,6 +374,9 @@ async def parse(self, text):
},
usage_sub_label=LLMUsageCategory.ORDINANCE_VALUE_EXTRACTION,
)
logger.debug_to_file(
"LLM response:\n%s", json.dumps(extraction, indent=4)
)
data = extraction["outputs"]
if not data:
logger.debug(
Expand Down
Loading
Loading