
Commit 65ef33a

lots of minor improvements, /rag improved and working woo! /brainblast as well. a few more things to take care of
1 parent 796b525 commit 65ef33a

15 files changed
+816 -394 lines changed

npcpy/data/load.py

Lines changed: 80 additions & 0 deletions
```diff
@@ -6,6 +6,7 @@
 import numpy as np
 from typing import Optional
 
+import os
 
 def load_csv(file_path):
     df = pd.read_csv(file_path)
@@ -113,3 +114,82 @@ def load_pdf(file_path):
     "ISO": "archives",
 }
 
+
+def load_file_contents(file_path, chunk_size=250):
+    """
+    Load and format the contents of a file based on its extension.
+    Returns a list of chunks from the file content.
+    """
+    file_ext = os.path.splitext(file_path)[1].upper().lstrip('.')
+    chunks = []
+
+    try:
+        if file_ext == 'PDF':
+            # Load PDF content
+            pdf_document = fitz.open(file_path)
+            full_text = ""
+
+            # Extract text from each page
+            for page in pdf_document:
+                full_text += page.get_text() + "\n\n"
+
+            # Chunk the text
+            for i in range(0, len(full_text), chunk_size):
+                chunk = full_text[i:i+chunk_size].strip()
+                if chunk:  # Skip empty chunks
+                    chunks.append(chunk)
+
+        elif file_ext == 'CSV':
+            df = pd.read_csv(file_path)
+            # Add metadata as first chunk
+            meta = f"CSV Columns: {', '.join(df.columns)}\nRows: {len(df)}"
+            chunks.append(meta)
+
+            # Convert sample data to string and chunk it
+            sample = df.head(20).to_string()
+            for i in range(0, len(sample), chunk_size):
+                chunk = sample[i:i+chunk_size].strip()
+                if chunk:
+                    chunks.append(chunk)
+
+        elif file_ext in ['XLS', 'XLSX']:
+            df = pd.read_excel(file_path)
+            # Add metadata as first chunk
+            meta = f"Excel Columns: {', '.join(df.columns)}\nRows: {len(df)}"
+            chunks.append(meta)
+
+            # Convert sample data to string and chunk it
+            sample = df.head(20).to_string()
+            for i in range(0, len(sample), chunk_size):
+                chunk = sample[i:i+chunk_size].strip()
+                if chunk:
+                    chunks.append(chunk)
+
+        elif file_ext == 'TXT':
+            with open(file_path, 'r', encoding='utf-8') as f:
+                content = f.read()
+
+            # Chunk the text
+            for i in range(0, len(content), chunk_size):
+                chunk = content[i:i+chunk_size].strip()
+                if chunk:
+                    chunks.append(chunk)
+
+        elif file_ext == 'JSON':
+            with open(file_path, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+            content = json.dumps(data, indent=2)
+
+            # Chunk the JSON
+            for i in range(0, len(content), chunk_size):
+                chunk = content[i:i+chunk_size].strip()
+                if chunk:
+                    chunks.append(chunk)
+
+        else:
+            chunks.append(f"Unsupported file format: {file_ext}")
+
+        return chunks
+
+    except Exception as e:
+        return [f"Error loading file {file_path}: {str(e)}"]
```

npcpy/gen/embeddings.py

Lines changed: 0 additions & 73 deletions
```diff
@@ -70,79 +70,6 @@ def delete_embeddings_from_collection(collection, ids):
     collection.delete(ids=ids)  # Only delete if ids are provided
 
 
-def search_similar_texts(
-    query: str,
-    chroma_client,
-    embedding_model: str,
-    embedding_provider: str,
-    docs_to_embed: Optional[List[str]] = None,
-    top_k: int = 5,
-) -> List[Dict[str, any]]:
-    """
-    Search for similar texts using either a Chroma database or direct embedding comparison.
-    """
-
-    print(f"\nQuery to embed: {query}")
-    embedded_search_term = get_ollama_embeddings([query], embedding_model)[0]
-    # print(f"Query embedding: {embedded_search_term}")
-
-    if docs_to_embed is None:
-        # Fetch from the database if no documents to embed are provided
-        collection_name = f"{embedding_provider}_{embedding_model}_embeddings"
-        collection = chroma_client.get_collection(collection_name)
-        results = collection.query(
-            query_embeddings=[embedded_search_term], n_results=top_k
-        )
-        # Constructing and returning results
-        return [
-            {"id": id, "score": float(distance), "text": document}
-            for id, distance, document in zip(
-                results["ids"][0], results["distances"][0], results["documents"][0]
-            )
-        ]
-
-    print(f"\nNumber of documents to embed: {len(docs_to_embed)}")
-
-    # Get embeddings for provided documents
-    raw_embeddings = get_ollama_embeddings(docs_to_embed, embedding_model)
-
-    output_embeddings = []
-    for idx, emb in enumerate(raw_embeddings):
-        if emb:  # Exclude any empty embeddings
-            output_embeddings.append(emb)
-
-    # Convert to numpy arrays for calculations
-    doc_embeddings = np.array(output_embeddings)
-    query_embedding = np.array(embedded_search_term)
-
-    # Check for zero-length embeddings
-    if len(doc_embeddings) == 0:
-        raise ValueError("No valid document embeddings found")
-
-    # Normalize embeddings to avoid division by zero
-    doc_norms = np.linalg.norm(doc_embeddings, axis=1, keepdims=True)
-    query_norm = np.linalg.norm(query_embedding)
-
-    # Ensure no zero vectors are being used in cosine similarity
-    if query_norm == 0:
-        raise ValueError("Query embedding is zero-length")
-
-    # Calculate cosine similarities
-    cosine_similarities = np.dot(doc_embeddings, query_embedding) / (
-        doc_norms.flatten() * query_norm
-    )
-
-    # Get indices of top K documents
-    top_indices = np.argsort(cosine_similarities)[::-1][:top_k]
-
-    return [
-        {
-            "id": str(idx),
-            "score": float(cosine_similarities[idx]),
-            "text": docs_to_embed[idx],
-        }
-        for idx in top_indices
-    ]
 def get_embeddings(
     texts: List[str],
     model: str,
```
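The deleted function's in-memory path is plain cosine-similarity ranking; a self-contained sketch of the same math, assuming precomputed embeddings and no npcpy imports:

```python
import numpy as np

def top_k_cosine(query_emb, doc_embs, docs, k=5):
    # Same ranking the removed search_similar_texts performed:
    # cosine similarity of the query against each document embedding,
    # sorted descending, top k returned.
    D = np.array(doc_embs)
    q = np.array(query_emb)
    sims = D @ q / (np.linalg.norm(D, axis=1) * np.linalg.norm(q))
    order = np.argsort(sims)[::-1][:k]
    return [
        {"id": str(i), "score": float(sims[i]), "text": docs[i]}
        for i in order
    ]
```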

npcpy/gen/image_gen.py

Lines changed: 6 additions & 13 deletions
```diff
@@ -7,10 +7,7 @@
 from PIL import Image
 
 from litellm import image_generation
-from npcpy.npc_sysenv import (
-    NPCSH_IMAGE_GEN_MODEL,
-    NPCSH_IMAGE_GEN_PROVIDER,
-)
+
 
 
 def generate_image_diffusers(
@@ -35,7 +32,7 @@ def generate_image_diffusers(
 
 def openai_image_gen(
     prompt: str,
-    model: str = "gpt-image-1",
+    model: str = "dall-e-2",
     attachments: Union[List[Union[str, bytes, Image.Image]], None] = None,
     height: int = 1024,
     width: int = 1024,
@@ -65,8 +62,6 @@ def openai_image_gen(
             attachment.save(img_byte_arr, format='PNG')
             img_byte_arr.seek(0)
             processed_images.append(img_byte_arr)
-        print(processed_images)
-        print(height, width, n_images, model, prompt)
         # Use images.edit for image editing
         result = client.images.edit(
             model=model,
@@ -82,14 +77,12 @@ def openai_image_gen(
             prompt=prompt,
             n=n_images,
             size=f"{height}x{width}",
-            response_format="b64_json"  # Request base64 encoded image
         )
 
-    # Process the result
     image_base64 = result.data[0].b64_json
     image_bytes = base64.b64decode(image_base64)
     image = Image.open(io.BytesIO(image_bytes))
-
+    image.save('generated_image.png')
     return image
 
 
@@ -161,8 +154,8 @@ def gemini_image_gen(
 
 def generate_image(
     prompt: str,
-    model: str = NPCSH_IMAGE_GEN_MODEL,
-    provider: str = NPCSH_IMAGE_GEN_PROVIDER,
+    model: str,
+    provider: str,
     height: int = 1024,
     width: int = 1024,
     n_images: int = 1,
@@ -192,7 +185,7 @@ def generate_image(
     # Set default model if none provided
     if model is None:
         if provider == "openai":
-            model = "gpt-image-1"
+            model = "dall-e-2"
        elif provider == "diffusers":
            model = "runwayml/stable-diffusion-v1-5"
        elif provider == "gemini":
```
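With the `npc_sysenv` defaults gone, callers now pass `model` and `provider` to `generate_image` explicitly. A hedged usage sketch, assuming the OpenAI path returns a PIL image as `openai_image_gen` does; prompt and output filename are illustrative:

```python
from npcpy.gen.image_gen import generate_image

# "dall-e-2" matches the fallback this commit sets for the openai provider.
image = generate_image(
    "a watercolor fox",  # illustrative prompt
    model="dall-e-2",
    provider="openai",
    height=1024,
    width=1024,
)
image.save("fox.png")  # the OpenAI path also writes generated_image.png as a side effect
```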

npcpy/llm_funcs.py

Lines changed: 49 additions & 30 deletions
```diff
@@ -18,6 +18,7 @@
 from npcpy.npc_sysenv import (
     render_markdown,
     lookup_provider,
+    request_user_input
 )
 from npcpy.gen.response import get_litellm_response
 from npcpy.gen.image_gen import generate_image, edit_image
@@ -27,7 +28,6 @@ def gen_image(
     prompt: str,
     model: str = None,
     provider: str = None,
-    filename: str = None,
     npc: Any = None,
     height: int = 1024,
     width: int = 1024,
@@ -44,7 +44,6 @@ def gen_image(
     Returns:
         str: The filename of the saved image.
     """
-    print(height)
     if model is not None and provider is not None:
         pass
     elif model is not None and provider is None:
@@ -56,13 +55,7 @@ def gen_image(
         model = npc.model
         if npc.api_url is not None:
             api_url = npc.api_url
-    if filename is None:
-        # Generate a filename based on the prompt and the date time
-        os.makedirs(os.path.expanduser("~/.npcsh/images/"), exist_ok=True)
-        filename = (
-            os.path.expanduser("~/.npcsh/images/")
-            + f"image_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
-        )
+
     image = generate_image(
         prompt=prompt,
         model=model,
@@ -72,26 +65,8 @@ def gen_image(
         attachments=input_images,
 
     )
-    if isinstance(image, PIL.Image.Image):
-        image.save(filename)
-        return filename
+    return image
 
-    else:
-        try:
-            # image is at a private url (for dall-e?)
-            response = requests.get(image.data[0].url)
-            with open(filename, "wb") as file:
-                file.write(response.content)
-            from PIL import Image
-
-            img = Image.open(filename)
-            img.show()
-            # console = Console()
-            # console.print(Image.from_path(filename))
-            return filename
-
-        except AttributeError as e:
-            print(f"Error saving image: {e}")
 
 
 def generate_video(
@@ -508,6 +483,7 @@ def handle_tool_call(
         api_url=api_url,
         api_key=api_key,
         npc=npc,
+        context=context
     )
     try:
         # Clean the response of markdown formatting
@@ -553,6 +529,7 @@ def handle_tool_call(
             stream=stream,
             attempt=attempt + 1,
             n_attempts=n_attempts,
+            context=context
         )
         return {
             "response": f"Missing inputs for tool '{tool_name}': {missing_inputs}",
@@ -610,9 +587,10 @@ def handle_tool_call(
             stream=stream,
             attempt=attempt + 1,
             n_attempts=n_attempts,
+            context=context,
         )
     # process the tool call
-    render_markdown(f""" ## TOOL OUTPUT FROM CALLING {tool_name} \n \n {tool_output}""")
+    render_markdown(f""" ## TOOL OUTPUT FROM CALLING {tool_name} \n \n output:{tool_output['output']}""")
     response = get_llm_response(f"""
     The user had the following request: {command}.
     Here were the tool outputs from calling {tool_name}: {tool_output}
@@ -626,6 +604,7 @@ def handle_tool_call(
         api_key=api_key,
         npc=npc,
         messages=messages,
+        context=context,
         stream=stream,
     )
     messages = response['messages']
@@ -634,6 +613,46 @@ def handle_tool_call(
     return {'messages': messages, 'response': response}
 
 
+def handle_request_input(
+    context: str,
+    model: str,
+    provider: str
+):
+    """
+    Analyze text and decide what to request from the user
+    """
+    prompt = f"""
+    Analyze the text:
+    {context}
+    and determine what additional input is needed.
+    Return a JSON object with:
+    {{
+        "input_needed": boolean,
+        "request_reason": string explaining why input is needed,
+        "request_prompt": string to show user if input needed
+    }}
+
+    Do not include any additional markdown formatting or leading ```json tags. Your response
+    must be a valid JSON object.
+    """
+
+    response = get_llm_response(
+        prompt,
+        model=model,
+        provider=provider,
+        messages=[],
+        format="json",
+    )
+
+    result = response.get("response", {})
+    if isinstance(result, str):
+        result = json.loads(result)
+
+    user_input = request_user_input(
+        {"reason": result["request_reason"], "prompt": result["request_prompt"]},
+    )
+    return user_input
+
 
 def check_llm_command(
     command: str,
@@ -900,7 +919,7 @@ def check_llm_command(
     print(npc_to_pass)
     agent_passes = []
     if team is not None:
-        print(f"team npcs: {team.npcs}")
+        #print(f"team npcs: {team.npcs}")
         match = team.npcs.get(npc_to_pass)
         if match is not None:
             npc_to_pass_obj = match
```
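A sketch of how the new `handle_request_input` helper might be called; the model and provider values are illustrative, not from the commit:

```python
from npcpy.llm_funcs import handle_request_input

# Asks the LLM what extra information the context implies is missing,
# then prompts the user for it via request_user_input.
answer = handle_request_input(
    context="Book a table for two tomorrow",  # illustrative context
    model="llama3.2",                          # illustrative model
    provider="ollama",                         # illustrative provider
)
print(answer)
```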

0 commit comments