Skip to content

Commit

Permalink
added env variables for SQL & Teradata
Browse files: browse the repository at this point in the history
  • Loading branch information
gbecerra1982 committed Jun 9, 2024
1 parent 0d7871c commit 5491551
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 52 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ csx
.vs
edge
Publish
.vscode/

# Loadtest
Results/
Expand Down
6 changes: 6 additions & 0 deletions .vscode/extensions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"recommendations": [
"ms-azuretools.vscode-azurefunctions",
"ms-python.python"
]
}
15 changes: 15 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Attach to Python Functions",
"type": "debugpy",
"request": "attach",
"connect": {
"host": "localhost",
"port": 9091
},
"preLaunchTask": "func: host start"
}
]
}
8 changes: 8 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"azureFunctions.deploySubpath": ".",
"azureFunctions.scmDoBuildDuringDeployment": true,
"azureFunctions.pythonVenv": ".venv",
"azureFunctions.projectLanguage": "Python",
"azureFunctions.projectRuntime": "~4",
"debug.internalConsoleOptions": "neverOpen"
}
27 changes: 27 additions & 0 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"version": "2.0.0",
"tasks": [
{
"type": "func",
"label": "func: host start",
"command": "host start",
"problemMatcher": "$func-python-watch",
"isBackground": true,
"dependsOn": "pip install (functions)"
},
{
"label": "pip install (functions)",
"type": "shell",
"osx": {
"command": "${config:azureFunctions.pythonVenv}/bin/python -m pip install -r requirements.txt"
},
"windows": {
"command": "${config:azureFunctions.pythonVenv}\\Scripts\\python -m pip install -r requirements.txt"
},
"linux": {
"command": "${config:azureFunctions.pythonVenv}/bin/python -m pip install -r requirements.txt"
},
"problemMatcher": []
}
]
}
110 changes: 59 additions & 51 deletions orc/plugins/Retrieval/native_function.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from shared.util import get_secret, get_aoai_config,extract_text_from_html,get_possitive_int_or_default
from shared.util import get_secret, get_aoai_config, extract_text_from_html, get_possitive_int_or_default
# from semantic_kernel.skill_definition import sk_function
from openai import AzureOpenAI
from semantic_kernel.functions import kernel_function
Expand Down Expand Up @@ -33,34 +33,40 @@
# Azure search Integration Settings
AZURE_OPENAI_EMBEDDING_MODEL = os.environ.get("AZURE_OPENAI_EMBEDDING_MODEL")

TERM_SEARCH_APPROACH='term'
VECTOR_SEARCH_APPROACH='vector'
HYBRID_SEARCH_APPROACH='hybrid'
AZURE_SEARCH_USE_SEMANTIC=os.environ.get("AZURE_SEARCH_USE_SEMANTIC") or "false"
AZURE_SEARCH_APPROACH=os.environ.get("AZURE_SEARCH_APPROACH") or HYBRID_SEARCH_APPROACH
TERM_SEARCH_APPROACH = 'term'
VECTOR_SEARCH_APPROACH = 'vector'
HYBRID_SEARCH_APPROACH = 'hybrid'
AZURE_SEARCH_USE_SEMANTIC = os.environ.get("AZURE_SEARCH_USE_SEMANTIC") or "false"
AZURE_SEARCH_APPROACH = os.environ.get("AZURE_SEARCH_APPROACH") or HYBRID_SEARCH_APPROACH

AZURE_SEARCH_SERVICE = os.environ.get("AZURE_SEARCH_SERVICE")
AZURE_SEARCH_INDEX = os.environ.get("AZURE_SEARCH_INDEX")
AZURE_SEARCH_API_VERSION = os.environ.get("AZURE_SEARCH_API_VERSION", "2023-11-01")
if AZURE_SEARCH_API_VERSION < '2023-10-01-Preview': # query is using vectorQueries that requires at least 2023-10-01-Preview'.
AZURE_SEARCH_API_VERSION = '2023-11-01'
if AZURE_SEARCH_API_VERSION < '2023-10-01-Preview': # query is using vectorQueries that requires at least 2023-10-01-Preview'.
AZURE_SEARCH_API_VERSION = '2023-11-01'

AZURE_SEARCH_TOP_K = os.environ.get("AZURE_SEARCH_TOP_K") or "3"

AZURE_SEARCH_OYD_USE_SEMANTIC_SEARCH = os.environ.get("AZURE_SEARCH_OYD_USE_SEMANTIC_SEARCH") or "false"
AZURE_SEARCH_OYD_USE_SEMANTIC_SEARCH = True if AZURE_SEARCH_OYD_USE_SEMANTIC_SEARCH == "true" else False
AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG = os.environ.get("AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG") or "my-semantic-config"
AZURE_SEARCH_ENABLE_IN_DOMAIN = os.environ.get("AZURE_SEARCH_ENABLE_IN_DOMAIN") or "true"
AZURE_SEARCH_ENABLE_IN_DOMAIN = True if AZURE_SEARCH_ENABLE_IN_DOMAIN == "true" else False
AZURE_SEARCH_ENABLE_IN_DOMAIN = True if AZURE_SEARCH_ENABLE_IN_DOMAIN == "true" else False
AZURE_SEARCH_CONTENT_COLUMNS = os.environ.get("AZURE_SEARCH_CONTENT_COLUMNS") or "content"
AZURE_SEARCH_FILENAME_COLUMN = os.environ.get("AZURE_SEARCH_FILENAME_COLUMN") or "filepath"
AZURE_SEARCH_TITLE_COLUMN = os.environ.get("AZURE_SEARCH_TITLE_COLUMN") or "title"
AZURE_SEARCH_URL_COLUMN = os.environ.get("AZURE_SEARCH_URL_COLUMN") or "url"
#Bing search Integration Settings
# Bing search Integration Settings
BING_SEARCH_TOP_K = os.environ.get("BING_SEARCH_TOP_K") or "3"
BING_CUSTOM_SEARCH_URL="https://api.bing.microsoft.com/v7.0/custom/search?"
BING_CUSTOM_SEARCH_URL = "https://api.bing.microsoft.com/v7.0/custom/search?"
BING_SEARCH_MAX_TOKENS = os.environ.get("BING_SEARCH_MAX_TOKENS") or "1000"
#DB Integration Settings
# SQL Integration Settings
SQL_TOP_K = os.environ.get("SQL_TOP_K") or "3"
SQL_MAX_TOKENS = os.environ.get("SQL_MAX_TOKENS") or "1000"
# Teradata Integration Settings
TERADATA_TOP_K = os.environ.get("TERADATA_TOP_K") or "3"
TERADATA_MAX_TOKENS = os.environ.get("TERADATA_MAX_TOKENS") or "1000"
# DB Integration Settings
AZURE_OPENAI_CHATGPT_MODEL = os.environ.get("AZURE_OPENAI_CHATGPT_MODEL")
AZURE_OPENAI_CHATGPT_DEPLOYMENT = os.environ.get("AZURE_OPENAI_CHATGPT_DEPLOYMENT")
AZURE_OPENAI_RESOURCE = os.environ.get("AZURE_OPENAI_RESOURCE")
Expand All @@ -77,19 +83,17 @@
@retry(wait=wait_random_exponential(min=2, max=60), stop=stop_after_attempt(6), reraise=True)
# Function to generate embeddings for title and content fields, also used for query embeddings
def generate_embeddings(text):

embeddings_config = get_aoai_config(AZURE_OPENAI_EMBEDDING_MODEL)

client = AzureOpenAI(
api_version = embeddings_config['api_version'],
azure_endpoint = embeddings_config['endpoint'],
azure_ad_token = embeddings_config['api_key'],
api_version=embeddings_config['api_version'],
azure_endpoint=embeddings_config['endpoint'],
azure_ad_token=embeddings_config['api_key'],
)

embeddings = client.embeddings.create(input = [text], model= embeddings_config['deployment']).data[0].embedding

return embeddings
embeddings = client.embeddings.create(input=[text], model=embeddings_config['deployment']).data[0].embedding

return embeddings

class Retrieval:
@kernel_function(
Expand All @@ -106,16 +110,16 @@ def VectorIndexRetrieval(
start_time = time.time()
logging.info(f"[sk_retrieval] generating question embeddings. search query: {search_query}")
embeddings_query = generate_embeddings(search_query)
response_time = round(time.time() - start_time,2)
response_time = round(time.time() - start_time, 2)
logging.info(f"[sk_retrieval] finished generating question embeddings. {response_time} seconds")
azureSearchKey = get_secret('azureSearchKey')
azureSearchKey = get_secret('azureSearchKey')

logging.info(f"[sk_retrieval] querying azure ai search. search query: {search_query}")
# prepare body
body = {
"select": "title, content, url, filepath, chunk_id",
"top": AZURE_SEARCH_TOP_K
}
}
if AZURE_SEARCH_APPROACH == TERM_SEARCH_APPROACH:
body["search"] = search_query
elif AZURE_SEARCH_APPROACH == VECTOR_SEARCH_APPROACH:
Expand Down Expand Up @@ -143,7 +147,7 @@ def VectorIndexRetrieval(
'api-key': azureSearchKey
}
search_endpoint = f"https://{AZURE_SEARCH_SERVICE}.search.windows.net/indexes/{AZURE_SEARCH_INDEX}/docs/search?api-version={AZURE_SEARCH_API_VERSION}"

start_time = time.time()
response = requests.post(search_endpoint, headers=headers, json=body)
status_code = response.status_code
Expand All @@ -154,23 +158,22 @@ def VectorIndexRetrieval(
logging.error(f"[sk_retrieval] error {status_code} when searching documents. {error_message}")
else:
if response.json()['value']:
for doc in response.json()['value']:
search_results.append(doc['filepath'] + ": "+ doc['content'].strip() + "\n")

response_time = round(time.time() - start_time,2)
# logging.info(f"[sk_retrieval] search query body: {body}")
for doc in response.json()['value']:
search_results.append(doc['filepath'] + ": " + doc['content'].strip() + "\n")

response_time = round(time.time() - start_time, 2)
logging.info(f"[sk_retrieval] finished querying azure ai search. {response_time} seconds")
except Exception as e:
error_message = str(e)
logging.error(f"[sk_retrieval] error when getting the answer {error_message}")

sources = ' '.join(search_results)
return sources

@kernel_function(
description="Search bing for sources to ground and give context to answer a user question. Return sources.",
name="BingRetrieval",
)
)
def BingRetrieval(
self,
input: Annotated[str, "The user question"]
Expand All @@ -179,34 +182,34 @@ def BingRetrieval(
bing_custom_config_id = get_secret('bingCustomConfigId')
client = CustomSearchClient(endpoint=BING_CUSTOM_SEARCH_URL, credentials=CognitiveServicesCredentials(bing_custom_search_subscription_key))
start_time = time.time()
web_data = client.custom_instance.search(query=input, custom_config=bing_custom_config_id,count=BING_SEARCH_TOP_K)
web_data = client.custom_instance.search(query=input, custom_config=bing_custom_config_id, count=BING_SEARCH_TOP_K)
logging.info(f"[bing retrieval] bing search. {input}. {time.time() - start_time} seconds.")
bing_sources = ""
if web_data.web_pages and hasattr(web_data.web_pages, 'value'):
for web in web_data.web_pages.value:
try:
start_time = time.time()
html=extract_text_from_html(web.url)
bing_sources+=html[:get_possitive_int_or_default(BING_SEARCH_MAX_TOKENS,1000)]
html = extract_text_from_html(web.url)
bing_sources += html[:get_possitive_int_or_default(BING_SEARCH_MAX_TOKENS, 1000)]
logging.info(f"[bing retrieval] finished scraping web. {web.url}. {time.time() - start_time} seconds.")
except Exception as e:
logging.error(f"[bing retrieval] could not scrape web. {web.url}. {e}")
bing_sources+=web.snippet
bing_sources += web.snippet
return bing_sources

@kernel_function(
description="Search a SQL or Teradata DB for sources to ground and give context to answer a user question. Return sources.",
name="DBRetrieval",
)
)
def DBRetrieval(self,
input: Annotated[str, "The user question"],
db_type: Annotated[str, "The type of database to connect to (sql or teradata)"],
db_server: Annotated[str, "The server to connect to"],
db_database: Annotated[str, "The database to connect to"],
db_table_info: Annotated[str, "The tables to search for information"],
db_username: Annotated[str, "The username to connect to the database"],
db_top_k: Annotated[str, "The number of results to return"]
)-> Annotated[str, "the output is a string with the search results"]:
input: Annotated[str, "The user question"],
db_type: Annotated[str, "The type of database to connect to (sql or teradata)"],
db_server: Annotated[str, "The server to connect to"],
db_database: Annotated[str, "The database to connect to"],
db_table_info: Annotated[str, "The tables to search for information"],
db_username: Annotated[str, "The username to connect to the database"],
db_top_k: Annotated[str, "The number of results to return"]
) -> Annotated[str, "the output is a string with the search results"]:
logging.info('Python HTTP trigger function processed a request.')

try:
Expand All @@ -216,10 +219,14 @@ def DBRetrieval(self,
# Connect to Key Vault and get database password
if db_type == "sql":
db_password = get_secret("sqlpassword")
db_top_k = get_possitive_int_or_default(db_top_k, SQL_TOP_K)
max_tokens = get_possitive_int_or_default(SQL_MAX_TOKENS, 1000)
elif db_type == "teradata":
db_password = get_secret("teradatapassword")
db_top_k = get_possitive_int_or_default(db_top_k, TERADATA_TOP_K)
max_tokens = get_possitive_int_or_default(TERADATA_MAX_TOKENS, 1000)
else:
logging.error(f"[DBRetrieval]Invalid db_type specified")
logging.error(f"[DBRetrieval] Invalid db_type specified")
return ""
azureOpenAIKey = get_secret("azureOpenAIKey")

Expand All @@ -237,10 +244,11 @@ def DBRetrieval(self,
conn_str = f'mssql+pyodbc:///?odbc_connect={params}'
elif db_type == "teradata":
driver = 'Teradata'
params = urllib.parse.quote_plus(f"DRIVER={driver};SERVER={db_server};DATABASE={db_database};UID={db_username};PWD={db_password}")
params = urllib.parse.quote_plus(f"DRIVER={driver};DBCNAME={db_server};DATABASE={db_database};UID={db_username};PWD={db_password}")
conn_str = f'teradata:///?odbc_connect={params}'
else:
logging.error("[DBRetrieval] Invalid db_type specified")
return ""

engine = create_engine(conn_str)
logging.info(f"[{db_type} Retrieval] Connection to database is successful")
Expand Down Expand Up @@ -287,12 +295,12 @@ def DBRetrieval(self,
)

query_engine = SQLTableRetrieverQueryEngine(
sql_database, obj_index.as_retriever(similarity_top_k=get_possitive_int_or_default(db_top_k, 3))
sql_database, obj_index.as_retriever(similarity_top_k=db_top_k)
)

query = input
query = input[:max_tokens]
response = query_engine.query(query)
result=response.response
result = response.response
logging.info(f"[{db_type} Retrieval] SQLQuery: {response.metadata.get('sql_query')}")
engine.dispose()
return result
Expand All @@ -301,4 +309,4 @@ def DBRetrieval(self,
return ""
except Exception as e:
logging.error(f"[{db_type} Retrieval] Unexpected error: {e}")
return ""
return ""

0 comments on commit 5491551

Please sign in to comment.