Skip to content

Commit

Permalink
Merge branch 'main' of github.com:h2oai/sql-sidekick into main
Browse files Browse the repository at this point in the history
  • Loading branch information
pramitchoudhary committed Feb 1, 2024
2 parents 28e8240 + ac3544c commit 6286adb
Show file tree
Hide file tree
Showing 11 changed files with 212 additions and 157 deletions.
31 changes: 28 additions & 3 deletions app.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[App]
name = "ai.h2o.wave.sql-sidekick"
title = "SQL-Sidekick"
description = "QnA with tabular data using NLQ"
Name = "ai.h2o.wave.sql-sidekick"
Title = "SQL-Sidekick"
Description = "QnA with tabular data using NLQ"
LongDescription = "about.md"
InstanceLifecycle = "MANAGED"
Tags = ["DATA_SCIENCE", "MACHINE_LEARNING", "NLP", "GENERATIVE_AI"]
Expand All @@ -22,3 +22,28 @@ EnableOIDC = true
[[Env]]
Name = "H2O_WAVE_MAX_REQUEST_SIZE"
Value = "20M"

[[Env]]
Name = "HEAP_ID"
Secret = "heap-analytics"
SecretKey = "id"

[[Env]]
Name = "H2OGPT_URL"
Secret = "h2ogpt-oss-sqlsidekick"
SecretKey = "h2ogpt-url"

[[Env]]
Name = "H2OGPT_API_TOKEN"
Secret = "h2ogpt-oss-sqlsidekick"
SecretKey = "h2ogpt-key"

[[Env]]
Name = "H2OGPTE_URL"
Secret = "h2ogpte-sqlsidekick"
SecretKey = "h2ogpte-url"

[[Env]]
Name = "H2OGPTE_API_TOKEN"
Secret = "h2ogpte-sqlsidekick"
SecretKey = "h2ogpte-key"
8 changes: 4 additions & 4 deletions examples/notebooks/Guardrails_SQL_injection.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,10 @@
"# env variables\n",
"\n",
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"os.environ['H2O_BASE_MODEL_URL'] = ''\n",
"os.environ['H2O_BASE_MODEL_API_KEY'] = \"\"\n",
"os.environ['RECOMMENDATION_MODEL_REMOTE_URL'] = \"https://h2ogpte.genai.h2o.ai\" # e.g. https://<>.h2ogpte.h2o.ai\n",
"os.environ['RECOMMENDATION_MODEL_API_KEY'] = \"\"\n",
"os.environ['H2OGPT_URL'] = ''\n",
"os.environ['H2OGPT_API_TOKEN'] = \"\"\n",
"os.environ['H2OGPTE_URL'] = \"https://h2ogpte.genai.h2o.ai\" # e.g. https://<>.h2ogpte.h2o.ai\n",
"os.environ['H2OGPTE_API_TOKEN'] = \"\"\n",
"\n",
"\n",
"base_path = \".\"\n",
Expand Down
8 changes: 4 additions & 4 deletions examples/notebooks/databricks_db.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,10 @@
"os.environ['DATABRICKS_TOKEN'] = \"\"\n",
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"\n",
"os.environ['H2O_BASE_MODEL_URL'] = 'http://38.128.233.247'\n",
"os.environ['H2O_BASE_MODEL_API_KEY'] = \"\"\n",
"os.environ['RECOMMENDATION_MODEL_REMOTE_URL'] = \"https://h2ogpte.genai.h2o.ai\" # e.g. https://<>.h2ogpte.h2o.ai\n",
"os.environ['RECOMMENDATION_MODEL_API_KEY'] = \"\""
"os.environ['H2OGPT_URL'] = 'http://38.128.233.247'\n",
"os.environ['H2OGPT_API_TOKEN'] = \"\"\n",
"os.environ['H2OGPTE_URL'] = \"https://h2ogpte.genai.h2o.ai\" # e.g. https://<>.h2ogpte.h2o.ai\n",
"os.environ['H2OGPTE_API_TOKEN'] = \"\""
]
},
{
Expand Down
8 changes: 4 additions & 4 deletions examples/notebooks/sdk_quick_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,11 @@
"import os\n",
"\n",
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"os.environ['H2O_BASE_MODEL_URL'] = 'http://38.128.233.247'\n",
"os.environ['H2O_BASE_MODEL_API_KEY'] = \"\"\n",
"os.environ['H2OGPT_URL'] = 'http://38.128.233.247'\n",
"os.environ['H2OGPT_API_TOKEN'] = \"\"\n",
"# To get access to h2ogpte endpoint, reach out to [email protected]\n",
"os.environ['RECOMMENDATION_MODEL_REMOTE_URL'] = \"https://h2ogpte.genai.h2o.ai\" # e.g. https://<>.h2ogpte.h2o.ai\n",
"os.environ['RECOMMENDATION_MODEL_API_KEY'] = \"\""
"os.environ['H2OGPTE_URL'] = \"https://h2ogpte.genai.h2o.ai\" # e.g. https://<>.h2ogpte.h2o.ai\n",
"os.environ['H2OGPTE_API_TOKEN'] = \"\""
]
},
{
Expand Down
11 changes: 7 additions & 4 deletions sidekick/configs/env.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
OPENAI_API_KEY = "" # Needed only for openAI models
MODEL_NAME = "h2ogpt-sql-sqlcoder-34b-alpha" # Others: e.g. gpt-4, gpt-4-32k, text-davinci-003
QUANT_TYPE = '4bit'
H2O_BASE_MODEL_URL = 'http://38.128.233.247'
H2O_BASE_MODEL_API_KEY = ""
RECOMMENDATION_MODEL_REMOTE_URL = ""
RECOMMENDATION_MODEL_API_KEY = ""

H2OGPT_URL = 'http://38.128.233.247'
H2OGPT_API_TOKEN = ""
H2OGPTE_URL = ""
H2OGPTE_API_TOKEN = ""

RECOMMENDATION_MODEL = "h2oai/h2ogpt-4096-llama2-70b-chat"
VULNERABILITY_SCANNER = "h2oai/h2ogpt-4096-llama2-70b-chat" # Other options: OpenAI models, depending on availability (e.g. 'gpt-3.5-turbo')
SELF_CORRECTION_MODEL = "h2oai/h2ogpt-4096-llama2-70b-chat" # Other options: OpenAI models, depending on availability (e.g. 'gpt-3.5-turbo')

Expand Down
10 changes: 0 additions & 10 deletions sidekick/configs/prompt_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,6 @@
# Add explanation and reasoning for each SQL query
"""

# DEBUGGING_PROMPT = {
# "system_prompt": "Act as a SQL expert for {dialect} database",
# "user_prompt": """
# ### Help fix syntax errors for provided incorrect SQL Query.
# # Error: {ex_traceback}
# # Query:\n {qry_txt}
# # Output: Add ``` as prefix and ``` as suffix to generated SQL
# """,
# }

DEBUGGING_PROMPT = {
"system_prompt": "Act as a SQL expert for {dialect} database",
"user_prompt": """
Expand Down
40 changes: 22 additions & 18 deletions sidekick/prompter.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,25 +32,29 @@
env_settings = toml.load(f"{app_base_path}/sidekick/configs/env.toml")
db_dialect = env_settings["DB-DIALECT"]["DB_TYPE"]
model_name = env_settings["MODEL_INFO"]["MODEL_NAME"]
h2o_remote_url = env_settings["MODEL_INFO"]["RECOMMENDATION_MODEL_REMOTE_URL"]
h2o_key = env_settings["MODEL_INFO"]["RECOMMENDATION_MODEL_API_KEY"]
h2o_remote_url = env_settings["MODEL_INFO"]["H2OGPTE_URL"]
h2o_key = env_settings["MODEL_INFO"]["H2OGPTE_API_TOKEN"]
# h2ogpt base model urls
h2ogpt_base_model_url = env_settings["MODEL_INFO"]["H2O_BASE_MODEL_URL"]
h2ogpt_base_model_key = env_settings["MODEL_INFO"]["H2O_BASE_MODEL_API_KEY"]
h2ogpt_base_model_url = env_settings["MODEL_INFO"]["H2OGPT_URL"]
h2ogpt_base_model_key = env_settings["MODEL_INFO"]["H2OGPT_API_TOKEN"]

self_correction_model = env_settings["MODEL_INFO"]["SELF_CORRECTION_MODEL"]
recommendation_model = env_settings["MODEL_INFO"]['RECOMMENDATION_MODEL']

os.environ["TOKENIZERS_PARALLELISM"] = "False"
# Env variables
if not os.getenv("H2O_BASE_MODEL_URL"):
os.environ["H2O_BASE_MODEL_URL"] = h2ogpt_base_model_url
if not os.getenv("H2O_BASE_MODEL_API_KEY"):
os.environ["H2O_BASE_MODEL_API_KEY"] = h2ogpt_base_model_key
if not os.getenv("RECOMMENDATION_MODEL_REMOTE_URL"):
os.environ["RECOMMENDATION_MODEL_REMOTE_URL"] = h2o_remote_url
if not os.getenv("RECOMMENDATION_MODEL_API_KEY"):
os.environ["RECOMMENDATION_MODEL_API_KEY"] = h2o_key
if not os.getenv("H2OGPT_URL"):
os.environ["H2OGPT_URL"] = h2ogpt_base_model_url
if not os.getenv("H2OGPT_API_TOKEN"):
os.environ["H2OGPT_API_TOKEN"] = h2ogpt_base_model_key
if not os.getenv("H2OGPTE_URL"):
os.environ["H2OGPTE_URL"] = h2o_remote_url
if not os.getenv("H2OGPTE_API_TOKEN"):
os.environ["H2OGPTE_API_TOKEN"] = h2o_key
if not os.getenv("SELF_CORRECTION_MODEL"):
os.environ["SELF_CORRECTION_MODEL"] = self_correction_model
if not os.getenv("RECOMMENDATION_MODEL"):
os.environ["RECOMMENDATION_MODEL"] = recommendation_model

def color(fore="", back="", text=None):
    """Return ``fore`` + ``back`` + ``text`` followed by ``Style.RESET_ALL``.

    Each argument is interpolated via f-string formatting, so the default
    ``text=None`` is rendered as the literal string ``"None"``.
    """
    prefix = f"{fore}{back}"
    return f"{prefix}{text}{Style.RESET_ALL}"
Expand Down Expand Up @@ -159,17 +163,17 @@ def recommend_suggestions(cache_path: str, table_name: str, n_qs: int=10):
r_url = _key = None
# First check for keys in env variables
logger.debug(f"Checking environment settings ...")
env_url = os.environ["RECOMMENDATION_MODEL_REMOTE_URL"]
env_key = os.environ["RECOMMENDATION_MODEL_API_KEY"]
env_url = os.environ["H2OGPTE_URL"]
env_key = os.environ["H2OGPTE_API_TOKEN"]
if env_url and env_key:
r_url = env_url
_key = env_key
elif Path(f"{app_base_path}/sidekick/configs/env.toml").exists():
# Reload .env info
logger.debug(f"Checking configuration file ...")
env_settings = toml.load(f"{app_base_path}/sidekick/configs/env.toml")
r_url = env_settings["MODEL_INFO"]["RECOMMENDATION_MODEL_REMOTE_URL"]
_key = env_settings["MODEL_INFO"]["RECOMMENDATION_MODEL_API_KEY"]
r_url = env_settings["MODEL_INFO"]["H2OGPTE_URL"]
_key = env_settings["MODEL_INFO"]["H2OGPTE_API_TOKEN"]
else:
raise Exception("Model url or key is missing.")

Expand Down Expand Up @@ -666,8 +670,8 @@ def ask(
logger.debug(f"Attempt: {attempt+1}")
_tmp = err.split("\n")
_err = _tmp[0].split("Error occurred:")[1] if len(_tmp) > 0 else None
env_url = os.environ["RECOMMENDATION_MODEL_REMOTE_URL"]
env_key = os.environ["RECOMMENDATION_MODEL_API_KEY"]
env_url = os.environ["H2OGPTE_URL"]
env_key = os.environ["H2OGPTE_API_TOKEN"]
corr_sql = sql_g.self_correction(input_query=_val, error_msg=_err, remote_url=env_url, client_key=env_key)
q_res, err = DBConfig.execute_query(query=corr_sql)
if not 'Error occurred'.lower() in str(err).lower():
Expand Down
8 changes: 4 additions & 4 deletions sidekick/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ def generate_sql(
else:
if self.h2ogpt_client is None:
# Check if env variable has info about remote hosting
remote_h2ogpt_base_url = os.environ.get("H2O_BASE_MODEL_URL", None)
remote_h2ogpt_base_url = os.environ.get("H2OGPT_URL", None)
if model_name == 'h2ogpt-sql-sqlcoder-34b-alpha':
remote_h2ogpt_base_url = f"{remote_h2ogpt_base_url}:5000/v1"
elif model_name == 'h2ogpt-sql-sqlcoder2':
Expand All @@ -453,7 +453,7 @@ def generate_sql(
remote_h2ogpt_base_url = f"{remote_h2ogpt_base_url}:5002/v1"
else:
remote_h2ogpt_base_url = None
remote_h2ogpt_key = os.environ.get("H2O_BASE_MODEL_API_KEY", None)
remote_h2ogpt_key = os.environ.get("H2OGPT_API_TOKEN", None)
_api_key = remote_h2ogpt_key if remote_h2ogpt_key else "EMPTY"
if remote_h2ogpt_base_url:
client_args = dict(base_url=remote_h2ogpt_base_url, api_key=_api_key, timeout=20.0)
Expand Down Expand Up @@ -784,8 +784,8 @@ def generate_sql(
except (sqlglot.errors.ParseError, ValueError, RuntimeError) as e:
_, ex_value, ex_traceback = sys.exc_info()
logger.info(f"Attempting to fix syntax error ...,\n {e}")
env_url = os.environ["RECOMMENDATION_MODEL_REMOTE_URL"]
env_key = os.environ["RECOMMENDATION_MODEL_API_KEY"]
env_url = os.environ["H2OGPTE_URL"]
env_key = os.environ["H2OGPTE_API_TOKEN"]
try:
result = self.self_correction(input_query=res, error_msg=str(ex_traceback), remote_url=env_url, client_key=env_key)
except Exception as se:
Expand Down
7 changes: 4 additions & 3 deletions sidekick/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,8 +548,8 @@ def check_vulnerability(input_query: str):
# Step 2 is optional: if a remote URL is provided, check for SQL injection patterns in the generated SQL code via an LLM.
# Currently, this is supported only for models served as endpoints.
logger.debug(f"Requesting additional scan using configured models")
remote_url = os.environ["RECOMMENDATION_MODEL_REMOTE_URL"]
api_key = os.environ["RECOMMENDATION_MODEL_API_KEY"]
remote_url = os.environ["H2OGPTE_URL"]
api_key = os.environ["H2OGPTE_API_TOKEN"]

_system_prompt = GUARDRAIL_PROMPT["system_prompt"].strip()
output_schema = """{
Expand Down Expand Up @@ -618,12 +618,13 @@ def generate_suggestions(remote_url, client_key:str, column_names: list, n_qs: i
input_prompt = RECOMMENDATION_PROMPT.format(data_schema=column_info, n_questions=n_qs
)

recommender_model = os.getenv("RECOMMENDATION_MODEL", "h2oai/h2ogpt-4096-llama2-70b-chat")
client = H2OGPTE(address=remote_url, api_key=client_key)
text_completion = client.answer_question(
system_prompt=f"Act as a data analyst, based on below data schema help answer the question",
text_context_list=[],
question=input_prompt,
llm='h2oai/h2ogpt-4096-llama2-70b-chat'
llm=recommender_model
)
_res = text_completion.content.split("\n")[2:]
results = "\n".join(_res)
Expand Down
Loading

0 comments on commit 6286adb

Please sign in to comment.