Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
544 changes: 544 additions & 0 deletions 02์žฅ/chapter_2_transformer_with_code.ipynb

Large diffs are not rendered by default.

947 changes: 947 additions & 0 deletions 03์žฅ/chapter_3.ipynb

Large diffs are not rendered by default.

577 changes: 577 additions & 0 deletions 05์žฅ/chapter_5.ipynb

Large diffs are not rendered by default.

397 changes: 397 additions & 0 deletions 06์žฅ/api_request_parallel_processor.py

Large diffs are not rendered by default.

485 changes: 485 additions & 0 deletions 06์žฅ/chapter_6.ipynb

Large diffs are not rendered by default.

65 changes: 65 additions & 0 deletions 06์žฅ/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import json
import pandas as pd


def change_jsonl_to_csv(input_file, output_file, prompt_column="prompt", response_column="response"):
prompts = []
responses = []
with open(input_file, 'r') as json_file:
for data in json_file:
prompts.append(json.loads(data)[0]['messages'][0]['content'])
responses.append(json.loads(data)[1]['choices'][0]['message']['content'])

df = pd.DataFrame({prompt_column: prompts, response_column: responses})
df.to_csv(output_file, index=False)
return df


def merge_gt_and_gen_result(df_gt, df_gen):
results = []
for idx, row in df_gen.iterrows():
with_sql_gt = df_gt.loc[df_gt['without_sql'] == row['without_sql']]
gt_sql = with_sql_gt['sql'].values[0]
gen_sql = row['gen_sql']
results.append((with_sql_gt['ddl'].values[0], with_sql_gt['request'].values[0], gt_sql, gen_sql))
df_result = pd.DataFrame(results, columns=["ddl", "request", "gt_sql", "gen_sql"])
return df_result

def make_evaluation_requests(df, filename, model="gpt-4-1106-preview"):
prompts = []
for idx, row in df.iterrows():
prompts.append(f"""Based on provided ddl, request, gen_sql, ground_truth_sql if gen_sql eqauls to ground_truth_sql, output "yes" else "no"
DDL:
{row['ddl']}
Request:
{row['request']}
ground_truth_sql:
{row['gt_sql']}
gen_sql:
{row['gen_sql']}

Answer:""")

jobs = [{"model": model, "messages": [{"role": "system", "content": prompt}]} for prompt in prompts]
with open(filename, "w") as f:
for job in jobs:
json_string = json.dumps(job)
f.write(json_string + "\n")

def make_prompt(ddl, request, sql=""):
prompt = f"""๋‹น์‹ ์€ SQL์„ ์ƒ์„ฑํ•˜๋Š” SQL ๋ด‡์ž…๋‹ˆ๋‹ค. DDL๊ณผ ์š”์ฒญ์‚ฌํ•ญ์„ ๋ฐ”ํƒ•์œผ๋กœ ์ ์ ˆํ•œ SQL ์ฟผ๋ฆฌ๋ฅผ ์ƒ์„ฑํ•˜์„ธ์š”.

DDL:
{ddl}

์š”์ฒญ์‚ฌํ•ญ:
{request}

SQL:
{sql}"""
return prompt


if __name__ == '__main__':
df = pd.read_csv('./nl2sql_validation.csv')
df.sample(100).to_csv('nl2sql_validation_sample.csv', index=False)
Loading