Skip to content

Commit 0f9dbc4

Browse files
committed
fix: fix datas
1 parent: bc77a1b · commit: 0f9dbc4

File tree

2 files changed

+11
-6
lines changed

2 files changed

+11
-6
lines changed

code/text-to-repository.py

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,27 +8,31 @@
88
# open datasets/sql/prompts.json
99
# create id map for prompts in datasets/sql/prompts.json
1010
id_prompt_map = {}
11-
with open('../datasets/sql/prompts.json', 'r') as f:
11+
with open('../datasets/sql/llm-prompts.json', 'r') as f:
1212
data = json.loads(f.read())
1313
for row in data:
14-
id_prompt_map[row['id']] = row['prompt']
14+
id_prompt_map[row['id']] = row['requiredType']
1515

1616

1717
with open('../datasets/sql/repositories-5k.jsonl', 'r') as f:
1818
data = [json.loads(row) for row in f.readlines()]
1919

2020
with open('../datasets/sql/repository-5k-train.jsonl', 'w') as f:
2121
for row in data:
22-
print(row)
22+
requiredType = ""
2323
id = int(row['id'])
24+
if id in id_prompt_map:
25+
# id_prompt_map[id] is a list, check if it is empty
26+
if id_prompt_map[id]:
27+
requiredType = "###" + " ".join(id_prompt_map[id]) + "###"
28+
2429
item = {
2530
'instruction': 'text to kotlin repository with class',
26-
'input': row['output'],
31+
'input': row['output'] + "\n" + requiredType,
2732
'output': row['input']
2833
}
2934

30-
f.write(json.dumps(item) + '\n')
31-
35+
f.write(json.dumps(item) + '\n')
3236

3337
# 5kl to csv
3438
with open('../datasets/sql/repository-5k.csv', 'w') as f:

datasets/sql/llm-prompts.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)