Skip to content

Commit

Permalink
new experiments and updated descriptions
Browse files Browse the repository at this point in the history
  • Loading branch information
Yondijr committed Jan 21, 2021
1 parent 1d867a6 commit d8ba39d
Show file tree
Hide file tree
Showing 34 changed files with 25,721 additions and 111 deletions.
43 changes: 16 additions & 27 deletions Build_datasets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -65,7 +65,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -76,38 +76,27 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 9,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"103127\n"
]
}
],
"source": [
"splitted = split_eos(texts)\n",
"final = [item for sublist in splitted for item in sublist]\n",
"print(len(final))\n",
"final = \"<|endoftext|>\".join(final)\n",
"f = open(\"saves/splitOnEosDataset_v2_test.txt\", \"w\",encoding = \"UTF-8\")\n",
"f.write(final)\n",
"f.close()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"16337865"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(final)"
]
},
{
"cell_type": "code",
"execution_count": 43,
Expand Down Expand Up @@ -679,12 +668,12 @@
},
{
"cell_type": "code",
"execution_count": 95,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"cor1 = pickle.load( open( \"gpt2-dataset/correctedtrain100k.p\", \"rb\" ))\n",
"cor2 = pickle.load( open( \"gpt2-dataset/correctedtrain150k.p\", \"rb\" ))"
"cor1 = pickle.load( open( \"saves/correctedtrain100k.p\", \"rb\" ))\n",
"cor2 = pickle.load( open( \"saves/correctedtrain150k.p\", \"rb\" ))"
]
},
{
Expand Down
Loading

0 comments on commit d8ba39d

Please sign in to comment.