Commit: Making docker image more flexible

Ricardicus committed Jan 18, 2021
1 parent b405737 commit 736d70b
Showing 4 changed files with 31 additions and 40 deletions.
9 changes: 5 additions & 4 deletions Dockerfile
@@ -1,11 +1,12 @@
-FROM ubuntu
-RUN apt-get update
-RUN apt-get -y install build-essential
+FROM ubuntu:18.04
+
+RUN apt-get update && apt-get -y install build-essential

WORKDIR /usr/src/app

COPY . .

RUN make

CMD ["./net", "data/eddan_full.txt", "-st", "1000", "-ep", "40000"]
CMD ["./net", "data/eddan_full.txt", "-st", "1000", "-ep", "40000"]
ENTRYPOINT ["./net"]
47 changes: 11 additions & 36 deletions README.md
@@ -73,41 +73,9 @@ Then run the program:
where datafile is a file with the training data, and it will start training on it. You can see the progress
over time.

-If you have <i>several datafiles</i> then you can do something like:
-
-<pre>
-# bash example of how to use the -r flag
-# see std_conf.h on how to change the default
-# behaviour before building the program.
-
-# Build
-make
-# How many cycles all source files are to be trained on
-CYCLES=1000
-cycle=0
-# list files to train on
-files=$(ls folder_with_files/*)
-
-first=1
-while [ $cycle -lt $CYCLES ]; do
-  echo "$(date) Starting cycle $((cycle+1))"
-  for file in $files; do
-    echo "$(date) starting to train on file: $file"
-    if [ $first -eq 1 ]; then
-      ./net $file -vr 0 -ep 1 -st 0 -N 128 -L 3 -lr 0.0003
-      first=0
-    else
-      ./net $file -vr 0 -r lstm_net.net -ep 1 -st 0 -lr 0.0003
-    fi
-  done
-
-  cycle=$((cycle+1))
-done
-</pre>
-
## Windows

-Build using CMake.
+Build using CMake or meson.
Run with the same arguments as in UNIX.
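
requirements.txt lists the Python-distributed build tools, so both routes work from a fresh checkout. A minimal sketch of either build, assuming the usual top-level CMake and meson project files (not shown in this diff):

<pre>
# CMake
cmake -S . -B build
cmake --build build

# meson (installed via requirements.txt, together with ninja)
meson setup build
ninja -C build
</pre>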

# Configure default behaviour before build
@@ -140,10 +108,11 @@ The following flags are available:
-N : Number of neurons in every layer
-vr : Verbosity level. Set to zero and only the loss function after and not during training will be printed.
-c : Don't train, only generate output. Seed given by the value. If -r is used, datafile is not considered.
+-s : Save folder, where models are stored (binary and JSON).

Check std_conf.h to see what default values are used, these are set during compilation.

-./net compiled Dec 24 2019 10:05:26
+./net compiled Jan 18 2021 13:08:35
</pre>

The -st flag is great. By default, the network is stored when you interrupt the program with Ctrl-C, but with this argument you can let the program train and have it store the network continuously during the training process.
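
Together with the new -s flag from this commit, this makes unattended training runs practical. A minimal sketch (the folder 'models' is illustrative and assumed to exist already):

<pre>
# Store the network every 1000th iteration, with the binary and
# JSON models ending up under the folder 'models'
./net data/eddan_full.txt -st 1000 -s models
</pre>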
@@ -195,8 +164,14 @@ network at the moment. Take a look at Dockerfile.
Modify at will. Here is a container for training
on the poetic edda:
<pre>
-# Pulls an image that trains the network on the poetic Edda
-docker pull rickardhallerback/recurrent-neural-net:1.0
+# Pulls an image that trains the network on the poetic Edda by default,
+# but can also train on your own data.
+docker pull rickardhallerback/recurrent-neural-net:1.1
+# Run the image, training on the poetic Edda (default)
+docker run rickardhallerback/recurrent-neural-net:1.1
+# Run the image, training on your own file in the current working
+# directory, say 'myfile.txt', storing the model every 1000th iteration
+docker run -v $(pwd):/data rickardhallerback/recurrent-neural-net:1.1 /data/myfile.txt -s /data -st 1000
</pre>
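
Because of the -v mount, the stored model files end up in the host's working directory, so a later run can resume from them. A sketch using the -r flag, assuming the binary model keeps the default name lstm_net.net seen elsewhere in the README:

<pre>
# Resume training from the previously stored network
docker run -v $(pwd):/data rickardhallerback/recurrent-neural-net:1.1 \
  /data/myfile.txt -r /data/lstm_net.net -s /data -st 1000
</pre>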

# Additional interesting stuff
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,3 +1,4 @@
+wheel
meson
cpplint
ninja
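
These entries are build and lint tooling distributed through PyPI rather than runtime dependencies of the network itself. A minimal install sketch (standard pip usage, not part of this commit):

<pre>
pip install -r requirements.txt
</pre>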
14 changes: 14 additions & 0 deletions src/main.c
@@ -28,6 +28,8 @@ static int write_output_directly_bytes = 0;
static char *read_network = NULL;
static char *seed = NULL;
static int store_after_training = 0;
+static char save_model_folder_raw[256];
+static char save_model_folder_json[256];

void store_the_net_layers(int signo)
{
@@ -71,6 +73,7 @@ void usage(char *argv[]) {
printf(" -N : Number of neurons in every layer\r\n");
printf(" -vr : Verbosity level. Set to zero and only the loss function after and not during training will be printed.\n");
printf(" -c : Don't train, only generate output. Seed given by the value. If -r is used, datafile is not considered.\r\n");
printf(" -s : Save folder, where models are stored (binary and JSON).\r\n");
printf("\r\n");
printf("Check std_conf.h to see what default values are used, these are set during compilation.\r\n");
printf("\r\n");
@@ -117,6 +120,17 @@ void parse_input_args(int argc, char** argv)
if ( params.store_network_every == 0 ) {
  store_after_training = 1;
}
+} else if ( !strcmp(argv[a], "-s") ) {
+  memset(save_model_folder_json, 0, sizeof(save_model_folder_json));
+  memset(save_model_folder_raw, 0, sizeof(save_model_folder_raw));
+
+  snprintf(save_model_folder_json, sizeof(save_model_folder_json),
+           "%s/%s", argv[a+1], STD_JSON_NET_NAME);
+  snprintf(save_model_folder_raw, sizeof(save_model_folder_raw),
+           "%s/%s", argv[a+1], STD_LOADABLE_NET_NAME);
+
+  params.store_network_name_raw = save_model_folder_raw;
+  params.store_network_name_json = save_model_folder_json;
} else if ( !strcmp(argv[a], "-out") ) {
  write_output_directly_bytes = atoi(argv[a+1]);
  if ( write_output_directly_bytes <= 0 ) {
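The new -s handler simply prefixes the two default model file names from std_conf.h with the given folder. A sketch of the effect, assuming STD_LOADABLE_NET_NAME is the binary lstm_net.net used elsewhere in the README and STD_JSON_NET_NAME is its JSON counterpart:

<pre>
# With -s, both models are written under the given folder:
#   models/lstm_net.net   (binary, STD_LOADABLE_NET_NAME)
#   models/...json        (JSON, STD_JSON_NET_NAME)
./net data/eddan_full.txt -s models
</pre>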
