Add tensorboard logging
scientist1642 committed Mar 30, 2017
1 parent 02d6f65 commit 1da2fa2
Showing 7 changed files with 141 additions and 32 deletions.
72 changes: 70 additions & 2 deletions .gitignore
@@ -1,2 +1,70 @@
__pycache__
*.pyc
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so
*.c

# logs
runs/
checkpoints/

# other
.DS_Store

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints
6 changes: 6 additions & 0 deletions README.md
@@ -22,6 +22,12 @@ Install most recent nightly build (version '0.1.10+2fd4d08' or later) of PyTorch
pip install git+https://github.com/pytorch/pytorch
`

## Dependencies
* pytorch
* torchvision
* universe (for now)
* [tensorboard logger](https://github.com/TeamHG-Memex/tensorboard_logger)

## Results

With 16 processes it converges for PongDeterministic-v3 in 15 minutes.
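For readers new to tensorboard_logger, this commit relies only on the module-level `configure` and `log_value` calls that appear in the diffs below. A minimal standalone sketch (the log directory name is made up):

```python
import tensorboard_logger as tb

# Point the module-level logger at a run directory (created if needed).
tb.configure('runs/demo_run')

# Log scalar values; the last argument is the global step used as the x-axis.
for step in range(100):
    tb.log_value('reward', step * 0.5, step)
```

TensorBoard can then plot the resulting event files via `tensorboard --logdir runs`.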
37 changes: 28 additions & 9 deletions main.py
@@ -4,18 +4,23 @@
import os
import sys
import math
import time

import torch
import torch.optim as optim
import torch.multiprocessing as mp
import torch.nn as nn
import torch.nn.functional as F
import tensorboard_logger as tb

import my_optim
from envs import create_atari_env
from model import ActorCritic
from train import train
from test import test
from utils import logger
import my_optim
from utils.shared_memory import SharedCounter


logger = logger.getLogger('main')

@@ -41,16 +46,27 @@
help='environment to train on (default: PongDeterministic-v3)')
parser.add_argument('--no-shared', default=False, metavar='O',
help='use an optimizer without shared momentum.')
parser.add_argument('--max-iters', type=int, default=math.inf,
help='maximum iterations per process.')

parser.add_argument('--max-episode-count', type=int, default=math.inf,
help='maximum number of episodes to run per process.')
parser.add_argument('--debug', action='store_true', default=False,
help='run in a way that is easier to debug')
parser.add_argument('--short-description', default='no_descr',
help='short description of the run params (used in tensorboard)')

def setup_logging(args):
logger.debug('CONFIGURATION: {}'.format(args))

cur_path = os.path.dirname(os.path.realpath(__file__))
args.summ_base_dir = (cur_path+'/runs/{}/{}({})').format(args.env_name,
time.strftime('%d.%m-%H.%M'), args.short_description)
logger.info('writing run logs to {}'.format(args.summ_base_dir))
tb.configure(args.summ_base_dir)

if __name__ == '__main__':
args = parser.parse_args()

setup_logging(args)
torch.manual_seed(args.seed)

env = create_atari_env(args.env_name)
shared_model = ActorCritic(
env.observation_space.shape[0], env.action_space)
@@ -61,20 +77,23 @@
else:
optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=args.lr)
optimizer.share_memory()


gl_step_cnt = SharedCounter()

if not args.debug:
processes = []

p = mp.Process(target=test, args=(args.num_processes, args, shared_model))
p = mp.Process(target=test, args=(args.num_processes, args,
shared_model, gl_step_cnt))
p.start()
processes.append(p)
for rank in range(0, args.num_processes):
p = mp.Process(target=train, args=(rank, args, shared_model, optimizer))
p = mp.Process(target=train, args=(rank, args, shared_model,
gl_step_cnt, optimizer))
p.start()
processes.append(p)
for p in processes:
p.join()
else:  # debug is enabled
# run only one process in main; easier to debug
train(0, args, shared_model, optimizer)
train(0, args, shared_model, gl_step_cnt, optimizer)
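
As an illustration of the run-directory naming that setup_logging produces above (the path and values here are hypothetical):

```python
import time

# Hypothetical values, mirroring the format string in setup_logging.
cur_path = '/home/user/pytorch-a3c'
env_name = 'PongDeterministic-v3'
short_description = 'no_descr'

summ_base_dir = (cur_path + '/runs/{}/{}({})').format(
    env_name, time.strftime('%d.%m-%H.%M'), short_description)
print(summ_base_dir)
# e.g. /home/user/pytorch-a3c/runs/PongDeterministic-v3/30.03-17.42(no_descr)
```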
4 changes: 0 additions & 4 deletions model.py
@@ -44,12 +44,8 @@ def __init__(self, num_inputs, action_space):
self.lstm = nn.LSTMCell(32 * 3 * 3, 256)

num_outputs = action_space.n

self.critic_linear = nn.Linear(256, 1)
self.actor_linear = nn.Linear(256, num_outputs)
#self.critic_linear = nn.Linear(288, 1)
#self.actor_linear = nn.Linear(288, num_outputs)

self.apply(weights_init)
self.actor_linear.weight.data = normalized_columns_initializer(
self.actor_linear.weight.data, 0.01)
17 changes: 14 additions & 3 deletions test.py
@@ -1,21 +1,23 @@
import math
import os
import sys
import time

import torch
import torch.nn.functional as F
import torch.optim as optim
import tensorboard_logger as tb

from envs import create_atari_env
from model import ActorCritic
from torch.autograd import Variable
from torchvision import datasets, transforms
import time
from collections import deque
from utils import logger

logger = logger.getLogger('test')

def test(rank, args, shared_model):
def test(rank, args, shared_model, gl_step_cnt):
torch.manual_seed(args.seed + rank)

env = create_atari_env(args.env_name)
@@ -32,6 +34,8 @@ def test(rank, args, shared_model):

start_time = time.time()

local_episode_num = 0

# a quick hack to prevent the agent from getting stuck
actions = deque(maxlen=100)
episode_length = 0
@@ -61,10 +65,17 @@ def test(rank, args, shared_model):
done = True

if done:
passed_time = time.time() - start_time
local_episode_num += 1
global_step_count = gl_step_cnt.get_value()

logger.info("Time {}, episode reward {}, episode length {}".format(
time.strftime("%Hh %Mm %Ss",
time.gmtime(time.time() - start_time)),
time.gmtime(passed_time)),
reward_sum, episode_length))
tb.log_value('steps_second', global_step_count / passed_time, global_step_count)
tb.log_value('reward', reward_sum, global_step_count)

reward_sum = 0
episode_length = 0
actions.clear()
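Note that passed_time above is measured from the start of training, so the logged steps_second value is a running average over the whole run rather than an instantaneous rate. A toy check with made-up numbers:

```python
# Made-up numbers, for illustration only.
start_time = 0.0            # training started at t = 0
now = 600.0                 # ten minutes later
global_step_count = 900000  # total env steps across all workers

passed_time = now - start_time
print(global_step_count / passed_time)  # 1500.0 steps/s, averaged since start
```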
24 changes: 10 additions & 14 deletions train.py
@@ -1,12 +1,11 @@
import math
import os
import sys
import resource
import gc

import torch
import torch.nn.functional as F
import torch.optim as optim

from envs import create_atari_env
from model import ActorCritic
from torch.autograd import Variable
@@ -21,7 +20,7 @@ def ensure_shared_grads(model, shared_model):
return
shared_param._grad = param.grad

def train(rank, args, shared_model, optimizer=None):
def train(rank, args, shared_model, gl_step_count, optimizer=None):
torch.manual_seed(args.seed + rank)

env = create_atari_env(args.env_name)
@@ -39,8 +38,7 @@ def train(rank, args, shared_model, optimizer=None):
done = True

episode_length = 0

iteration = 0
episode_count = 0

while True:

@@ -49,17 +47,11 @@
rewards = []
entropies = []

if iteration == args.max_iters:
logger.info('Max iteration {} reached..'.format(args.max_iters))
if episode_count == args.max_episode_count:
logger.info('Maximum episode count {} reached.'.format(args.max_episode_count))
# TODO: make sure test.py exits as well when no train process is running
break

if iteration % 200 == 0 and rank == 0:
mem_used = int(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
mem_used_mb = mem_used / 1024
logger.info('Memory usage of one proc: {} (mb)'.format(mem_used_mb))


iteration += 1
episode_length += 1

# Sync with the shared model
@@ -89,6 +81,7 @@

if done:
episode_length = 0
episode_count += 1
state = env.reset()

state = torch.from_numpy(state)
@@ -99,6 +92,9 @@
if done:
break

# increment global step count
gl_step_count.increment_by(step)

R = torch.zeros(1, 1)
if not done:
value, _, _ = model((Variable(state.unsqueeze(0)), (hx, cx)))
13 changes: 13 additions & 0 deletions utils/shared_memory.py
@@ -0,0 +1,13 @@
from multiprocessing import Value, Lock

class SharedCounter:
def __init__(self):
self.lock = Lock()
self.n = Value('i', 0)

def increment_by(self, k):
with self.lock:
self.n.value += k

def get_value(self):
return self.n.value
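
A quick standalone check of how SharedCounter behaves across processes (illustrative only, not part of the commit):

```python
import multiprocessing as mp

from utils.shared_memory import SharedCounter

def worker(counter, steps):
    for _ in range(steps):
        counter.increment_by(1)

if __name__ == '__main__':
    counter = SharedCounter()
    procs = [mp.Process(target=worker, args=(counter, 1000)) for _ in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    # The lock serializes increments, so no updates are lost.
    print(counter.get_value())  # 4000
```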
