Skip to content

Commit

Permalink
Merge pull request #766 from memex-explorer/ahmadia/tag-0.4
Browse files Browse the repository at this point in the history
ahmadia/tag 0.4
  • Loading branch information
ahmadia committed Nov 12, 2015
2 parents 1c10627 + d040be1 commit 28cfb04
Show file tree
Hide file tree
Showing 7 changed files with 227 additions and 69 deletions.
10 changes: 10 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# 0.4

Released November 2015

* Nutch streaming visualizations
* Nutch REST API support
* Removed log.io
* DataWake trail support
* Bug fixes

# 0.3

Released September 2015
Expand Down
127 changes: 68 additions & 59 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,17 @@ RUN apt-get install -y \
git \
libssl-dev \
make \
nginx \
openjdk-7-jdk \
pkg-config \
rabbitmq-server \
wget

RUN groupadd -r explorer -g 433 && \
useradd -u 431 -m -r -g explorer -d /home/explorer -s /bin/bash -c "Docker image user" explorer && \
chown -R explorer:explorer /home/explorer
RUN adduser explorer sudo
RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers

USER explorer

Expand All @@ -33,108 +37,113 @@ RUN bash ./Miniconda-latest-Linux-x86_64.sh -b
RUN git clone https://github.com/memex-explorer/memex-explorer

WORKDIR /home/explorer/memex-explorer/
RUN git checkout origin/ahmadia/tad_integration_master

WORKDIR /home/explorer/memex-explorer
ENV PATH /home/explorer/miniconda/bin:$PATH
ENV PATH /home/explorer/miniconda2/bin:$PATH
RUN conda update conda -y
RUN conda install conda-env -y
RUN conda env update --file local-environment.yml
RUN conda env update

WORKDIR /home/explorer/memex-explorer/source/memex
RUN cp settings_files/dev_settings.py settings.py

WORKDIR /home/explorer/memex-explorer/source
#replaces source activate... sorta
ENV PATH /home/explorer/miniconda/envs/memex/bin:$PATH
ENV PATH /home/explorer/miniconda2/envs/memex/bin:$PATH
ENV CONDA_DEFAULT_ENV memex
ENV CONDA_ENV_PATH /home/explorer/miniconda
RUN python manage.py migrate
ENV CONDA_ENV_PATH /home/explorer/miniconda2

WORKDIR /home/explorer/memex-explorer/deploy
RUN python logio_settings.py
# move these into another environment.yaml? Or better, apply them as a patch?
RUN conda install -c memex ddt
# TAD dependencies
RUN pip install celery fisher elasticsearch flask flask_restful

### TAD
# Move this up eventually
USER root
RUN apt-get install -y rabbitmq-server
# ready for deployment
RUN python manage.py migrate
RUN python manage.py collectstatic -v0 --noinput

USER explorer
ENV PATH /home/explorer/miniconda/envs/memex/bin:$PATH
ENV CONDA_DEFAULT_ENV memex
ENV CONDA_ENV_PATH /home/explorer/miniconda
RUN pip install celery
RUN pip install fisher elasticsearch
RUN pip install flask flask_restful
# Install elasticdump
RUN npm install -g elasticdump

#------------- Pull software --------------#
#------------- Pull TAD software --------------#
USER root
RUN mkdir -p /service/build
WORKDIR /service/build
RUN git clone https://github.com/autonlab/tad.git

# Build and install TAD library.
RUN ln -s /service/build/tad/service /service/tad
### TAD!

RUN mkdir /service/tad/config

# Need a special supervisord.conf that knows how to bind to 0.0.0.0
# also this knows to run TAD
# Explorer configuration
USER explorer
COPY supervisord.conf /home/explorer/memex-explorer/source/supervisord.conf
# Need dev_settings that point to right locations
COPY docker_settings.py /home/explorer/memex-explorer/source/memex/settings.py
# patched in
WORKDIR /home/explorer/memex-explorer
RUN conda install markdown
RUN markdown_py CHANGES.md > source/base/static/changes.html
WORKDIR /home/explorer/memex-explorer/docs
RUN make html
RUN mv build/html ../source/base/static/docs

# Now bring over elasticsearch index data
COPY elasticdump.json /home/explorer/elasticdump.json
# TAD configuration
COPY tad.cfg /service/tad/config/tad.cfg

# Install elasticdump
RUN npm install -g elasticdump

# UNCOMMENT THIS SECTION TO PRELOAD THE ELASTICSEARCH INSTANCE
# ------------------------------------------------------------- #

# Now bring over elasticsearch index data
# COPY elasticdump.json /home/explorer/elasticdump.json

# Load data
# Elasticsearch data
COPY populate_elasticsearch.sh /home/explorer/populate_elasticsearch.sh
WORKDIR /home/explorer
RUN /bin/bash ./populate_elasticsearch.sh
# Crawl data
COPY resources /home/explorer/memex-explorer/source/resources/
# COPY populate_elasticsearch.sh /home/explorer/populate_elasticsearch.sh
# WORKDIR /home/explorer
# RUN /bin/bash ./populate_elasticsearch.sh
# ------------------------------------------------------------- #


# ------------------------------------------------------------- #
# UNCOMMENT THIS SECTION TO PRELOAD CRAWL DATA
# Crawl data
# COPY resources /home/explorer/memex-explorer/source/resources/
# ------------------------------------------------------------- #


# ------------------------------------------------------------- #
# UNCOMMENT THIS SECTION TO PRELOAD SQL DATABASE
# SQLite (Django)
COPY db.sqlite3 /home/explorer/memex-explorer/source/db.sqlite3
# COPY db.sqlite3 /home/explorer/memex-explorer/source/db.sqlite3
# ------------------------------------------------------------- #

# TAD run script
COPY tad_run /home/explorer/miniconda/envs/memex/bin/tad
COPY tad_run /home/explorer/miniconda2/envs/memex/bin/tad

# Permissions clean-up
USER root
RUN chown -R explorer /home/explorer/memex-explorer/source
RUN chmod +x /home/explorer/miniconda/envs/memex/bin/tad
RUN chown -R explorer /home/explorer/miniconda/envs/memex/bin/tad
COPY nginx.conf /etc/nginx/sites-enabled/default
# uncomment these to install secrets into the container
# COPY secrets/nginx.crt /etc/nginx/ssl/nginx.crt
# COPY secrets/nginx.key /etc/nginx/ssl/nginx.key
# COPY secrets/htpasswd /etc/nginx/htpasswd

#------------- Service Configuration --------------#
# Permissions clean-up
# Permissions clean-up
USER root
RUN mkdir /service/tad/config
# TAD configuration file
COPY tad.cfg /service/tad/config/tad.cfg
RUN chown -R explorer /service
RUN adduser explorer sudo
RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers

USER explorer
WORKDIR /home/explorer/memex-explorer/
RUN git fetch && checkout origin/ahmadia/tad_integration_master
RUN chown -R explorer /home/explorer/memex-explorer/source
RUN chmod +x /home/explorer/miniconda2/envs/memex/bin/tad
RUN chown -R explorer /home/explorer/miniconda2/envs/memex/bin/tad

##################### INSTALLATION END #####################

# Expose the default ports
EXPOSE 8000
EXPOSE 9200
EXPOSE 9300
EXPOSE 5601
EXPOSE 8084
EXPOSE 5000
EXPOSE 80
EXPOSE 443

# Set default container command
USER explorer
WORKDIR /home/explorer/memex-explorer/source
ENTRYPOINT "supervisord"
ENV HTTP_PROTOCOL http
ENV WS_PROTOCOL ws
ENV INLINE 1
ENTRYPOINT "supervisord"
60 changes: 60 additions & 0 deletions docker/docker_settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""
Django settings for memex project.
For more information on this file, see
https://docs.djangoproject.com/en/1.7/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/1.7/ref/settings/
"""

from common_settings import *
import os

def _env_flag(name, default=False):
    """Interpret the environment variable ``name`` as a boolean.

    Returns ``default`` when the variable is unset. Otherwise returns False
    for an empty value or one of "0", "false", "no", "off" (case-insensitive,
    surrounding whitespace ignored) and True for any other value.

    Reading the raw string instead would treat "0" or "false" as enabled,
    because every non-empty string is truthy.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() not in ('', '0', 'false', 'no', 'off')


# SECURITY WARNING: when VIRTUAL_HOST is unset this falls back to '*', which
# means Django accepts any Host header instead of validating it.
ALLOWED_HOSTS = [os.environ.get('VIRTUAL_HOST', '*')]

# SECURITY WARNING: keep the secret key used in production secret!
# Set DJANGO_SECRET_KEY in the environment to override the fallback below,
# which is checked into source control and therefore not secret.
SECRET_KEY = os.environ.get(
    'DJANGO_SECRET_KEY',
    '0#t((zq66&3*87djaltu-pn34%0p!*v_332f2p!$2i)w5y17f8',
)

# SECURITY WARNING: don't run with debug turned on in production!
PRODUCTION = _env_flag('PRODUCTION')

# when INLINE is true point to local sources for changes/documentation instead of remote ones
INLINE = _env_flag('INLINE')

if PRODUCTION:
    DEBUG = False
    TEMPLATE_DEBUG = False
    DEPLOYMENT = True
else:
    DEBUG = True
    TEMPLATE_DEBUG = True
    # The debug toolbar is only installed for non-production runs.
    INSTALLED_APPS += ('debug_toolbar',)
    DEPLOYMENT = False


MEDIA_ROOT = os.path.join(BASE_DIR, 'resources')
PROJECT_PATH = os.path.join(MEDIA_ROOT, "projects")

VIRTUAL_HOST = os.environ.get('VIRTUAL_HOST', 'localhost')
PROTOCOL = os.environ.get('HTTP_PROTOCOL', 'http')

REST_FRAMEWORK = {
    'DEFAULT_AUTHENTICATION_CLASSES': (
        'rest_framework.authentication.SessionAuthentication',
    ),
    'DEFAULT_PERMISSION_CLASSES': (
        'rest_framework.permissions.AllowAny',
    ),
    'DEFAULT_FILTER_BACKENDS': ('rest_framework.filters.DjangoFilterBackend',)
}

# ddt is treated as an external service for now, so it gets an absolute URL
# built from PROTOCOL and VIRTUAL_HOST; the other entries are site-relative
# paths.

EXTERNAL_APP_LOCATIONS = {
    'bokeh-server': '/bokeh',
    'ddt': PROTOCOL + '://' + VIRTUAL_HOST + ':8084',
    'tad': '/tad',
    'kibana': '/kibana',
}
40 changes: 40 additions & 0 deletions docker/nginx.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Front-end server: proxies the web application, Kibana and the bokeh
# server, and serves static assets straight from disk.
server {
    listen 80;
    server_name explorer.continuum.io;

    # Accept request bodies up to 100M.
    client_max_body_size 100M;

    # Default: hand the request to the application on port 8000, passing
    # along the original host and client address.
    location / {
        # TODO: make the upstream address below a variable
        proxy_pass http://127.0.0.1:8000;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    }

    # Kibana: drop the /kibana/ prefix before proxying to port 5601.
    location /kibana/ {
        rewrite /kibana/(.*) /$1 break;
        proxy_pass http://127.0.0.1:5601/;
        proxy_set_header Host $host;
        proxy_redirect off;
    }

    # Bokeh server pages: proxied uncached, with cookie domains and
    # absolute localhost URLs in responses rewritten to the public host.
    location /bokeh/ {
        proxy_pass http://127.0.0.1:5006/;
        proxy_cache off;
        proxy_cookie_domain localhost explorer.continuum.io;
        sub_filter 'http://localhost:5006' 'https://explorer.continuum.io';
    }

    # Bokeh subscription endpoint: forwards the HTTP/1.1 Upgrade handshake
    # headers to the upstream.
    location /bokeh/sub {
        proxy_pass http://localhost:5006;
        proxy_http_version 1.1;
        proxy_set_header Connection "upgrade";
        proxy_set_header Upgrade $http_upgrade;
    }

    # Static files: strip the /static/ prefix and resolve the remainder
    # under the root directory below.
    location /static {
        root /home/explorer/memex-explorer/source/base/static/;
        rewrite ^/static/(.*)$ /$1 break;
    }
}
38 changes: 30 additions & 8 deletions docker/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,33 @@ minprocs=200 ; (min. avail process descriptors;default 200)
[inet_http_server]
port = 127.0.0.1:9001

[program:nginx]
command=sudo nginx -g 'daemon off;'
priority=1
autostart=true

[program:redis]
command=redis-server
priority=1

[program:celery]
command=celery -A memex worker -l info --loglevel=debug --logfile=memex/logs/celeryd.log
priority=2

[program:elasticsearch]
command=elasticsearch
priority=1

[program:nutch]
command=nutch startserver
priority=1

[program:rabbitmq]
command=sudo rabbitmq-server ; need administrative access on Linux systems
priority=1
autostart=true

[program:bokeh-server]
command=bokeh-server --backend memory --port 5006 --ws-conn-string %(ENV_WS_PROTOCOL)s://explorer.continuum.io/bokeh/sub
priority=1
autostart=true

[program:tika]
command=tika-rest-server
priority=2
Expand All @@ -38,16 +53,23 @@ priority=2
[program:ddt]
command=ddt
priority=5
autostart=false
autostart=true

[program:tad]
command=tad
priority=5
autostart=true
autostart=false

# have to bring up celery after other applications
[program:celery]
command=celery -A memex worker -l info --loglevel=debug --logfile=resources/logs/celery-worker.log
priority=10
killasgroup=true

[program:django]
command=python manage.py runserver 0.0.0.0:8000
command=gunicorn memex.wsgi:application -b 0.0.0.0:8000
priority=20
stopasgroup=true

[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
4 changes: 2 additions & 2 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@
# built documents.
#
# The short X.Y version.
version = '0.2'
version = '0.4'
# The full version, including alpha/beta/rc tags.
release = '0.2'
release = '0.4'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
Loading

0 comments on commit 28cfb04

Please sign in to comment.