# docker-compose-analytics-pipeline.yml
# Forked from openedx-unsupported/devstack.
# Compose definition for the analytics pipeline development stack: a small
# Hadoop cluster (namenode, datanode, resourcemanager, nodemanager), a Spark
# master/worker pair, a Vertica container, and the analytics pipeline
# container itself.
#
# Every published port is bound to 127.0.0.1, so the web UIs and service
# ports are reachable from the host machine only.
#
# Image tags for the edxops images come from ${OPENEDX_RELEASE} and fall back
# to "latest" when that variable is unset or empty.
version: "2.1"

services:
  # HDFS namenode; its metadata directory is kept in the namenode_data volume.
  namenode:
    image: edxops/analytics_pipeline_hadoop_namenode:${OPENEDX_RELEASE:-latest}
    container_name: edx.devstack.analytics_pipeline.namenode
    hostname: namenode
    environment:
      CLUSTER_NAME: devstack
    ports:
      - "127.0.0.1:50070:50070"
    command: ["/run.sh"]
    volumes:
      - namenode_data:/hadoop/dfs/name

  # HDFS datanode; block storage is kept in the datanode_data volume.
  datanode:
    image: edxops/analytics_pipeline_hadoop_datanode:${OPENEDX_RELEASE:-latest}
    container_name: edx.devstack.analytics_pipeline.datanode
    hostname: datanode
    environment:
      CORE_CONF_fs_defaultFS: "hdfs://namenode:8020"
    depends_on:
      - namenode
    ports:
      - "127.0.0.1:50075:50075"
    command: ["/run.sh"]
    volumes:
      - datanode_data:/hadoop/dfs/data

  # YARN resource manager.
  resourcemanager:
    image: edxops/analytics_pipeline_hadoop_resourcemanager:${OPENEDX_RELEASE:-latest}
    container_name: edx.devstack.analytics_pipeline.resourcemanager
    hostname: resourcemanager
    environment:
      CORE_CONF_fs_defaultFS: "hdfs://namenode:8020"
      # Quoted so the value reaches the container as the string "true"
      # rather than a YAML boolean.
      YARN_CONF_yarn_log___aggregation___enable: 'true'
      YARN_CONF_yarn_nodemanager_aux___services: mapreduce_shuffle
      YARN_CONF_yarn_nodemanager_aux___services_mapreduce_shuffle_class: 'org.apache.hadoop.mapred.ShuffleHandler'
      MAPRED_CONF_mapreduce_framework_name: yarn
    depends_on:
      - namenode
      - datanode
    ports:
      - "127.0.0.1:8088:8088"  # resource manager web ui
    command: ["/run.sh"]

  # YARN node manager; points at the resourcemanager service by hostname.
  nodemanager:
    image: edxops/analytics_pipeline_hadoop_nodemanager:${OPENEDX_RELEASE:-latest}
    container_name: edx.devstack.analytics_pipeline.nodemanager
    hostname: nodemanager
    environment:
      CORE_CONF_fs_defaultFS: "hdfs://namenode:8020"
      YARN_CONF_yarn_resourcemanager_hostname: resourcemanager
      YARN_CONF_yarn_log___aggregation___enable: 'true'
      YARN_CONF_yarn_nodemanager_aux___services: mapreduce_shuffle
      YARN_CONF_yarn_nodemanager_aux___services_mapreduce_shuffle_class: 'org.apache.hadoop.mapred.ShuffleHandler'
      YARN_CONF_yarn_nodemanager_vmem___check___enabled: 'false'
      MAPRED_CONF_mapreduce_framework_name: yarn
    depends_on:
      - resourcemanager
      - namenode
      - datanode
    ports:
      - "127.0.0.1:8042:8042"  # node manager web ui
      - "127.0.0.1:19888:19888"  # node manager job history server ui
    command: ["/run.sh"]

  # Spark master.
  sparkmaster:
    image: edxops/analytics_pipeline_spark_master:${OPENEDX_RELEASE:-latest}
    container_name: edx.devstack.analytics_pipeline.sparkmaster
    hostname: sparkmaster
    ports:
      - "127.0.0.1:8080:8080"
      - "127.0.0.1:7077:7077"  # spark master port
      - "127.0.0.1:6066:6066"  # spark api
      - "127.0.0.1:18080:18080"  # spark history server

  # Spark worker; registers against the sparkmaster service.
  sparkworker:
    image: edxops/analytics_pipeline_spark_worker:${OPENEDX_RELEASE:-latest}
    container_name: edx.devstack.analytics_pipeline.sparkworker
    hostname: sparkworker
    depends_on:
      - sparkmaster
    environment:
      SPARK_MASTER: "spark://sparkmaster:7077"
    ports:
      - "127.0.0.1:8081:8081"  # spark worker UI

  # Vertica database; no ports are published to the host.
  vertica:
    image: sumitchawla/vertica:latest
    container_name: edx.devstack.analytics_pipeline.vertica
    volumes:
      - vertica_data:/home/dbadmin/docker

  # The analytics pipeline container. The pipeline source tree is bind-mounted
  # from ${DEVSTACK_WORKSPACE}/edx-analytics-pipeline on the host.
  analyticspipeline:
    image: edxops/analytics_pipeline:${OPENEDX_RELEASE:-latest}
    container_name: edx.devstack.analytics_pipeline
    hostname: analyticspipeline
    volumes:
      - ${DEVSTACK_WORKSPACE}/edx-analytics-pipeline:/edx/app/analytics_pipeline/analytics_pipeline
    command: ["/etc/bootstrap.sh", "-d"]
    # NOTE(review): mysql and elasticsearch are not defined in this file;
    # presumably they come from another compose file passed alongside this
    # one (e.g. via multiple -f options) - confirm before running standalone.
    depends_on:
      - mysql
      - namenode
      - resourcemanager
      - nodemanager
      - datanode
      - sparkworker
      - elasticsearch
      - vertica
    ports:
      - "127.0.0.1:4040:4040"  # spark web UI
    environment:
      HADOOP_COMMON_RESOURCE_MANAGER_HOST: "resourcemanager"
      HADOOP_DEFAULT_FS: "hdfs://namenode:8020"
      # NOTE(review): despite the _HOST suffix this holds a full spark:// URL
      # including the port - confirm this is what the pipeline image expects.
      SPARK_MASTER_HOST: "spark://sparkmaster:7077"
      SPARK_MASTER_PORT: "7077"

# Named volumes backing the HDFS and Vertica data directories above.
volumes:
  namenode_data:
  datanode_data:
  vertica_data: