-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Dick Kreisberg
committed
Jul 29, 2014
0 parents
commit e9d6a00
Showing
83 changed files
with
176,124 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#glob syntax | ||
|
||
syntax: glob | ||
|
||
*.png | ||
*.jpg | ||
*.ico | ||
*.gif | ||
*.pom | ||
.idea* | ||
.DS_Store |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
#drwxrwsr-x. 2 erkkila2 csbgroup 4.0K Jul 9 13:59 . | ||
#-rwxr-x---. 1 erkkila2 csbgroup 1.5G Jul 9 14:00 coadread.merge.12apr.rnaseq.276.hg18Plus.rface | ||
# | ||
[build] | ||
source=TCGA | ||
afm=/titan/cancerregulome3/TCGA/outputs/kirc/kirc.bigMerge.25jun.tsv | ||
annotations=na | ||
quantile_features=CNVR,GEXP,METH | ||
associations=/titan/cancerregulome3/TCGA/outputs/kirc/keep.pwpvD.short.sort | ||
interesting_scores=/titan/cancerregulome3/TCGA/outputs/kirc/featScores.pwpvD.short.txt | ||
afm_description=Kidney Renal AllPairs | ||
comment= | ||
dataset_label=kirc_31july_pw | ||
dataset_date=31-07-12 | ||
[email protected] | ||
disease_code=KIRC | ||
#python bin must be 2.5+ and have the MySQLdb package installed | ||
python_bin=python | ||
|
||
#the directory needs to exist and end in / | ||
[results] | ||
path=/home/csbgroup/public_html/RE/dataimport/results/ | ||
#/proj/ilyalab/jlin/load_associations/dataimport_meta/python/results/ | ||
|
||
[dbetl] | ||
#only supports blank, absolute, negative, negative_log10 | ||
pvalue_transform=absolute | ||
#collapse_edge_directions=1 implies taking the higher rf-ace importance of the (A->B, B->A) set | ||
#n/a for pairwise | ||
collapse_edge_directions=1 | ||
reverse_directions=1 | ||
keep_unmapped_associations=1 | ||
|
||
|
||
#ISB specific - requires smtp python module | ||
[pubcrawl] | ||
dopubcrawl=no | ||
[email protected] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#RE DataImport META configs | ||
[build] | ||
source=TCGA | ||
afm=/home/csbgroup/re_data/tcga/crc/coadread.all.23jan14.TP.afm | ||
annotations=na | ||
quantile_features=CNVR | ||
associations=/home/csbgroup/re_data/tcga/crc/coadread.all.23jan14.TP.rface | ||
interesting_scores=/home/csbgroup/re_data/tcga/crc/featScores.coadread.all.23jan14.TP.txt | ||
#good idea to include method_version | ||
afm_description=CRC RFACE_1.0.4 | ||
comment= | ||
dataset_label=crc_31july_test | ||
dataset_date=31-07-12 | ||
[email protected] | ||
disease_code=COADREAD | ||
#python bin must be 2.5+ and have the MySQLdb package installed | ||
python_bin=python | ||
|
||
#the directory needs to exist and end in / | ||
[results] | ||
path=/home/csbgroup/public_html/RE/dataimport/results/ | ||
#/proj/ilyalab/jlin/load_associations/dataimport_meta/python/results/ | ||
|
||
[dbetl] | ||
#only supports absolute, negative, negative_log10 | ||
pvalue_transform=negative_log10 | ||
#collapse_edge_directions=1 implies taking the higher rf-ace importance of the (A->B, B->A) set | ||
#the following settings are valid for rface only | ||
collapse_edge_directions=1 | ||
reverse_directions=1 | ||
process_gene_interest_score=0 | ||
keep_unmapped_associations=1 | ||
|
||
#ISB specific - requires smtp python module | ||
[pubcrawl] | ||
dopubcrawl=no | ||
[email protected] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#RE-Config and MySql Data Import notes and requirements | ||
#Requires python 2.5, 2.6, or 2.7 and libraries MySQLdb | ||
|
||
The python executable is explicitly set to /tools/bin/python2.7 in the sh scripts; update it as needed. | ||
|
||
Set up the RE admin dependencies and set permissions by executing rfex_admin.sql with root on the mysql server: | ||
|
||
mysql -u root -ppassword < rfex_admin.sql | ||
|
||
It is also recommended to take a look at MySQL's /etc/my.cnf. If you have millions of associations, you should consider following the my-large.cnf settings (see the MySQL documentation). | ||
|
||
#Config | ||
If you want to give your database a schema name other than tcga, replace 'tcga' in rfex_admin.sql with the name you want and then modify one of the example config/rfex_sql.config files. | ||
The host can be localhost or the actual server name. If you are running the data import on the db server, localhost is best; if you are running the data import from a different server, put in the entire address, such as machine.systemsbiology.net, with the appropriate port. The server you are on must have access to the db server. | ||
|
||
The rfex_admin.sql file contains grant statements, and it is important that you replace the existing server name, saskatoon, with your server name. | ||
Within the config file, the specified path must already exist, since the script only creates directories dynamically relative to this path. | ||
[results] | ||
path=/titan/cancerregulome3/TCGA/outputs_sandbox/parsed_associations | ||
|
||
If you change the random_forest database name used for the chrom and ref info, be aware that RE contains a google-dsapi-svc.config file that includes this database name; you will need to update it and then redeploy the war file of the same name in your web app server. | ||
|
||
#MySQL Engine | ||
RE db tables use the MyISAM engine rather than InnoDB, since MyISAM is better suited to read-intensive (select) workloads, offers full-text indexing, and is simpler to design and drop. | ||
|
||
#SH | ||
From the python dir, run the sh scripts: | ||
There should be sh files corresponding to the config files set up above; the arguments required by each sh script are: | ||
dataset_label feature_matrix_file associations_file dataset_comment dataset_description re_instance | ||
|
||
#RFACE analysis example: | ||
sh load_rface_associations.sh test_gbm_rface /titan/cancerregulome3/TCGA/outputs/gbm/gbm.merge.u133a.31oct.hg18.tsv /titan/cancerregulome3/TCGA/outputs/gbm/rf.u133a.31oct.mask1.F/all_associations.out "BRCA Her2 subset" "59 Her2-classified patients" internal | ||
|
||
#All Pairs example: | ||
sh load_pairwise_associations.sh test_kirc_0206_pw /titan/cancerregulome3/TCGA/outputs/kirc/bigMerge.06feb12.hg18.tsv /titan/cancerregulome3/TCGA/outputs/kirc/bigMerge.06feb12.pwpv "Kidney" "Kidney sandbox" public | ||
|
||
Regarding processing time, we are averaging about 800,000-1 million edges per minute. It is recommended that you modify your /etc/my.cnf to have high memory settings as quite a number of views, indexes and buffers are used. | ||
|
||
You can load multiple associations (right now limited to RFACE and pairwise) by using | ||
start_load_feature_associations.sh | ||
|
||
ie | ||
sh start_load_feature_associations.sh test_gbm /titan/cancerregulome3/TCGA/outputs/gbm/gbm.merge.u133a.31oct.hg18.tsv /titan/cancerregulome3/TCGA/outputs/gbm/rf.u133a.31oct.mask1.F/all_associations.out "test new flow" "test" internal /titan/cancerregulome3/TCGA/outputs/gbm/bigMerge.06feb12.1e04_1e08_8_0.pwpv | ||
|
||
Please contact [email protected] with any questions. | ||
Thanks |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#” sh /home/csbgroup/public_html/RE/dataimport/python/load_rface_associations.sh tcga_coadread_nov10 /home/csbgroup/tcga/rf-ace/coadread/coadread.nov10.tsv /home/csbgroup/tcga/rf-ace/coadread/coadread.nov10.associations.filtered.tsv "TCGA Colorectal" "466 patients" public ” | ||
# | ||
[build] | ||
afm=/path/matrix.afm.tsv | ||
annotations=/path/matrix.feature.annotation.tsv | ||
associations=/path/rface.associations | ||
afm_description=TUT Prostate AFM 20K features 100 samples | ||
comment=your build comment | ||
dataset_label=gbm_dataset_01May | ||
intermediate_results_dir=./results |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
[mysql_configs] | ||
host= | ||
port=3306 | ||
db=tcga | ||
username= | ||
password= | ||
|
||
[solr_configs] | ||
solrpath=http://host:8080/solr | ||
|
||
[cutoff] | ||
pvalue=.5 | ||
#rface method | ||
importance=0.0001 | ||
correlation=0 | ||
#pairwise method | ||
loggedpvalue=-4 | ||
|
||
#this is used for dataset label, ie cancer_type_method_date | ||
#the list tokens can be any string as long as it matches, otherwise | ||
#the update dataset script will fail | ||
[cancer_types] | ||
list=brca,coad,coadread,gbm,ov | ||
|
||
[pubcrawl] | ||
dopubcrawl=no | ||
pubcrawl_contact= | ||
|
||
[results] | ||
path= | ||
dosmtp=no | ||
notify= |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
[mysql_configs] | ||
host=breve | ||
port=3306 | ||
db=tcga | ||
username=visquick_rw | ||
password=r34dwr1t3 | ||
|
||
[solr_configs] | ||
solrpath=http://glados9:7080/solr | ||
|
||
#[cutoff] | ||
#pvalue=.5 | ||
#rface method | ||
#importance=0.0001 | ||
#correlation=0 | ||
#pairwise method | ||
#loggedpvalue=-4 | ||
|
||
#[cancer_types] | ||
#list=brca,coad,coadread,gbm,ov | ||
|
||
[pubcrawl] | ||
dopubcrawl=no | ||
[email protected],[email protected] | ||
|
||
#[results] | ||
#path=/local/tcga/re_dbetl | ||
#/proj/ilyalab/jlin/load_associations/dataimport_meta/python/results | ||
|
||
[notification] | ||
dosmtp=no | ||
[email protected] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
[mysql_configs] | ||
host= | ||
port= | ||
db= | ||
username= | ||
password= | ||
|
||
[cutoff] | ||
pvalue=.5 | ||
#rface method | ||
importance=0.0001 | ||
correlation=0 | ||
#pairwise method | ||
loggedpvalue=-4 | ||
|
||
#this is used for dataset label, ie cancer_type_method_date | ||
#the list tokens can be any string as long as it matches, otherwise | ||
#the update dataset script will fail | ||
[cancer_types] | ||
list=brca,coad,coadread,gbm,ov,kirc | ||
|
||
[pubcrawl] | ||
dopubcrawl=no | ||
[email protected],[email protected] | ||
|
||
[results] | ||
path=/titan/cancerregulome3/TCGA/outputs_sandbox/parsed_associations | ||
dosmtp=no | ||
[email protected],[email protected],[email protected],[email protected],[email protected] |
Oops, something went wrong.