From a1805fc978241c30bf3a487a43fcc2a2c03d68c6 Mon Sep 17 00:00:00 2001 From: marwan-abdellah Date: Tue, 20 Jun 2023 14:35:28 +0200 Subject: [PATCH 1/3] Fixing an issue with the security of the server by replacing all the urrlib calls with requests. --- rest_wrapper/rest_wrapper.py | 103 ++++++++++++++++------------------- 1 file changed, 48 insertions(+), 55 deletions(-) diff --git a/rest_wrapper/rest_wrapper.py b/rest_wrapper/rest_wrapper.py index 850e64e..9702612 100644 --- a/rest_wrapper/rest_wrapper.py +++ b/rest_wrapper/rest_wrapper.py @@ -1,23 +1,25 @@ -"""Making use of the REST API (NeuroMorpho.org v7) to query the database.""" -# python v2 or v3 -try: - from urllib2 import urlopen, Request, URLError -except ImportError: - from urllib.request import urlopen, Request, URLError +"""Making use of the REST API (NeuroMorpho.org v7, and updated to v8.5) to query the database.""" -import re, json, sys +import re, sys, requests # pseudo-constants NEUROMORPHO_URL = "http://neuromorpho.org" MAX_NEURONS_PER_PAGE = 500 +requests.packages.urllib3.disable_warnings() +requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS += ':HIGH:!DH:!aNULL' +try: + requests.packages.urllib3.contrib.pyopenssl.util.ssl_.DEFAULT_CIPHERS += ':HIGH:!DH:!aNULL' +except AttributeError: + pass + def validate_response_code(response): """Checks response code from JSON request and print warning then exits Keyword arguments: response -- response of the issued JSON request """ - code = response.getcode() + code = response.status_code # success if code == 200: return @@ -39,21 +41,13 @@ def check_api_health(): Returns true if API is available or false otherwise """ url = "http://neuromorpho.org/api/health" - req = Request(url) - - try: - response = urlopen(req) - if json.loads(response.read().decode('utf-8'))['status'] != "UP": - print("REST API not available.") - else: - return True - except URLError: - print(""" - No network connectivity. A working internet connection is required. 
- Check with ISP and/or your local network infrastructure for failure. - """) - return False - + + reply = requests.get(url, verify=False) + if reply.json()['status'] != "UP": + print("REST API not available.") + return False + else: + return True def get_num_neurons(num_neurons): """Get number of neurons. API can handle only up to 500 neurons per page @@ -97,22 +91,21 @@ def get_swc_by_filter_rule_for_search_term(filter_string_list, search_term, num_ pairs = pairs + [fq.replace(" ", "%20").split("=") for fq in filterString] url = url + "&".join(["fq=%s:%s" % (k, v) for (k, v) in pairs]) - req = Request(url) - response = urlopen(req) - validate_response_code(response) - total_pages = json.loads(response.read().decode("utf-8"))['page']['totalPages'] + reply = requests.get(url, verify=False) + validate_response_code(reply) + total_pages = reply.json()['page']['totalPages'] num_neuron_pages = get_neuron_pages(num_neurons, total_pages) count = 0 for page in range(0, num_neuron_pages): url = url + "&size=%i&page=%i" % (num_neurons, page) - req = Request(url) - response = urlopen(req) - neurons = json.loads(response.read().decode("utf-8")) + reply = requests.get(url, verify=False) + neurons = reply.json() num_neurons = len(neurons['_embedded']['neuronResources']) count = 0 for neuron in range(0, num_neurons): # get each file - if index == -1: get_swc_by_neuron_name(neurons['_embedded']['neuronResources'][neuron]['neuron_name']) + if index == -1: + get_swc_by_neuron_name(neurons['_embedded']['neuronResources'][neuron]['neuron_name']) # get only file with index in this html view if neuron - count == index: @@ -141,19 +134,21 @@ def get_swc_by_neuron_index(neuronIndex): """ if not check_api_health(): return url = "%s/api/neuron/id/%i" % (NEUROMORPHO_URL, neuronIndex) - req = Request(url) - response = urlopen(req) - validate_response_code(response) - neuron_name = json.loads(response.read().decode("utf-8"))['neuron_name'] + reply = requests.get(url, verify=False) + + 
validate_response_code(reply) + + neuron_name = reply.json()['neuron_name'] url = "%s/neuron_info.jsp?neuron_name=%s" % (NEUROMORPHO_URL, neuron_name) - html = urlopen(url).read().decode("utf-8") + reply = requests.get(url, verify=False) + html = reply.content.decode("utf-8") p = re.compile(r'Morphology File \(Standardized\)', re.MULTILINE) m = re.findall(p, html) for match in m: file_name = match.replace("%20", " ").split("/")[-1] - response = urlopen("%s/dableFiles/%s" % (NEUROMORPHO_URL, match)) + reply = requests.get(url="%s/dableFiles/%s" % (NEUROMORPHO_URL, match), verify=False) with open(file_name, 'w') as f: - f.write(response.read().decode('utf-8')) + f.write(reply.content.decode('utf-8')) def get_swc_by_neuron_name(neuron_name): @@ -164,15 +159,17 @@ def get_swc_by_neuron_name(neuron_name): """ if not check_api_health(): return url = "%s/neuron_info.jsp?neuron_name=%s" % (NEUROMORPHO_URL, neuron_name) - html = urlopen(url).read().decode("utf-8") + reply = requests.get(url, verify=False) + html = reply.content.decode('utf-8') p = re.compile(r'Morphology File \(Standardized\)', re.MULTILINE) m = re.findall(p, html) file_name = None for match in m: file_name = match.replace("%20", " ").split("/")[-1] - response = urlopen("%s/dableFiles/%s" % (NEUROMORPHO_URL, match)) + reply = requests.get(url="%s/dableFiles/%s" % (NEUROMORPHO_URL, match), verify=False) with open(file_name, 'w') as f: - f.write(response.read().decode('utf-8')) + f.write(reply.content.decode("utf-8")) + # check for file name presence in database if not file_name: print("Neuron with name %s not found in NeuroMorpho.org database." 
% neuron_name) @@ -201,17 +198,15 @@ def get_swc_by_brain_region(brain_region, num_neurons=-1): num_neurons = get_num_neurons(num_neurons) url = "%s/api/neuron/select?q=brain_region:%s&size=%i" % (NEUROMORPHO_URL, brain_region, num_neurons) - req = Request(url) - response = urlopen(req) - validate_response_code(response) - total_pages = json.loads(response.read().decode("utf-8"))['page']['totalPages'] + reply = requests.get(url, verify=False) + validate_response_code(reply) + total_pages = reply.json()['page']['totalPages'] num_neuron_pages = get_neuron_pages(num_neurons, total_pages) for page in range(0, num_neuron_pages): url = "%s/api/neuron/select?q=brain_region:%s&size=%i&page=%i" % ( NEUROMORPHO_URL, brain_region, num_neurons, page) - req = Request(url) - response = urlopen(req) - neurons = json.loads(response.read().decode("utf-8")) + reply = requests.get(url, verify=False) + neurons = reply.json() num_neurons = len(neurons['_embedded']['neuronResources']) for neuron in range(0, num_neurons): get_swc_by_neuron_name(neurons['_embedded']['neuronResources'][neuron]['neuron_name']) @@ -235,16 +230,14 @@ def get_swc_by_archive_name(archive_name, num_neurons=-1): num_neurons = get_num_neurons(num_neurons) url = "%s/api/neuron/select?q=archive:%s&size=%i" % (NEUROMORPHO_URL, archive_name, num_neurons) - req = Request(url) - response = urlopen(req) - validate_response_code(response) - total_pages = json.loads(response.read().decode("utf-8"))['page']['totalPages'] + reply = requests.get(url, verify=False) + validate_response_code(reply) + total_pages = reply.json()['page']['totalPages'] num_neuron_pages = get_neuron_pages(num_neurons, total_pages) for page in range(0, num_neuron_pages): url = "%s/api/neuron/select?q=archive:%s&size=%i&page=%i" % (NEUROMORPHO_URL, archive_name, num_neurons, page) - req = Request(url) - response = urlopen(req) - neurons = json.loads(response.read().decode("utf-8")) + reply = requests.get(url, verify=False) + neurons = reply.json() 
num_neurons = len(neurons['_embedded']['neuronResources']) for neuron in range(0, num_neurons): get_swc_by_neuron_name(neurons['_embedded']['neuronResources'][neuron]['neuron_name']) \ No newline at end of file From 76287bfd97ae6bea91b0955b781744f60530efea Mon Sep 17 00:00:00 2001 From: marwan-abdellah Date: Tue, 20 Jun 2023 16:19:03 +0200 Subject: [PATCH 2/3] Adding parallel puller by archive and fixing an issue with the README. --- README.md | 10 +- get_swc_by_archive_parallel.py | 1003 ++++++++++++++++++++++++++++++++ rest_wrapper/rest_wrapper.py | 29 +- 3 files changed, 1028 insertions(+), 14 deletions(-) create mode 100644 get_swc_by_archive_parallel.py diff --git a/README.md b/README.md index ea6df61..bd57d72 100644 --- a/README.md +++ b/README.md @@ -76,22 +76,22 @@ Get a neuron of this search term by index of total neurons matching the search c ### Example 3 Get a neuron (Here: The first neuron in the database) by the global index (1) from the database: -- `python get_SWC.py --index 1` +- `python get_swc.py --index 1` ### Example 4 Get a neuron by it's known name and download as SWC file: -- `python get_SWC.py --name cnic_001` +- `python get_swc.py --name cnic_001` ### Example 5 The following will download *all* SWC files from the region *Neocortex* to the current working directory -- `python get_SWC.py --region neocortex` +- `python get_swc.py --region neocortex` and the following command will just download the first ten neurons from the *Neocortex* region. 
-- `python get_SWC.py --region neocortex --neurons 10` +- `python get_swc.py --region neocortex --neurons 10` ### Example 6 To download a whole *Smith* archive in SWC format from the database, use the following commmand: -- `python get_SWC.py --archive Smith` +- `python get_swc.py --archive Smith` diff --git a/get_swc_by_archive_parallel.py b/get_swc_by_archive_parallel.py new file mode 100644 index 0000000..645f814 --- /dev/null +++ b/get_swc_by_archive_parallel.py @@ -0,0 +1,1003 @@ +import os +import joblib +import subprocess + + +def execute_command(shell_command): + subprocess.call(shell_command, shell=True) + + +def execute_commands(shell_commands): + for shell_command in shell_commands: + print(shell_command) + execute_command(shell_command) + +def execute_commands_parallel(shell_commands): + from joblib import Parallel, delayed + Parallel(n_jobs=os.cpu_count())(delayed(execute_command)(i) for i in shell_commands) + + +def construct_command_per_archive(archive): + return "python3.8 get_swc.py --archive %s" % archive + + +def main(): + + archives = [ + "Abdolhoseini_Kluge", + "Abrous", + "Acharya", + "Achim_Partanen", + "Acsady", + "Adke_Carrasquillo", + "Adori", + "Aharon_Zuo", + "Ahmed", + "Alilain", + "Allen Cell Types", + "Allman", + "Almeida", + "Almuhtasib", + "Althammer", + "Alvarez", + "Alzheimer", + "Amaral", + "Anderson", + "Andolina", + "Andreae", + "Anselmi_Travagli", + "Anstoetz", + "Antic", + "Anton", + "Araujo", + "Arenkiel", + "Argue", + "Arnold_Johnston", + "Artinian", + "Ascoli", + "Ataman_Boulting", + "Auer", + "Avendano", + "Bacci", + "Bacigaluppi", + "Back", + "Badea", + "Baharani_A", + "Baier", + "Bailey", + "Baldwin", + "Balleine", + "Ballester-Rosado", + "Baltussen_Ultanir", + "Bandtlow", + "Banerjee", + "Bannatyne", + "Bao_Hechen", + "Baptista_Sousa", + "Barbour", + "Barco_Scandaglia", + "Bardy", + "Bareyre", + "Baro", + "Barreda", + "Barrionuevo", + "Bartmann", + "Bartos", + "Bastian", + "Bathellier_Larkum", + "Baulac", + "Bausch", + 
"Bazan", + "Becchetti", + "Beck", + "Beckstead", + "Beer_Foerster", + "Beguin", + "Beining", + "Bellemer", + "Bellesi_Zhang", + "Ben-Ari", + "Bengochea", + "Bercier", + "Bergstrom", + "Berkowitz", + "Bertels", + "Bevan_Chu", + "Bianchi", + "Bianco", + "Bicanic", + "BICCN-MOp-miniatlas-anatomy", + "Bikson", + "Bilkei-Gorzo", + "Bird", + "Birinyi", + "Bito", + "Blackman", + "Blackwell", + "Blagburn", + "Bleckert", + "Bock", + "Boesmans_VandenBerghe", + "Borst", + "Bove_Travagli", + "Boyle", + "Bragado-Alonso", + "Brager", + "Branchereau_Cattaert", + "Brecha", + "Brecht", + "Brennand", + "Briggman", + "Briggs", + "Brown", + "Browning", + "Browning_Travagli", + "Bruckner", + "Brumberg", + "Brunjes", + "Brunner", + "Bu", + "Buchs", + "Buelow", + "Buffo", + "Bullmann", + "Burdakov", + "Burke", + "Burkhalter", + "Buskila", + "Buzsaki", + "Bu_Yu", + "Cai", + "Caille", + "Calabresi_Ghiglieri", + "Calderon", + "Cameron", + "Campos", + "Canavesi", + "Capogna", + "Cappelli_Buratti", + "Cardona", + "Carlen", + "Carriba", + "Carvalho", + "Castanho_Oliveira", + "Cauli", + "Caviedes", + "Chadderton", + "Chaimowicz", + "Chalupa", + "Chandrashekar", + "Chang", + "Chao", + "Charlet", + "Chen", + "Cheng", + "Chen_B", + "Chen_D", + "Chen_Dai", + "Chen_Dong", + "Chen_Maniatis", + "Chen_Qu", + "Chen_SK", + "Chen_YP", + "Chen_Zhao", + "Chiang", + "Chklovskii", + "Chmykhova", + "Cho", + "Ciosk", + "Cirelli", + "Claiborne", + "Cline", + "Coate", + "Cohen", + "Cohen-Cory", + "Cohen_A", + "Collin", + "Consiglio", + "Conte", + "Conti", + "Coombes", + "Cossart", + "Cox", + "da Silva", + "Dahmen", + "Dallman", + "Danzer", + "Daria", + "Darnell", + "De Bartolo", + "De Koninck", + "De Paola", + "De Schutter", + "DeFelipe", + "Dehorter", + "deKloet_Mansvelder", + "DeKock", + "Del Negro", + "Delvalle", + "DeMoya_Morrison", + "DeMunter", + "Dendritica", + "Deng", + "Denizet", + "Denk", + "Destexhe", + "Dhanya", + "Di Benedetto", + "DIADEM", + "Diana", + "Dias_Lima", + "Diaz", + "Dickstein", + 
"DiCristo", + "Dierssen", + "DiLisa", + "Ding_Feldmeyer", + "Diniz", + "Djenoune_wyart", + "Doe", + "Dolci_Decimo", + "Dong", + "Dragich", + "Drago", + "Duan", + "Duch", + "Dudek", + "Dumas", + "Duque-Tahvildari", + "Dusart", + "Dwyer", + "Egger", + "Eickholt", + "Ellender", + "Ellis", + "Epsztein", + "Eroglu", + "Esclapez", + "Evans", + "EyeWire", + "Falcone", + "Farrow", + "Faucherre", + "Feldmeyer", + "Feller", + "Fellin", + "Feng", + "Ferguson", + "Fernandez-Acosta_Vicario", + "Fernandez-Ruiz", + "Field", + "Firestein", + "Fishell", + "Fisher", + "Flores", + "FlyEM", + "Foerster", + "Folgueira", + "Fontana", + "Franca", + "Frankland", + "Franklin", + "Fricker", + "Fried", + "Fujita", + "Fukunaga", + "Fulton", + "Fyffe", + "Gabbiani", + "Gage", + "Gage_Toda", + "Galliano_Grubb", + "Galloni_Rancz", + "Gangras", + "Garcia", + "Garcia-Cairasco", + "Garcia-Hernandez_DeSantis", + "Gareau", + "Gartner-Alpar", + "Gaspar", + "Gauron", + "Gazan", + "Geis_Borst", + "Gerber", + "Gesuita_Karayannis", + "Ghatak_Lipton", + "Ghosh", + "Giniger", + "Ginty", + "Giugliano", + "Glennon", + "GliaLab", + "Goaillard", + "Goldman-Rakic", + "Gomes", + "Gonzalez-Arenas", + "Gonzalez-Burgos", + "Gradinaru", + "Grasselli", + "Grasselli_Hansel", + "Greathouse_Herskowitz", + "Greggio", + "Griego_Galvan", + "Grilli", + "Groen", + "Groh", + "Grtuzendler_Hill", + "Grunert", + "Guerra da Rocha", + "Gugula", + "Guizzetti", + "Gulledge", + "Gulyas", + "Gunnersen", + "Guo", + "Guo_Xia", + "Gupton_Song", + "Guthrie", + "Hajos", + "Hale", + "Halnes", + "Hamad", + "Hannibal", + "Han_etal", + "Harland", + "Hart", + "Hay", + "Hayes", + "Heistek_Mansvelder", + "Helmstaedter", + "Hen", + "Henckens", + "Henny", + "Henny_Roeper", + "Heppner", + "Herget", + "Hernandez-Garzon", + "Herring_Keyvani", + "Hess", + "Hetman", + "Hierlemann", + "Higgs", + "Hildebrand", + "Hioki", + "Hirsch", + "Hoffman", + "Hof_Varghese", + "Howard", + "Hoxha", + "Hrvoj-Mihic_Semendeferi", + "Hsueh", + "Huang", + "Huang_JY", + 
"Huang_ML", + "Huang_ZJ", + "Huberman", + "Huber_Haapasalo", + "Huebener", + "Hughes", + "Huh", + "Hunt", + "Hur", + "Hurd", + "HustTDI", + "Huynh", + "Hu_Cheng_Soba", + "Hu_Soba", + "Hwang", + "Hyunsoo", + "H_Zhang", + "Iascone", + "Igarashi", + "Ikeno", + "Illiano_Pardo", + "Imai", + "Irintchev", + "Isaacson", + "Issa_Schwalbe", + "Ito", + "Jaarsma", + "Jacobs", + "Jacobs_G", + "Jacoby_Schwartz", + "Jaeger", + "Jaffe", + "Jan", + "Jefferis", + "Jekely", + "Jensen", + "Jiezheng", + "Johenning", + "Johnson", + "Johnson_T", + "Johnston", + "Johnston_J", + "Jonas", + "Jongbloets", + "Jubao_Forrest", + "Kabbani", + "Kadow", + "Kameda_Kaneko", + "Kamikouchi", + "Kanatsou", + "Kana_Desland", + "Kannan", + "Kanninen", + "Kantor", + "Kantor_Szarka", + "Karaca_Oliveira", + "Karlsson", + "Karube", + "Katona", + "Kavetsky", + "Kawaguchi", + "Ka_Kim", + "Kellendonk", + "Keller", + "Kelliher", + "Kengaku", + "Keyvani", + "Khankan", + "Khodosevich", + "Kilb", + "Kim", + "Kimura", + "Kimura_Tohyama", + "Kim_JH", + "King", + "Kist", + "Kisvarday", + "Kiyota_Machhi", + "Klatt_Heine", + "Klausberger", + "Kodali_Shetty", + "Koehl", + "Kojic", + "Kole", + "Kondo", + "Kononenko_Haucke", + "Kontou_Kittler", + "Kornfeld", + "Korngreen", + "Korte", + "Koshy", + "Kosillo", + "Kosillo_Ahmed", + "Kosta_Jones", + "Kougias_Juraska", + "Krajka", + "Kramvis_Spijker", + "Krasnoshchekova", + "Krieger", + "Krishnaswamy", + "Kroon", + "Kuan_Phelps", + "Kubota", + "Kuddannaya", + "Kula_Gugula", + "Kullander", + "La Barbera", + "Lagali", + "Lagartos-Donate", + "Lai", + "Lambe", + "Landgraf", + "Lanoue", + "Larkman", + "Lasik", + "Laukoter_Hippenmeyer", + "Laurent", + "Lee", + "Lee_CH", + "Lee_CJ", + "Lee_KH", + "Lee_LJ", + "Lefler_Amsalem", + "Lein", + "Leroy", + "Levkowitz", + "Lewis", + "Liao", + "Lien", + "Lim_Kitazawa", + "Lin", + "Linaro", + "Linyi_Chen", + "Lin_Zhang", + "Liu", + "Liu_XD", + "Liu_Zeng", + "Li_Bolshakov", + "Li_Yao", + "Loane", + "Lohr", + "Long", + "Lopes", + "Lopez-Bendito", + 
"Lopez-Schier", + "Lorenzati", + "Lovell", + "Lozano", + "Luders", + "Luebke", + "Luikart", + "Lukas", + "Lumpkin", + "Luo", + "Luzzati", + "Lu_Lichtman", + "Ma", + "MacAskill", + "Maccaferri", + "MacDonald", + "Macedo-Lima", + "Madeira_Tsirka", + "Maguire-Zeiss", + "Mailly", + "Makihara_Goshima", + "Mallick", + "Mancia-Leon", + "Manis", + "Mani_Schwartz", + "Manzini", + "Maras_Akil", + "Marder", + "Margrie", + "Marin", + "Markram", + "Martemyanov", + "Martin", + "Martina", + "Martineau", + "Martin_Avendano", + "Martone", + "Masachs", + "Masland", + "Maxwell", + "Ma_Pan", + "Ma_Serra", + "McBain", + "McKellar", + "McQuiston", + "Mearow", + "Mechawar", + "Medalla", + "Medan", + "Mehder", + "Meliza", + "Mellor", + "Mena-Segovia", + "Mentis", + "Mercer", + "Merchan-Perez", + "Mercuri", + "Meszena", + "Meucci", + "Meyer", + "Michaelson", + "Middei_Minichiello", + "Middledorp", + "Mikhaylova", + "Miller", + "Miller_Allison", + "Milnerwood", + "Miroschnikow", + "Miyasaka", + "Mizrahi", + "Molinard-Chenu", + "Molofsky", + "Montgomery_Masino", + "Monyer", + "Moons", + "Morara", + "Morelli", + "Morgan", + "Morimoto_Zhao_Reiser", + "Morrice", + "Morrow", + "Morsch", + "MouseLight", + "Moyer", + "Mueller_Chen", + "Mulholland", + "Munera", + "Munoz-Cobo", + "Muotri", + "Murase", + "Murer", + "Mustaparta_Lofaldli", + "Nacher", + "Naegele", + "Nagaeva_Korpi", + "Nakano_Doya", + "Namiki", + "Nam_Yu", + "Nandi", + "Narkilahti", + "Nath", + "Nathans", + "Nath_Schwartz", + "Nedelescu", + "Nedelescu-Aoki", + "Negyessy", + "Nelson", + "Neves", + "Newbern", + "Ngodup_Trussel", + "Niquille", + "Nishitoh", + "Nishiyama", + "Nixdorf-Bergweiler", + "Nolan", + "Nombela-Palomo", + "Nordman", + "Norkett", + "Northington", + "Nusser", + "Oboti", + "Oguro-Ando", + "Ohgomori", + "Olesnicky", + "Oliveira", + "Oliveira_Neumann", + "Olson", + "OpenWorm", + "Opitz", + "Orion", + "Orlowski", + "Ortega", + "Otsuka", + "Otto", + "Oviedo", + "Ozaki", + "Pal", + "Palmer", + "Pantelis_Mann", + 
"Papageorgiou_Kann", + "Papazian", + "Pappas_Dauer", + "Park", + "Parker", + "Park_Smirnakis", + "Parnas", + "Pascucci", + "Patterson_Neitz", + "Pearlstein_Hammond", + "Peca", + "Peng", + "Peng_YR", + "Peric_Bataveljic", + "Petersen", + "Peterson", + "Petreanu", + "Petrosini", + "Pham", + "Pierce", + "Piirainen", + "Pinto", + "Pirone", + "Pischedda_Piccoli", + "Piskorowski", + "Pita-Thomas", + "Poleg-Polsky", + "Poorthuis", + "Poria", + "Portera-Cailliau", + "Povysheva", + "Pozzo-Miller", + "Prida", + "Prince", + "Priya", + "Puglielli", + "Pushchin", + "Putatunda_Hu", + "Qiru", + "Qiu", + "Qu", + "Quan", + "Quinlan", + "Radic", + "Radley", + "Rainnie", + "Rajamani_Qvist", + "Raju_Oldham", + "Rancillac", + "Rancz", + "Rasia-Filho", + "Razetti", + "Reiser", + "Ren", + "Renner", + "Rhode", + "Ribic", + "Rice-Baylor", + "Richards", + "Rico", + "Rieger", + "Riera-Tur", + "Rishal", + "Rivlin", + "Robel", + "Roberts", + "Rochefort", + "Rockland", + "Rodger", + "Rodrigues", + "Rodrigues_Sousa_Cunha", + "Rose", + "Rosenberg", + "Rosenkranz", + "Rosner", + "Rossi", + "Routh", + "Roysam", + "Rubel", + "Rudy", + "Ruiyu", + "Rumbaugh", + "Rusakov", + "Ruthazer", + "Rybak_Knaden", + "Sachse", + "Saghatelyan", + "Sailor", + "Sakalar_Lasztoczi", + "Sanes", + "Santhakumar", + "Sanz-Morello", + "Sato-Bigbee", + "Sawtell", + "Scanziani", + "Schmitz", + "Schoppik_Schier", + "Schornig_Taverna", + "Schulz", + "Schwaller", + "Schwartz", + "Scimemi", + "Scott", + "Se-Jun", + "Segev", + "Seki", + "Sekulic", + "Sell", + "Selten_Kasri", + "Semendeferi_Muotri", + "Sengelaub", + "Serra_Colasante", + "Seung", + "Shah_Bamji", + "Shao_Chen", + "Shcheglovitov", + "Sheets", + "Shimell", + "Shinomiya_FlyEM", + "Siegelbaum", + "Siegert", + "Siemsen", + "Sikdar", + "Singh", + "Singh_K", + "Siqueira_Guil", + "Sitaraman", + "Si_Wang", + "Sjostrom", + "Skalecka", + "Skiteva_Chergui", + "Skyberg", + "Smart", + "Smit-Rigter", + "Smith", + "Smith-Cowan", + "Smith_Bilbo", + "Smith_Cowan", + "Smith_Koizumi", 
+ "Sng", + "Soba", + "Soltesz", + "Somogyi", + "Sondereker_Renna", + "Soriano", + "Southam", + "Spiga", + "Spruston", + "Sreenath", + "Stagkourakis_Broberger", + "Staiger", + "Stedehouder", + "Steelman", + "Steinecke_Taniguchi", + "Stevens", + "Stopfer", + "Storm", + "Straiker", + "Strettoi", + "Stuart", + "Studer", + "Sturner_Tavosanis", + "Su", + "Sully_Delogu", + "Summavielle", + "Sun", + "SungBae", + "Sun_Prince", + "Suter_Shepherd", + "Suzuki_Dityatev", + "Svoboda", + "Swanson", + "Sweedler", + "Sweet", + "Szatko_Franke", + "Szegedi", + "Szegedi_Lamsa", + "Szegedi_Paizs_Lamsa", + "Szoboszlay", + "Sztarker", + "Szucs", + "Tagawa", + "Takahashi", + "Takeuchi", + "Tamas", + "Tang-Schomer", + "Tan_Ng", + "Tao", + "Tarusawa", + "Tavakoli", + "Taylor", + "Tejos-Bravo_Fiedler", + "Temereva", + "Tendulkar", + "Tenner", + "Tensaouti", + "Tepper", + "Tessarollo_Yanpallewar", + "Thier", + "Thomas", + "TieYuan_Zhang", + "Timofeev", + "Tischfield", + "Tobin_Lee", + "Todd", + "Tolias", + "Tolias_Jiang", + "Topolnik", + "Tracey", + "Travagli", + "Trudeau", + "Trullas", + "Tsirka", + "Tsoulfas", + "Turner", + "Ullian", + "Urban", + "Urban_K", + "Usiello", + "Uteshev", + "Uusisaari", + "Vaasjo", + "Vaidya", + "Valero_Malva", + "Valiante", + "Valley", + "Vandenbroucke", + "vanderVoet_Castells", + "VanDerZee", + "VanHook", + "Vannini", + "Vannini_Restani_Caleo", + "vanWoerden", + "Varela-Nallar", + "vdHeuvel", + "Ventura", + "Venugopal_Travers", + "Vervaeke", + "Vicini", + "Vida", + "Vidal", + "VilaVerde", + "Vissel", + "Vitriol", + "Vivinetto_Cave", + "Vlachos", + "vonEngelhardt", + "VonGersdorff", + "vonWyl_vomBerg", + "Votruba", + "Vuksic", + "Wadiche", + "Wake", + "Wallace_Palmer", + "Wang", + "Wang_Hadjab", + "Wang_I-F", + "Wang_J", + "Wang_Lefebvre", + "Wang_Maas", + "Wang_Olson", + "Wang_XD", + "Wang_Y", + "Wang_Ye", + "Wang_Zhou", + "Wanner_Friedrich", + "Watt", + "Wearne_Hof", + "Weigel", + "Weil", + "Weir", + "Weiss_Manzini", + "Wellman", + "Wenzel_Berman", + "Wernig", 
+ "Wernitznig", + "West", + "Weston", + "West_Hofer", + "Whiddon_Krimm", + "White", + "Wildenberg_Kasthuri", + "Williams", + "Williams_Wilson", + "Wilson_R", + "Wittner", + "Wolf", + "Wong", + "Wong_H", + "Wong_Silver", + "Wotjak", + "Wright_Heuckeroth", + "Wu", + "Wu_Yu", + "Xiong", + "Xue", + "Yamada", + "Yamao", + "Yamawaki", + "Yang", + "Yang-CY", + "Yang_CY", + "Yang_Liu", + "Yang_XW", + "Yayon_Soreq", + "Ye", + "Yen_Chen", + "Yin", + "Yin_Yuan", + "Yorek", + "Yousheng", + "Yu", + "Yuan", + "Yuste", + "Yu_Wong", + "Zagreb", + "Zaitsev", + "Zamboni_Merlo", + "Zeiser", + "Zeng", + "Zhang", + "Zhang_Hu", + "Zhang_Jiang", + "Zhang_L", + "Zhang_S", + "Zhang_X", + "Zhan_Kipp", + "Zhao", + "Zhao_Yuan", + "Zheng", + "Zhong_Mao_Ganguli", + "Zhou", + "Zhou_Heijnen", + "Zhuang", + "Ziegler_Tavosanis", + "Zoghbi", + "Zorrilla_Bacci", + "Zwart", + "Zylbertal" + ] + + commands = list() + for archive in archives: + commands.append(construct_command_per_archive(archive=archive)) + execute_commands_parallel(shell_commands=commands) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/rest_wrapper/rest_wrapper.py b/rest_wrapper/rest_wrapper.py index 9702612..6350d3f 100644 --- a/rest_wrapper/rest_wrapper.py +++ b/rest_wrapper/rest_wrapper.py @@ -1,7 +1,7 @@ """Making use of the REST API (NeuroMorpho.org v7, and updated to v8.5) to query the database.""" -import re, sys, requests - +import re, sys, os, requests + # pseudo-constants NEUROMORPHO_URL = "http://neuromorpho.org" MAX_NEURONS_PER_PAGE = 500 @@ -14,6 +14,15 @@ pass +def verify_directory(directory): + """Verifies a directory. 
+ """ + + if not os.path.isdir(directory): + os.mkdir(directory) + + + def validate_response_code(response): """Checks response code from JSON request and print warning then exits Keyword arguments: @@ -105,11 +114,11 @@ def get_swc_by_filter_rule_for_search_term(filter_string_list, search_term, num_ for neuron in range(0, num_neurons): # get each file if index == -1: - get_swc_by_neuron_name(neurons['_embedded']['neuronResources'][neuron]['neuron_name']) + get_swc_by_neuron_name(search_term, neurons['_embedded']['neuronResources'][neuron]['neuron_name']) # get only file with index in this html view if neuron - count == index: - get_swc_by_neuron_name(neurons['_embedded']['neuronResources'][neuron]['neuron_name']) + get_swc_by_neuron_name(search_term, neurons['_embedded']['neuronResources'][neuron]['neuron_name']) return neurons['_embedded']['neuronResources'][neuron]['neuron_name'] # increase count here count = neuron + num_neurons @@ -151,7 +160,7 @@ def get_swc_by_neuron_index(neuronIndex): f.write(reply.content.decode('utf-8')) -def get_swc_by_neuron_name(neuron_name): +def get_swc_by_neuron_name(directory, neuron_name): """Download the SWC file specified by the neuron's name Keyword arguments: @@ -167,7 +176,9 @@ def get_swc_by_neuron_name(neuron_name): for match in m: file_name = match.replace("%20", " ").split("/")[-1] reply = requests.get(url="%s/dableFiles/%s" % (NEUROMORPHO_URL, match), verify=False) - with open(file_name, 'w') as f: + verify_directory(directory=directory) + + with open('%s/%s' % (directory, file_name), 'w') as f: f.write(reply.content.decode("utf-8")) # check for file name presence in database @@ -209,7 +220,7 @@ def get_swc_by_brain_region(brain_region, num_neurons=-1): neurons = reply.json() num_neurons = len(neurons['_embedded']['neuronResources']) for neuron in range(0, num_neurons): - get_swc_by_neuron_name(neurons['_embedded']['neuronResources'][neuron]['neuron_name']) + get_swc_by_neuron_name(brain_region, 
neurons['_embedded']['neuronResources'][neuron]['neuron_name']) def get_swc_by_archive_name(archive_name, num_neurons=-1): @@ -227,7 +238,7 @@ def get_swc_by_archive_name(archive_name, num_neurons=-1): if not archive_name[0].isupper(): print("Warning: archive name does not start with upper case letter") return - + num_neurons = get_num_neurons(num_neurons) url = "%s/api/neuron/select?q=archive:%s&size=%i" % (NEUROMORPHO_URL, archive_name, num_neurons) reply = requests.get(url, verify=False) @@ -240,4 +251,4 @@ def get_swc_by_archive_name(archive_name, num_neurons=-1): neurons = reply.json() num_neurons = len(neurons['_embedded']['neuronResources']) for neuron in range(0, num_neurons): - get_swc_by_neuron_name(neurons['_embedded']['neuronResources'][neuron]['neuron_name']) \ No newline at end of file + get_swc_by_neuron_name(archive_name, neurons['_embedded']['neuronResources'][neuron]['neuron_name']) \ No newline at end of file From fb32fb473bd60c62f4f780875ec3409100e6c552 Mon Sep 17 00:00:00 2001 From: marwan-abdellah Date: Tue, 20 Jun 2023 16:36:56 +0200 Subject: [PATCH 3/3] Adding a parallel script to fetch the data by archive. 
--- get_swc.py | 26 +++++++++++++++++--------- get_swc_by_archive_parallel.py | 11 ++++++++--- rest_wrapper/rest_wrapper.py | 28 ++++++++++++++++------------ 3 files changed, 41 insertions(+), 24 deletions(-) diff --git a/get_swc.py b/get_swc.py index 3faada2..2277d4e 100644 --- a/get_swc.py +++ b/get_swc.py @@ -1,43 +1,51 @@ #!/usr/bin/env python +import os from rest_wrapper.rest_wrapper import * import argparse from geometry_tools.geometry_tools import check_cylinder_intersections parser = argparse.ArgumentParser(description="Access NeuroMorpho.org v7 w/ REST API and download SWC files") +parser.add_argument('--output', required=True, type=str, help="The output directory where the results will be written") parser.add_argument('--region', required=False, type=str, help="Brain region", metavar="R") parser.add_argument('--neurons', required=False, type=int, help="Count of neurons (-1 means all)", metavar="C") parser.add_argument('--name', required=False, type=str, help="Name of neuron", metavar="N") parser.add_argument('--index', required=False, type=int, help="Index of neuron", metavar="I") parser.add_argument('--archive', required=False, type=str, help="Archive name", metavar="A") -parser.add_argument('--filters', required=False, type=str, help="One or multuple filters", metavar="[FILTER]", - action='append', nargs=1) +parser.add_argument('--filters', required=False, type=str, help="One or multuple filters", metavar="[FILTER]", action='append', nargs=1) parser.add_argument('--search', required=False, type=str, help="Search term", metavar="S") parser.add_argument('--validate', action='store_true', help="Check for cylinder intersections") - args = parser.parse_args() +# verify the output directory +if not os.path.isdir(args.output): + try: + os.mkdir(args.output) + except: + print('Please provide a valid directory. 
The directory [%s] does not exist' % args.output) + exit(0) + if args.region: numNeurons = (args.neurons != -1 and args.neurons) or -1 brainRegion = (args.region != -1 and args.region) or "neocortex" - get_swc_by_brain_region(brainRegion, numNeurons) + get_swc_by_brain_region(brainRegion, numNeurons, output_dir=args.output) elif args.archive: numNeurons = (args.neurons != -1 and args.neurons) or -1 archiveName = (args.archive != -1 and args.archive) or "Smith" - get_swc_by_archive_name(archiveName, numNeurons) + get_swc_by_archive_name(archiveName, numNeurons, output_dir=args.output) elif args.search: if args.index: - fileName = get_swc_by_filter_rule_for_search_term(args.filters, args.search, 500, args.index) + fileName = get_swc_by_filter_rule_for_search_term(args.filters, args.search, 500, args.index, output_dir=args.output) if args.validate: print(check_cylinder_intersections(fileName)) else: - fileName = get_swc_by_filter_rule_for_search_term(args.filters, args.search, args.neurons, -1) + fileName = get_swc_by_filter_rule_for_search_term(args.filters, args.search, args.neurons, -1, output_dir=args.output) if args.validate: print(check_cylinder_intersections(fileName)) elif not (args.region or not ((args.index is not None) ^ (args.name is not None))): if args.index: - get_swc_by_neuron_index(args.index) + get_swc_by_neuron_index(args.index, output_dir=args.output) if args.name: - fileName = get_swc_by_neuron_name(args.name) + fileName = get_swc_by_neuron_name(args.name, output_dir=args.output) if args.validate: check_cylinder_intersections(fileName) else: parser.print_help() diff --git a/get_swc_by_archive_parallel.py b/get_swc_by_archive_parallel.py index 645f814..8895c46 100644 --- a/get_swc_by_archive_parallel.py +++ b/get_swc_by_archive_parallel.py @@ -1,6 +1,7 @@ import os import joblib import subprocess +import argparse def execute_command(shell_command): @@ -17,12 +18,16 @@ def execute_commands_parallel(shell_commands): 
Parallel(n_jobs=os.cpu_count())(delayed(execute_command)(i) for i in shell_commands) -def construct_command_per_archive(archive): - return "python3.8 get_swc.py --archive %s" % archive +def construct_command_per_archive(archive, output_dir): + return "python3.8 get_swc.py --archive %s --output=%s" % (archive, output_dir) def main(): + parser = argparse.ArgumentParser(description="Parallel access NeuroMorpho.org v8.5 w/ REST API and download SWC files") + parser.add_argument('--output', required=True, type=str, help="The output directory where the results will be written") + args = parser.parse_args() + archives = [ "Abdolhoseini_Kluge", "Abrous", @@ -996,7 +1001,7 @@ def main(): commands = list() for archive in archives: - commands.append(construct_command_per_archive(archive=archive)) + commands.append(construct_command_per_archive(archive=archive, output_dir=args.output)) execute_commands_parallel(shell_commands=commands) if __name__ == "__main__": diff --git a/rest_wrapper/rest_wrapper.py b/rest_wrapper/rest_wrapper.py index 6350d3f..6fe073f 100644 --- a/rest_wrapper/rest_wrapper.py +++ b/rest_wrapper/rest_wrapper.py @@ -80,13 +80,14 @@ def get_neuron_pages(num_neurons, total_pages): return min(total_pages, num_neurons / MAX_NEURONS_PER_PAGE if num_neurons > MAX_NEURONS_PER_PAGE else 1) -def get_swc_by_filter_rule_for_search_term(filter_string_list, search_term, num_neurons, index=-1): +def get_swc_by_filter_rule_for_search_term(filter_string_list, search_term, num_neurons, index=-1, output_dir=""): """Downloads n neurons by filterString and stores as SWC files Keyword arguments: filter_string_list -- the filter string as key value pairs search_term-- the search term num_neurons -- number of neurons + output_dir: the output directory where the swc files will be written """ if not check_api_health(): return url = "%s/api/neuron/select?q=%s&" % (NEUROMORPHO_URL, search_term.replace("=", ":")) @@ -114,11 +115,11 @@ def 
get_swc_by_filter_rule_for_search_term(filter_string_list, search_term, num_ for neuron in range(0, num_neurons): # get each file if index == -1: - get_swc_by_neuron_name(search_term, neurons['_embedded']['neuronResources'][neuron]['neuron_name']) + get_swc_by_neuron_name(search_term, neurons['_embedded']['neuronResources'][neuron]['neuron_name'], output_dir=output_dir) # get only file with index in this html view if neuron - count == index: - get_swc_by_neuron_name(search_term, neurons['_embedded']['neuronResources'][neuron]['neuron_name']) + get_swc_by_neuron_name(search_term, neurons['_embedded']['neuronResources'][neuron]['neuron_name'], output_dir=output_dir) return neurons['_embedded']['neuronResources'][neuron]['neuron_name'] # increase count here count = neuron + num_neurons @@ -135,7 +136,7 @@ def get_swc_by_filter_rule_for_search_term_by_index(filterStringList, searchTerm get_swc_by_filter_rule_for_search_term(filterStringList, searchTerm, -1, index) -def get_swc_by_neuron_index(neuronIndex): +def get_swc_by_neuron_index(neuronIndex, output_dir): """Download a neuron by index and store it into a SWC file Keyword arguments: @@ -154,13 +155,13 @@ def get_swc_by_neuron_index(neuronIndex): p = re.compile(r'Morphology File \(Standardized\)', re.MULTILINE) m = re.findall(p, html) for match in m: - file_name = match.replace("%20", " ").split("/")[-1] + file_name = "%s/%s" % (output_dir, match.replace("%20", " ").split("/")[-1]) reply = requests.get(url="%s/dableFiles/%s" % (NEUROMORPHO_URL, match), verify=False) with open(file_name, 'w') as f: f.write(reply.content.decode('utf-8')) -def get_swc_by_neuron_name(directory, neuron_name): +def get_swc_by_neuron_name(directory, neuron_name, output_dir): """Download the SWC file specified by the neuron's name Keyword arguments: @@ -176,9 +177,9 @@ def get_swc_by_neuron_name(directory, neuron_name): for match in m: file_name = match.replace("%20", " ").split("/")[-1] reply = requests.get(url="%s/dableFiles/%s" % 
(NEUROMORPHO_URL, match), verify=False)
-        verify_directory(directory=directory)
+        verify_directory(directory="%s/%s" % (output_dir, directory))
 
-        with open('%s/%s' % (directory, file_name), 'w') as f:
+        with open('%s/%s/%s' % (output_dir, directory, file_name), 'w') as f:
             f.write(reply.content.decode("utf-8"))
 
     # check for file name presence in database
@@ -189,12 +190,13 @@ def get_swc_by_neuron_name(directory, neuron_name):
     return file_name
 
 
-def get_swc_by_brain_region(brain_region, num_neurons=-1):
+def get_swc_by_brain_region(brain_region, num_neurons=-1, output_dir=""):
     """Download a specific number of SWC files specified by a region name
 
     Keyword arguments:
     brain_region -- the brain region
     num_neurons -- how many neurons to retrieved (-1 means all neurons)
+    output_dir -- the output directory where the swc files will be written
 
     Note: Brain regions usually start in lowercase
     """
@@ -220,15 +222,17 @@ def get_swc_by_brain_region(brain_region, num_neurons=-1):
         neurons = reply.json()
         num_neurons = len(neurons['_embedded']['neuronResources'])
         for neuron in range(0, num_neurons):
-            get_swc_by_neuron_name(brain_region, neurons['_embedded']['neuronResources'][neuron]['neuron_name'])
+            get_swc_by_neuron_name(brain_region, neurons['_embedded']['neuronResources'][neuron]['neuron_name'], output_dir=output_dir)
 
 
-def get_swc_by_archive_name(archive_name, num_neurons=-1):
+def get_swc_by_archive_name(archive_name, num_neurons=-1, output_dir=""):
     """Download a specific number of SWC files specified by an archive name
 
     Keyword arguments:
     archive_name -- the brain region
     num_neurons -- how many neurons to retrieve (-1 means all neurons)
+    output_dir -- the output directory where the swc files will be written
+
     Note: Archive names usually start in uppercase
     """
     # check for API health
@@ -251,4 +255,4 @@ def get_swc_by_archive_name(archive_name, num_neurons=-1):
         neurons = reply.json()
         num_neurons = len(neurons['_embedded']['neuronResources'])
         for neuron in range(0, num_neurons):
-            
get_swc_by_neuron_name(archive_name, neurons['_embedded']['neuronResources'][neuron]['neuron_name']) \ No newline at end of file + get_swc_by_neuron_name(archive_name, neurons['_embedded']['neuronResources'][neuron]['neuron_name'], output_dir=output_dir) \ No newline at end of file