From bfa67b7975aa09e5a03f1737e78b23c5b674d701 Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 15 Nov 2024 12:28:08 +0000 Subject: [PATCH] Update Compound.ipynb to use searchengine instead of mapr --- Compound.ipynb | 97 +++++++++++++++++--------------------------------- 1 file changed, 33 insertions(+), 64 deletions(-) diff --git a/Compound.ipynb b/Compound.ipynb index 582717b9..4b1f749d 100644 --- a/Compound.ipynb +++ b/Compound.ipynb @@ -103,20 +103,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Set up base URLS so can use shorter variable names later on" + "### Set up base URL so can use shorter variable names later on" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ - "URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/?value={value}&case_sensitive=false&orphaned=true\"\n", - "SCREENS_PROJECTS_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/?value={value}&case_sensitive=false&id={compound_id}\"\n", - "PLATES_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/plates/?value={value}&id={screen_id}&case_sensitive=false\"\n", - "IMAGES_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/images/?value={value}&node={parent_type}&id={parent_id}&case_sensitive=false\"\n", - "ATTRIBUTES_URL = \"https://idr.openmicroscopy.org/webclient/api/annotations/?type=map&image={image_id}\"" + "SEARCH_URL = \"https://idr-testing.openmicroscopy.org/searchengine/api/v1/resources/image/search/?key={key}&value={value}\"" ] }, { @@ -138,16 +134,7 @@ }, "outputs": [], "source": [ - "TYPE = \"compound\"\n", - "KEYS = {TYPE:\n", - " (\"InChIKey\",\n", - " \"PubChem InChIKey\",\n", - " \"Compound Concentration (microMolar)\",\n", - " \"Concentration (microMolar)\",\n", - " \"Dose\",\n", - " \"Compound MoA\",\n", - " \"Compound Action\")\n", - "}" + "COMPOUND_NAME = \"Compound Name\"" ] }, { @@ -168,43 +155,34 @@ }, "outputs": [], "source": [ - "def parse_annotation(writer, json_data, name, data_type):\n", - " plate_name = \"-\"\n", - " screen_name = name\n", - " for p in json_data[data_type]:\n", - " parent_id = p['id']\n", - " plate_name = p['name']\n", - " qs3 = {'key': TYPE, 'value': compound,\n", - " 'parent_type': data_type[:-1], 'parent_id': parent_id}\n", - " url3 = IMAGES_URL.format(**qs3)\n", + "def parse_annotation(writer, json_data, compound):\n", + " for p in json_data:\n", + " plate_name = p['plate_name']\n", + " screen_name = p['screen_name']\n", " c = compound.lower()\n", " if c.startswith(\"ml\"):\n", " c = 'ml9'\n", - " for i in session.get(url3).json()['images']:\n", - " image_id = i['id']\n", - " url4 = ATTRIBUTES_URL.format(**{'image_id': image_id})\n", - " row = {}\n", - " inchikey = \"unknown\"\n", - " concentration = \"unknown\"\n", - " moa = \"unknown\"\n", - " for a in session.get(url4).json()['annotations']:\n", - " for v in a['values']:\n", - " key = str(v[0])\n", - " if key in KEYS[TYPE]:\n", - " if key in ['InChIKey', 'PubChem InChIKey']:\n", - " inchikey = v[1]\n", - " elif key in ['Dose', 'Compound Concentration (microMolar)', 'Concentration (microMolar)']:\n", - " concentration = float(v[1].replace(' micromolar', ''))\n", - " elif key in ['Compound MoA', 'Compound Action']:\n", - " moa = v[1]\n", - " row.update({'Compound': c,\n", - " 'Screen': screen_name,\n", - " 'Plate': plate_name,\n", - " 'Image': image_id,\n", - " 'InChIKey': inchikey,\n", - " 'Concentration (microMolar)': concentration,\n", - " 'MoA': moa})\n", - " writer.writerow(row)" + " image_id = p['id']\n", + " inchikey = \"unknown\"\n", + " concentration = \"unknown\"\n", + " moa = \"unknown\"\n", + " for v in p['key_values']:\n", + " key = v[\"name\"]\n", + " value = v[\"value\"]\n", + " if key in ['InChIKey', 'PubChem InChIKey']:\n", + " inchikey = value\n", + " elif key in ['Dose', 'Compound Concentration (microMolar)', 'Concentration (microMolar)']:\n", + " concentration = float(value.replace(' micromolar', ''))\n", + " elif key in ['Compound MoA', 'Compound Action']:\n", + " moa = value\n", + " row = {'Compound': c,\n", + " 'Screen': screen_name,\n", + " 'Plate': plate_name,\n", + " 'Image': image_id,\n", + " 'InChIKey': inchikey,\n", + " 'Concentration (microMolar)': concentration,\n", + " 'MoA': moa}\n", + " writer.writerow(row)" ] }, { @@ -236,19 +214,10 @@ " 'InChIKey', 'Concentration (microMolar)', 'MoA']\n", " writer = csv.DictWriter(csvfile, fieldnames=fieldnames)\n", " writer.writeheader()\n", - " for compound in compounds:\n", - " qs1 = {'key': TYPE, 'value': compound}\n", - " url1 = URL.format(**qs1)\n", - " json_data = session.get(url1).json()\n", - " for m in json_data['maps']:\n", - " qs2 = {'key': TYPE, 'value': compound, 'compound_id': m['id']}\n", - " url2 = SCREENS_PROJECTS_URL.format(**qs2)\n", - " json_data = session.get(url2).json()\n", - " for s in json_data['screens']:\n", - " compound = s['extra']['value']\n", - " qs3 = {'key': TYPE, 'value': compound, 'screen_id': s['id']}\n", - " url3 = PLATES_URL.format(**qs3)\n", - " parse_annotation(writer, session.get(url3).json(), s['name'], 'plates')\n", + " for compound in compounds[:2]:\n", + " url = SEARCH_URL.format(**{'key': COMPOUND_NAME, 'value': compound})\n", + " json_data = session.get(url).json()\n", + " parse_annotation(writer, json_data['results']['results'], compound)\n", "finally:\n", " csvfile.close()" ]