diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..6f59a4a --- /dev/null +++ b/.dockerignore @@ -0,0 +1,10 @@ +Dockerfile +README.md +*.pyc +*.pyo +*.pyd +__pycache__ +.pytest_cache +*.pdf +*.dbf +*.jpg \ No newline at end of file diff --git a/.gcloudignore b/.gcloudignore new file mode 100644 index 0000000..5437b5d --- /dev/null +++ b/.gcloudignore @@ -0,0 +1,15 @@ +README.md +trees.csv +.git/ +*.pyc +*.pyo +*.pyd +__pycache__ +.pytest_cache +*.pdf +*.jpg +stiles.trees.csv +node_modules/ +build/ +tmp/ +cloud_sql_proxy diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..4754161 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,16 @@ + +# Use the official lightweight Python image. +# https://hub.docker.com/_/python +FROM python:3.8.2-slim + +# Copy local code to the container image. +ENV APP_HOME /app +WORKDIR $APP_HOME +COPY . ./ + +RUN apt-get update -y \ + && apt-get install -y build-essential curl + +RUN pip3 install --no-cache-dir -r requirements.txt + +CMD exec gunicorn --bind :$PORT --workers 1 --threads 8 --timeout 0 etl_http_listener:app \ No newline at end of file diff --git a/Makefile b/Makefile index b6e378c..35b6892 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,5 @@ # Anything that needs to be done before the other rules run +SHELL := /bin/bash setup: mkdir -p build/data @@ -10,6 +11,7 @@ release: setup | node download-images.js \ | node split-trees.js build/data + # Runs the pipeline using local data, but skips the CPU-intensive python tasks img-test: setup cat data/trees.csv \ diff --git a/cloudbuild.yaml b/cloudbuild.yaml new file mode 100644 index 0000000..5062922 --- /dev/null +++ b/cloudbuild.yaml @@ -0,0 +1,4 @@ +steps: +- name: 'gcr.io/cloud-builders/docker' + entrypoint: 'bash' + args: ['-c', 'docker build -t us-west1-docker.pkg.dev/$PROJECT_ID/cloud-run-source-deploy/etl-image:prod .'] \ No newline at end of file diff --git a/data/species_attributes.csv b/data/species_attributes.csv index 
10aefd6..4879833 100644 --- a/data/species_attributes.csv +++ b/data/species_attributes.csv @@ -1,24 +1,66 @@ botanical_name,botanical_synonyms,sm_botanical_name,common_name,Species ID,family_botanical_name,family_common_name,native,EOL_ID,EOL_overview_URL,simplified_IUCN_status,IUCN_status,IUCN_DOI_or_URL,shade_production,form,type,Cal_IPC_rating,CAL_IPC_url,Irrigation_Requirements × Chitalpa tashkentensis,,Chitalpa tashkentensis,CHITALPA,262,Bignoniaceae,Bignonia,exotic,49307441,https://eol.org/pages/49307441/overview,not listed,not listed,,filtered,rounded,,,, -Acacia baileyana,,Acacia baileyana,BAILEY ACACIA,144,Fabaceae,Legume,watch,649008,http://eol.org/pages/649008/overview,not listed,not listed,,dense,spreading,evergreen,watch,https://www.cal-ipc.org/plants/risk/acacia-baileyana-risk/,"none, once established" -Acacia baileyana 'Purpurea',,Acacia baileyana 'Purpurea',Purple Acacia,1102,Fabaceae,Legume,watch,649008,http://eol.org/pages/649008/overview,not listed,not listed,,dense,spreading,evergreen,watch,https://www.cal-ipc.org/plants/risk/acacia-baileyana-risk/,"none, once established" +Abies concolor,,,White Fir,,Pinaceae,Pine,native,1033078,https://eol.org/pages/1033078,not listed,not listed,,,conical,evergreen,,, +Abies grandis,,,Grand Fir,,Pinaceae,Pine,native,1033074,https://eol.org/pages/1033074,,,,,conical,evergreen,,, +Abies magnifica,,,Red Fir,,Pinaceae,Pine,native,1061727,https://eol.org/pages/1061727,,,,,,,,, +Abies pinsapo,,,Spanish Fir,,Pinaceae,Pine,exotic,1061726,https://eol.org/pages/1061726,,,,,conical,evergreen,,, +Abies spp.,,,Fir,,Pinaceae,Pine,,13998,https://eol.org/pages/13998,,,,,,,,, +Abutilon pictum thompsonii,Abutilon striatum,,Painted Albutilon,,Malvaceae,Mallow,exotic,597500,https://eol.org/pages/597500,,,,,,,,, +Acacia baileyana,,Acacia baileyana,BAILEY ACACIA,144,Fabaceae,Legume,invasive,649008,http://eol.org/pages/649008/overview,not listed,not 
listed,,dense,spreading,evergreen,watch,https://www.cal-ipc.org/plants/risk/acacia-baileyana-risk/,"none, once established" +Acacia baileyana 'Purpurea',,Acacia baileyana 'Purpurea',Purple Acacia,1102,Fabaceae,Legume,invasive,649008,http://eol.org/pages/649008/overview,not listed,not listed,,dense,spreading,evergreen,watch,https://www.cal-ipc.org/plants/risk/acacia-baileyana-risk/,"none, once established" Acacia cognata,Acacia subporosa,Acacia cognata,River Wattle,274,Fabaceae,Legume,exotic,660740,http://eol.org/pages/660740/overview,not listed,not listed,,dense,"small, spreading",,,, -Acacia longifolia,,Acacia longifolia,Sydney Golden Wattle,338,Fabaceae,Legume,exotic,690308,http://eol.org/pages/690308/overview,not listed,not listed,,dense,rounded,,,, -Acacia melanoxylon,,Acacia melanoxylon,BLACK ACACIA,145,Fabaceae,Legume,limited,8684941,http://eol.org/pages/8684941/overview,not listed,not listed,,dense,rounded,evergreen,limited,https://www.cal-ipc.org/plants/paf/acacia-melanoxylon-plant-assessment-form/,"none, once established" +Acacia cultriformis,,,Knife Acacia,,Fabaceae,Legume,exotic,661694,https://eol.org/pages/661694,,,,,,evergreen,,, +Acacia cyclops,"[""acacia cyclopsis"", ""Acacia cyclopis""]",,Cyclop Acacia,,Fabaceae,Legume,exotic,661835,https://eol.org/pages/661835,,,,,,,,, +Acacia dealbata,"[""Acacia decurrens var. 
dealbata"",]",,Silver Wattle,,Fabaceae,Legume,invasive,684065,https://eol.org/pages/684065,,,,,,,,, +Acacia decurrens,,,Green Wattle,,Fabaceae,Legume,exotic,663357,https://eol.org/pages/663357,,,,,,,,, +Acacia floribunda,,,Gossamer Wattle,,Fabaceae,Legume,exotic,646757,https://eol.org/pages/646757,,,,,,,,, +Acacia jennerae,,,Coonavittra Wattle,,Fabaceae,Legume,exotic,655359,https://eol.org/pages/655359,,,,,,,,, +Acacia longifolia,,Acacia longifolia,Sydney Golden Wattle,338,Fabaceae,Legume,invasive,690308,http://eol.org/pages/690308/overview,not listed,not listed,,dense,rounded,,,, +Acacia longissima,,,Narrow-leaf Wattle,,Fabaceae,Legume,exotic,661222,https://eol.org/pages/661222,,,,,,,,, +Acacia melanoxylon,,Acacia melanoxylon,BLACK ACACIA,145,Fabaceae,Legume,invasive,8684941,http://eol.org/pages/8684941/overview,not listed,not listed,,dense,rounded,evergreen,limited,https://www.cal-ipc.org/plants/paf/acacia-melanoxylon-plant-assessment-form/,"none, once established" +Acacia paradoxa,"[""Acacia ornithophora"", ""Acacia undulata"", ""Mimosa paradoxa"", ""Racosperma paradoxum"", ""Acacia armata"", ""Acacia hybrida""]",,Kangaroo Acacia,,Fabaceae,Legume,invasive,648693,https://eol.org/pages/648693,,,,,,,,, +Acacia pendula,"[""Racosperma pendulum"",]",,Weeping Acacia,,Fabaceae,Legume,exotic,688908,https://eol.org/pages/688908,,,,,,,,, +Acacia podalyriifolia,,,Pearl Acacia,,Fabaceae,Legume,exotic,691015,https://eol.org/pages/691015,,,,,,,,, +Acacia pycnantha,,,Golden Wattle,,Fabaceae,Legume,exotic,689739,https://eol.org/pages/689739,,,,,,,,, +Acacia redolens,,,Bank Catclaw,,Fabaceae,Legume,exotic,660800,https://eol.org/pages/660800,,,,,,,,, +Acacia retiniodes,,,Water Wattle,,Fabaceae,Legume,exotic,703681,https://eol.org/pages/703681,,,,,,,,, +Acacia salicina,,,Willow Acacia,,Fabaceae,Legume,exotic,643418,https://eol.org/pages/643418,,,,,,,,, +Acacia saligna,"[""Acacia cyanophylla"", ""Acacia bracteata""]",,Blue-Leaf 
Wattle,,Fabaceae,Legume,exotic,690383,https://eol.org/pages/690383,,,,,,,,, +Vachellia farnesiana,"[""acacia smallii"", ""Acacia farnesiana"",]",,Sweeet Acacia,,Fabaceae,Legume,exotic,52202878,https://eol.org/pages/52202878,,,,,,,,, +Acacia spp.,,,Acacia,,Fabaceae,Legume,exotic,39940157,https://eol.org/pages/39940157,,,,,,,,, Acacia stenophylla,Acacia stenophylla var. linearis,Acacia stenophylla,Shoestring Acacia,415,Fabaceae,Legume,exotic,643396,http://eol.org/pages/643396/overview,not listed,not listed,,filtered,"weeping, pendulous",,,, Acca sellowiana,Feijoa sellowiana,Acca sellowiana,Pinapple Guava,207,Myrtaceae,Myrtle,exotic,2508674,http://eol.org/pages/2508674/overview,not listed,not listed,,filtered,rounded,,,, +Acer buergerianum,"[""Acer buergeranum"",]",,Trident Maple,,Sapindaceae,Soapberry,exotic,2888944,https://eol.org/pages/2888944,,,,,rounded,deciduous,,, +Acer circinatum,,,Vine Maple,,Sapindaceae,Soapberry,native,582251,https://eol.org/pages/582251,,,,,,,,, +Acer japonicum,,,Full Moon Maple,,Sapindaceae,Soapberry,exotic,2888970,https://eol.org/pages/2888970,,,,,,,,, +Acer macrophyllum,,,Big Leaf Maple,,Sapindaceae,Soapberry,native,582252,https://eol.org/pages/582252,,,,,,,,, +Acer negundo,,,Box Elder,,Sapindaceae,Soapberry,native,583069,https://eol.org/pages/583069,,,,,,,,, +Acer oblongum,,,Evergreen Maple,,Sapindaceae,Soapberry,exotic,2888990,https://eol.org/pages/2888990,,,,,,,,, Acer palmatum,,Acer palmatum,JAPANESE MAPLE,188,Sapindaceae,Soapberry,exotic,596824,http://eol.org/pages/596824/overview,Least Concern,Least Concern,http://dx.doi.org/10.2305/IUCN.UK.2017-3.RLTS.T193845A2285627.en,dense,"spreading, vase",,,,moderate +Acer palmatum 'bloodgood',,,Bloodgood Japanese Maple,,Sapindaceae,Soapberry,exotic,596824,http://eol.org/pages/596824/overview,,,,,,,,, +Acer palmatum 'green',,,Green Japanese Maple,,Sapindaceae,Soapberry,exotic,596824,http://eol.org/pages/596824/overview,,,,,,,,, +Acer palmatum 'red',,,Red Japanese 
Maple,,Sapindaceae,Soapberry,exotic,596824,http://eol.org/pages/596824/overview,,,,,,,,, Acer paxii,,Acer paxii,Evergreen Maple,137,Sapindaceae,Soapberry,exotic,2888996,http://eol.org/pages/2888996/overview,not listed,not listed,,dense,rounded,,,, Acer rubrum,Acer rubrum var. drummondii,Acer rubrum,Red Maple,104,Sapindaceae,Soapberry,exotic,582246,http://eol.org/pages/582246/overview,Least Concern,Learn Concern,http://dx.doi.org/10.2305/IUCN.UK.2017-3.RLTS.T193860A2287111.en,filtered,"Conical, rounded, Spreading",,,, +Acer platanoides,,,Norway Maple,,Sapindaceae,Soapberry,exotic,583070,https://eol.org/pages/583070,,,,,,,,, +Acer pseudoplatanus,,,Sycamore Maple,,Sapindaceae,Soapberry,exotic,583073,https://eol.org/pages/583073,,,,,,,,, Acer saccharinum,,Acer saccharinum,SILVER MAPLE,115,Sapindaceae,Soapberry,exotic,583072,http://eol.org/pages/583072/overview,Least Concern,Learn Concern,http://dx.doi.org/10.2305/IUCN.UK.2017-3.RLTS.T193862A2287256.en,dense,"spreading, vase",,,, +Acer saccharum,,,Sugar Maple,,Sapindaceae,Soapberry,exotic,582247,https://eol.org/pages/582247,,,,,,,,, Acer spp.,,Acer spp.,Maple,72,Sapindaceae,Soapberry,exotic,47125858,https://eol.org/pages/47125858/overview,not listed,not listed,,dense,rounded,,,, +Acer x freemanii,Acer rubrum x saccharinum,,Freeman Maple,,Sapindaceae,Soapberry,exotic,49123875,https://eol.org/pages/49123875,,,,,,,,, +Acer × freemanii 'autumn fantasy',,,Autumn Fantasy Maple,,Sapindaceae,Soapberry,exotic,49123875,https://eol.org/pages/49123875,,,,,,,,, +Acoelorrhaphe wrightii,,,Paurotis palm,,Arecaceae,Palm,exotic,1127723,https://eol.org/pages/1127723,,,,,,,,, +Acrocarpus fraxinifolius,,,Pink Cedar,,Fabaceae,Legume,exotic,695211,https://eol.org/pages/695211,,,,,,deciduous,,, +Aesculus californica,Hippocastanatum californica,,California Buckeye,,Sapindaceae,Soapberry,native,581628,https://eol.org/pages/581628,,,,,,,,, +Aesculus × carnea,Aesculus carnea,,Red Horse 
Chestnut,,Sapindaceae,Soapberry,exotic,52558635,https://eol.org/pages/52558635,,,,,,,,, +Aesculus hippocastanum,,,Horse Chestnut,,Sapindaceae,Soapberry,exotic,582243,https://eol.org/pages/582243,,,,,,,,, Afrocarpus elongatus 'Icee Blue',Podocarpus elongatus,Afrocarpus elongatus 'Icee Blue',Icee Blue Yellow-Wood,2034,Podocarpaceae,Podocarp,exotic,323467,https://eol.org/pages/323467,not listed,not listed,,dense,rounded,evergeen,,, Afrocarpus falcatus,Afrocarpus gracilior;Podocarpus gracilior,Afrocarpus falcatus,African fern pine,2086,Podocarpaceae,Podocarp,exotic,1033604,https://eol.org/pages/1033604,Least Concern,Least Concern,http://dx.doi.org/10.2305/IUCN.UK.2013-1.RLTS.T42438A2980290.en,dense,rounded,evergreen,,, Afrocarpus falcatus,,Afrocarpus gracilior,FERN PINE,46,Podocarpaceae,Podocarp,exotic,1033605,http://eol.org/pages/1033605/overview,Least Concern,Least Concern,http://dx.doi.org/10.2305/IUCN.UK.2013-1.RLTS.T42439A2980350.en,dense,rounded,evergeen,,, Afrocarpus macrophyllus,,Afrocarpus macrophyllus,YEW PINE,134,Podocarpaceae,Podocarp,exotic,1059922,http://eol.org/pages/1059922/overview,not listed,not listed,,dense,rounded,evergeen,,, Agathis robusta,,Agathis robusta,Queensland Kauri,269,Araucariaceae,Araucaria,exotic,1033628,http://eol.org/pages/1033628/overview,Least Concern,Least Concern,http://dx.doi.org/10.2305/IUCN.UK.2013-1.RLTS.T16437966A2960124.en,dense,rounded,,,, Agonis flexuosa,,Agonis flexuosa,PEPPERMINT TREE,307,Myrtaceae,Myrtle,exotic,5448625,http://eol.org/pages/5448625/overview,not listed,not listed,,filtered,pendulous,,,,minimal -Ailanthus altissima,,Ailanthus altissima,TREE OF HEAVEN,233,Simaroubaceae,Quassia,moderate,5614169,http://eol.org/pages/5614169/overview,not listed,not listed,,dense,"spreading, vase",,moderate,https://www.cal-ipc.org/plants/paf/ailanthus-altissima-plant-assessment-form/, +Ailanthus altissima,,Ailanthus altissima,TREE OF HEAVEN,233,Simaroubaceae,Quassia,invasive,5614169,http://eol.org/pages/5614169/overview,not 
listed,not listed,,dense,"spreading, vase",,moderate,https://www.cal-ipc.org/plants/paf/ailanthus-altissima-plant-assessment-form/, Albizia julibrissin,,Albizia julibrissin,SILK TREE,76,Fabaceae,Legume,exotic,640054,http://eol.org/pages/640054/overview,not listed,not listed,,filtered,rounded,deciduous,,,minimal Allocasuarina verticillata,Casuarina excelsa;Casuarina stricta,Allocasuarina verticillata,Drooping She-Oak,1881,Casuarinaceae,Beefwood,exotic,628407,http://eol.org/pages/628407/overview,not listed,not listed,,filtered,"vase, pendulous",,,, Alnus cordata,,Alnus cordata,ITALIAN ALDER,187,Betulaceae,Birch,exotic,1145955,http://eol.org/pages/1145955/overview,Least Concern,Least Concern,http://dx.doi.org/10.2305/IUCN.UK.2017-3.RLTS.T194657A117268007.en,dense,"Conical, Spreading",,,,moderate @@ -29,7 +71,7 @@ Araucaria columnaris,Araucaria cookii,Araucaria columnaris,STAR PINE,306,Araucar Araucaria heterophylla,Araucaria excelsa,Araucaria heterophylla,NORFOLK ISLAND PINE,84,Araucariaceae,Araucaria,exotic,1033727,http://eol.org/pages/1033727/overview,Vulnerable,Vulnerable D2,http://dx.doi.org/10.2305/IUCN.UK.2011-2.RLTS.T30497A9548582.en,dense,"Conical, Spreading",evergreen,,,moderate Arbutus 'Marina',,Arbutus 'Marina',MARINA ARBUTUS,486,Ericaceae,Heather,exotic,71122,http://eol.org/pages/71122/overview,not listed,not listed,,dense,rounded,evergreen,,, Arbutus unedo,,Arbutus unedo,STRAWBERRY TREE,315,Ericaceae,Heather,exotic,583608,http://eol.org/pages/583608/overview,Least Concern,Least Concern,http://dx.doi.org/10.2305/IUCN.UK.2017-3.RLTS.T202930A68076133.en,dense,rounded,evergreen,,,minimal -Archontophoenix cunninghamiana,,Archontophoenix cunninghamiana,KING PALM,63,Arecaceae,Palm,exotic,1136266,http://eol.org/pages/1136266/overview,not listed,not listed,,"little, none",palm,evergreen,,, +Archontophoenix cunninghamiana,"[""archontophoenix cunningham"", ""archontophoenix cunningham""]",Archontophoenix cunninghamiana,KING 
PALM,63,Arecaceae,Palm,exotic,1136266,http://eol.org/pages/1136266/overview,not listed,not listed,,"little, none",palm,evergreen,,, Asphalted well,,Asphalted well,Asphalted well,638,,,,,,not listed,not listed,,,,,,, Auranticarpa rhombifolium,Pittosporum rhombifolium,Auranticarpa rhombifolium,QUEENSLAND PITTOSPORUM,100,Pittosporaceae,Cheesewood,exotic,5556305,http://eol.org/pages/5556305/overview,not listed,not listed,,dense,rounded,,,, Bauhinia × blakeana,Bauhinia purpurea × Bauhinia variegata,Bauhinia blakeana,HONG KONG ORCHID TREE,250,Fabaceae,Legume,exotic,641500,http://eol.org/pages/641500/overview,not listed,not listed,,dense,"pendulous, spreading",,,,minimal @@ -135,8 +177,9 @@ Gleditsia triacanthos,,Gleditsia triacanthos,HONEY LOCUST,55,Fabaceae,Legume,exo Grevillea robusta,,Grevillea robusta,SILK OAK,112,Proteaceae,Protea,watch,582736,http://eol.org/pages/582736/overview,not listed,not listed,,filtered,spreading,evergreen,watch,https://www.cal-ipc.org/plants/risk/grevillea-robusta-risk/,minimal Hakea drupacea,Banksia heterophylla,Hakea suaveolens,SWEET HAKEA,270,Proteaceae,Protea,exotic,5511103,http://eol.org/pages/5511103/overview,not listed,not listed,,filtered,rounded,,,, Handroanthus chrysotrichus,Tabebuia chrysotricha,Handroanthus chrysotrichus,Golden Trumpet Tree,179,Bignoniaceae,Bignonia,exotic,5637482,http://eol.org/pages/5637482/overview,not listed,not listed,,dense,rounded,,,, -Handroanthus heptaphyllus,Tabebuia impetiginosa ,Handroanthus impetiginosus,Pink Trumpet Tree,403,Bignoniaceae,Bignonia,exotic,5637444,http://eol.org/pages/5637444,not listed,not listed,,"filtered, dense","rounded, vase",deciduous,,, +Handroanthus heptaphyllus,Tabebuia impetiginosa,Handroanthus impetiginosus,Pink Trumpet Tree,403,Bignoniaceae,Bignonia,exotic,5637444,http://eol.org/pages/5637444,not listed,not listed,,"filtered, dense","rounded, vase",deciduous,,, Harpephyllum caffrum,,Harpephyllum caffrum,KAFFIR 
PLUM,192,Anacardiaceae,Sumac,exotic,6935240,http://eol.org/pages/6935240/overview,not listed,not listed,,dense,rounded,,,,minimal +Hesperocyparis arizonica,"[""cupressus arizonica"", ""Cupressus arizonica var. arizonica""]",,Arizona Cypress,,Cupressaceae,Cypress,exotic,,,,,,,,,,, Hesperocyparis arizonica var. glabra,Cupressus arizonica var. glabra,Cupressus glabra,Smoothbark Arizona Cypress,142,Cupressaceae,Cypress,exotic,49307043,https://eol.org/pages/49307043,Near Threatened,Near Threatened,http://dx.doi.org/10.2305/IUCN.UK.2013-1.RLTS.T19708408A19708411.en,dense,Columnar,,,,minimal Hesperocyparis macrocarpa,Cupressus macrocarpa;Cupressus lambertiana,Hesperocyparis macrocarpa,MONTEREY CYPRESS,332,Cupressaceae,Cypress,native,1034856,http://eol.org/pages/1034856/overview,Vulnerable,Vulnerable D2,http://www.iucnredlist.org/details/30375/0,filtered,conical,,,, Heteromeles arbutifolia,,Heteromeles arbutifolia,TOYON,232,Rosaceae,Rose,native,47383069,https://eol.org/pages/47383069,not listed,not listed,,dense,rounded,evergreen,,, @@ -224,6 +267,7 @@ Pittosporum crassifolium,,Pittosporum crassifolium,KARO,322,Pittosporaceae,Chees Pittosporum tobira,,Pittosporum tobira,MOCK ORANGE,198,Pittosporaceae,Cheesewood,exotic,583390,http://eol.org/pages/583390/overview,not listed,not listed,,dense,rounded,evergreen,,,moderate Pittosporum undulatum,,Pittosporum undulatum,VICTORIAN BOX,125,Pittosporaceae,Cheesewood,watch,583391,http://eol.org/pages/583391/overview,not listed,not listed,,dense,rounded,evergreen,watch,https://www.cal-ipc.org/plants/risk/pittosporum-undulatum-risk/,moderate Pittosporum viridiflorum,,Pittosporum viridiflorum,CAPE PITTOSPORUM,345,Pittosporaceae,Cheesewood,exotic,47136685,https://eol.org/pages/47136685/overview,not listed,not listed,,dense,rounded,evergreen,,,moderate +Planting site,"[""Vacant site"", ""planting site large"", ""planting site medium"", ""planting site small""]",Vacant site,VACANT SITE,238,,,,,,not listed,not listed,,,,,,, Platanus × 
hispanica,Platanus × acerifolia;Platanus occidentalis × Platanus orientalis;,Platanus X hispanica,London Plane,70,Platanaceae,Plane-tree,exotic,49950980,https://eol.org/pages/49950980/overview,not listed,not listed,,dense,"vase, spreading",,,, Platanus × hispanica 'Bloodgood',Platanus × acerifolia 'Bloodgood',Platanus X hispanica 'Bloodgood',Bloodgood Plane,1079,Platanaceae,Plane-tree,exotic,49950980,https://eol.org/pages/49950980/overview,not listed,not listed,,filtered,rounded,deciduous,,, Platanus × hispanica 'Yarwood',Platanus × acerifolia 'Yarwood',Platanus X hispanica 'Yarwood',Yarwood Plane,933,Platanaceae,Plane-tree,exotic,49950980,https://eol.org/pages/49950980/overview,not listed,not listed,,filtered,rounded,deciduous,,, @@ -231,6 +275,7 @@ Platanus mexicana,,Platanus mexicana,Mexican Sycamore,440,Platanaceae,Plane-tree Platanus racemosa,,Platanus racemosa,CALIFORNIA SYCAMORE,23,Platanaceae,Plane-tree,native,594707,http://eol.org/pages/594707/overview,not listed,not listed,,filtered,spreading,evergreen,,, Platanus x hispanica 'Columbia',Platanus × acerifolia 'Columbia',Platanus X hispanica 'Columbia',Columbia Plane,1360,Platanaceae,Plane-tree,exotic,49950980,https://eol.org/pages/49950980/overview,not listed,not listed,,dense,"rounded, spreading",deciduous,,, Platycladus orientalis,,Platycladus orientalis,ORIENTAL ARBORVITAE,200,Cupressaceae,Cypress,exotic,323359,http://eol.org/pages/323359/overview,Near Threatened,Near Threatened,http://dx.doi.org/10.2305/IUCN.UK.2013-1.RLTS.T31305A2803944.en,dense,rounded,evergreen,,,moderate +Podocarpus henkelii,Afrocarpus henkelii,,Long Leafed Yellowwood,,Podocarpaceae,Podocarp,exotic,1033708,https://eol.org/pages/1033708,Endangered,,,,,,,, Prosopis glandulosa,,Prosopis glandulosa,Mesquite,905,Fabaceae,Legume,native,416627,http://eol.org/pages/416627/overview,not listed,not listed,,little,,deciduous,,,"none, once established" Prunus × blireiana,,Prunus blireiana,FLOWERING 
PLUM,47,Rosaceae,Rose,exotic,39934521,https://eol.org/pages/39934521,not listed,not listed,,filtered,,,,,moderate Prunus armeniaca,,Prunus armeniaca,APRICOT,6,Rosaceae,Rose,exotic,301091,http://eol.org/pages/301091/overview,Endangered,Endangered B2ab(iii),http://dx.doi.org/10.2305/IUCN.UK.2007.RLTS.T63405A12666025.en,filtered,"Rounded, Spreading, Vase",deciduous,,,moderate @@ -271,7 +316,7 @@ Spathodea campanulata,,Spathodea campanulata,African Tulip Tree,383,Bignoniaceae Sphaeropteris cooperi,Alsophila cooperi,Cyathea cooperi,Australian Tree Fern,739,Cyatheaceae,Tree fern,exotic,483203,http://eol.org/pages/483203/overview,not listed,not listed,,filtered,fern,,,, Stenocarpus sinuatus,,Stenocarpus sinuatus,FIREWHEEL TREE,171,Proteaceae,Protea,exotic,582737,http://eol.org/pages/582737/overview,not listed,not listed,,filtered,,,,,moderate Strelitzia nicolai,,Strelitzia nicolai,GIANT BIRD OF PARADISE,321,Strelitziaceae,Strelitzia,exotic,345179,http://eol.org/pages/345179/overview,not listed,not listed,,"little, none",palm,evergreen,,,moderate -Stump,,Stump,STUMP,225,,,,,,not listed,not listed,,,,,,, +Stump,"[""Dead tree""]",Stump,STUMP,225,,,,,,not listed,not listed,,,,,,, Stump - not accessible,,Stump - not accessible,STUMP - NOT ACCESSIBLE,1434,,,,,,not listed,not listed,,,,,,, Syagrus romanzoffiana,Arecastrum romanzoffianum,Syagrus romanzoffianum,QUEEN PALM,99,Arecaceae,Palm,exotic,1129524,http://eol.org/pages/1129524/overview,not listed,not listed,,"little, none",palm,evergreen,,,moderate Syzygium australe,Eugenia australis;Eugenia myrtifolia,Syzygium paniculatum,BRUSH CHERRY,17,Myrtaceae,Myrtle,exotic,2508667,http://eol.org/pages/2508667/overview,not listed,not listed,,dense,rounded,evergreen,,,moderate @@ -292,8 +337,7 @@ Ulmus parvifolia 'Drake',,Ulmus parvifolia 'Drake',DRAKE ELM,462,Ulmaceae,Elm,ex Ulmus pumila,,Ulmus pumila,SIBERIAN ELM,111,Ulmaceae,Elm,exotic,594950,http://www.eol.org/pages/594950/overview,Least Concern,Least 
Concern,http://dx.doi.org/10.2305/IUCN.UK.2018-1.RLTS.T61967372A61967374.en,filtered,"spreading, rounded",deciduous,,,moderate Umbellularia californica,,Umbellularia californica,California Bay,20,Lauraceae,Laurel,native,596841,http://eol.org/pages/596841/overview,not listed,not listed,,filtered,rounded,evergreen,,,"none, once established" Unidentified spp.,,Unidentified spp.,Unidentified Tree,1506,,,,,,not listed,not listed,,,,,,, -Unsuitable site,,Unsuitable site,UNSUITABLE SITE,237,,,,,,not listed,not listed,,,,,,, -Vacant site,,Vacant site,VACANT SITE,238,,,,,,not listed,not listed,,,,,,, +Unsuitable site,"[""poor planting site""]",Unsuitable site,UNSUITABLE SITE,237,,,,,,not listed,not listed,,,,,,, Viburnum spp.,,Viburnum spp.,Viburnum,571,Adoxaceae,Adoxas,exotic,490016,https://eol.org/pages/490016/overview,not listed,not listed,,dense,,,,,moderate Washingtonia filifera,,Washingtonia filifera,CALIFORNIA FAN PALM,21,Arecaceae,Palm,native,1127834,http://www.eol.org/pages/1127834/overview,Lower Risk/near threatened,Lower Risk/near threatened,http://dx.doi.org/10.2305/IUCN.UK.1998.RLTS.T38725A10145920.en,little,,,,,"none, once established" Washingtonia filifera x robusta,,Washingtonia filifera X robusta,Filibuster Hybrid Fan Palm,557,Arecaceae,Palm,exotic,,,not listed,not listed,,"little, none",palm,evergreen,,,"none, once established" @@ -302,5 +346,8 @@ Wisteria sinensis,,Wisteria sinensis,CHINESE WISTERIA,662,Fabaceae,Legume,exotic Wodyetia bifurcata,,Wodyetia bifurcata,FOXTAIL PALM,604,Arecaceae,Palm,exotic,1127809,http://eol.org/pages/1127809/overview,Lower Risk/conservation dependent,Lower Risk/conservation dependent,http://dx.doi.org/10.2305/IUCN.UK.1998.RLTS.T38733A10146773.en,"little, none",palm,evergreen,,,moderate Yucca elephantipes,Yucca gigantea,Yucca elephantipes,Giant Yucca,224,Asparagaceae,Asparagus,exotic,1083612,https://eol.org/pages/1083612/overview,not listed,not listed,,filtered,palm,evergreen,,,"none, once established" Yucca gloriosa,,Yucca 
gloriosa,SPANISH DAGGER,223,Asparagaceae,Asparagus,exotic,1083624,http://www.eol.org/pages/1083624/overview,not listed,not listed,,little/none,palm,evergreen,,,"none, once established" -Yucca spp.,,Yucca spp.,Yucca Species,135,Asparagaceae,Asparagus,exotic,23768875,https://eol.org/pages/23768875,not listed,not listed,,"little, none",palm,evergreen,,,"none, once established" +Yucca spp.,,Yucca spp.,Yucca,135,Asparagaceae,Asparagus,exotic,23768875,https://eol.org/pages/23768875,not listed,not listed,,"little, none",palm,evergreen,,,"none, once established" +Zamia spp.,,,Zamia,,Zamiaceae,Cycad,exotic,39967576,https://eol.org/pages/39967576,,,,,,,,, Zelkova serrata,,Zelkova serrata,Sawtooth Zelkova,216,Ulmaceae,Elm,exotic,484119,http://eol.org/pages/484119/overview,not listed,not listed,,filtered,"spreading, vase",deciduous,,, +Zelkova serrata 'village green',,,Village Green Japanese Zelkova,,Ulmaceae,Elm,exotic,484119,http://eol.org/pages/484119/overview,not listed,not listed,,filtered,"spreading, vase",deciduous,,, +Ziziphus jujuba,"[""zizyphus jujuba""]",,Chinese jujube,,Rhamnaceae,Buckthorn,exotic,582338,https://eol.org/pages/582338,,,,,,,,, \ No newline at end of file diff --git a/data/stiles_data/parse_la_data.py b/data/stiles_data/parse_la_data.py deleted file mode 100644 index 58d997b..0000000 --- a/data/stiles_data/parse_la_data.py +++ /dev/null @@ -1,404 +0,0 @@ -import argparse -from pathlib import Path - -import geopandas as gpd - - -class CityParser(object): - - def __init__(self, path: Path): - geo_jsons = [p for p in path.iterdir() if p.is_file() and p.suffix == '.geojson'] - self.city = path.parts[-1] - assert len(geo_jsons) <= 1 - if len(geo_jsons) > 0: - self.geo_json_path = geo_jsons[-1] - else: - self.geo_json_path = None - - def get_maximal_df(self): - assert self.geo_json_path - df = gpd.read_file(str(self.geo_json_path.absolute())).assign(city=self.city) - return self.lat_lon_from_geometry(df) - - @staticmethod - def lat_lon_from_geometry(df, 
y_is_lat=True): - if y_is_lat: - return df.assign( - latitude=df['geometry'].apply(lambda p: p.y), - longitude=df['geometry'].apply(lambda p: p.x) - ) - return df.assign( - latitude=df['geometry'].apply(lambda p: p.x), - longitude=df['geometry'].apply(lambda p: p.y) - ) - - @staticmethod - def cat_parser(df, min_field, max_field, og_field, cats): - actual_cats = [cat for cat in df[og_field].unique().tolist() if set(cat.strip()) != {'-'}] - if len(actual_cats) > len(cats): - raise RuntimeError(f'{len(cats)} categories but categories in df={df[og_field].unique().tolist()}') - df[min_field] = -1 - df[max_field] = -1 - for cat in cats: - mask = df[og_field].str.strip() == cat - if len(cat.split('-')) == 2: - min_val, max_val = cat.split('-') - df.loc[mask, min_field] = int(min_val) - df.loc[mask, max_field] = int(max_val) - elif cat.endswith('+'): - min_val = int(cat[:-1]) - df.loc[mask, min_field] = int(min_val) - elif cat.startswith('>'): - min_val = int(cat[1:]) - df.loc[mask, min_field] = int(min_val) - - return df - - -class LosAngelesCityParser(CityParser): - def __init__(self, path: Path): - super().__init__(path) - - def get_maximal_df(self): - df = super().get_maximal_df() - df = df.assign(name_common=df['species'].str.title()) - df = self.lat_lon_from_geometry(df) - return df[['name_common', 'latitude', 'longitude', 'city']] - - -class LosAngelesCountyParser(CityParser): - - def __init__(self, path: Path): - super().__init__(path) - - def get_maximal_df(self): - df = super().get_maximal_df() - df = df.assign( - name_common=df['SPECIES'].str.title(), - diameter_min_in=df['DIAMETER'] - ) - return df[['name_common', 'latitude', 'longitude', 'diameter_min_in', 'city']] - - -class AgouraHillsParser(CityParser): - - def __init__(self, path: Path): - super().__init__(path) - - def get_maximal_df(self): - df = super().get_maximal_df() - df = df.assign( - tree_id=df['InventoryID'], - name_common=df['species'].str.title(), - name_botanical=df['botanical'].str.title(), 
- address=df['Address'].astype(str).str.cat(df['Street'].str.title(), sep=' ') - ) - df = self.cat_parser( - df, - 'diameter_min_in', - 'diameter_max_in', - 'DBH', - ['0-6', '07-12', '13-18', '19-24', '25-30', '31+'] - ) - df = self.cat_parser( - df, - 'height_min_feet', - 'height_max_feet', - 'height', - ['01-15', '15-30', '30-45', '45-60', '60+'] - ) - - return df[ - [ - 'tree_id', - 'name_common', - 'name_botanical', - 'address', - 'height_min_feet', - 'city', - 'height_max_feet', - 'latitude', - 'longitude', - 'diameter_min_in', - 'diameter_max_in', - ] - ] - - -class AlhambraParser(CityParser): - - def __init__(self, path: Path): - super().__init__(path) - - def get_maximal_df(self): - df = super().get_maximal_df() - df = df.assign( - name_common=df['species'].str.title(), - name_botanical=df['BotanicalN'].str.title(), - tree_id=df['Tree'], - address=df['Address'].astype(str).str.cat(df['Street'].str.title(), sep=' ') - ) - df = self.cat_parser( - df, - 'diameter_min_in', - 'diameter_max_in', - 'DBH', - ['0-6', '07-12', '13-18', '19-24', '25-30', '31+'] - ) - df = self.cat_parser( - df, - 'height_min_feet', - 'height_max_feet', - 'height', - ['01-15', '15-30', '30-45', '45-60', '60+'] - ) - return df[ - [ - 'name_common', - 'name_botanical', - 'city', - 'tree_id', - 'address', - 'height_min_feet', - 'height_max_feet', - 'latitude', - 'longitude', - 'diameter_min_in', - 'diameter_max_in', - - ] - ] - - -class ArcadiaParser(CityParser): - - def __init__(self, path: Path): - super().__init__(path) - - def get_maximal_df(self): - df = super().get_maximal_df() - df = df.assign( - tree_id=df['TREE_ID'], - name_common=df['COM_NAME'].str.title(), - address=df['ADDR'].str.split('ARCADIA').str[0].str.title() - ) - return df[ - [ - 'name_common', - 'city', - 'tree_id', - 'address', - 'latitude', - 'longitude', - - ] - ] - - -class BellflowerParser(CityParser): - - def __init__(self, path: Path): - super().__init__(path) - - def get_maximal_df(self): - df = 
super().get_maximal_df() - df = df.assign( - estimated_value=df['EstValue'], - name_common=df['species'].str.title(), - name_botanical=df['botanical'].str.title(), - address=df['Address'].astype(str).str.cat(df['Street'].str.title(), sep=' '), - tree_id=df['InventoryID'], - ) - df = self.cat_parser( - df, - 'diameter_min_in', - 'diameter_max_in', - 'DBH', - ['0-6', '07-12', '13-18', '19-24', '25-30', '31+', '---'] - ) - df = self.cat_parser( - df, - 'height_min_feet', - 'height_max_feet', - 'height', - ['01-15', '15-30', '30-45', '45-60', '60+', '---'] - ) - - return df[ - [ - 'name_common', - 'name_botanical', - 'city', - 'tree_id', - 'address', - 'estimated_value', - 'height_min_feet', - 'height_max_feet', - 'latitude', - 'longitude', - 'diameter_min_in', - 'diameter_max_in', - ] - ] - - -class BellGardensParser(CityParser): - - def __init__(self, path: Path): - super().__init__(path) - - def get_maximal_df(self): - df = super().get_maximal_df() - df = df.assign( - name_common=df['species'].str.title(), - name_botanical=df['BOTANICALN'].str.title(), - tree_id=df['INVENTORYI'], - address=df['ADDRESS'].astype(str).str.cat(df['STREET'].str.title(), sep=' ') - ) - df = self.cat_parser( - df, - 'diameter_min_in', - 'diameter_max_in', - 'DBH', - ['0-6', '07-12', '13-18', '19-24', '25-30', '31+'] - ) - df = self.cat_parser( - df, - 'height_min_feet', - 'height_max_feet', - 'height', - ['01-15', '15-30', '30-45', '45-60', '60+'] - ) - - return df[ - [ - 'name_common', - 'name_botanical', - 'city', - 'tree_id', - 'address', - 'height_min_feet', - 'height_max_feet', - 'latitude', - 'longitude', - 'diameter_min_in', - 'diameter_max_in', - ] - ] - - -class ArtesiaParser(CityParser): - - def __init__(self, path: Path): - super().__init__(path) - - def get_maximal_df(self): - df = super().get_maximal_df() - df = df.assign( - tree_id=df['INVENTORYI'], - address=df['ADDRESS'].astype(str).str.cat(df['STREET'].str.title(), sep=' '), - name_botanical=df['BOTANICALN'].str.title(), - 
name_common=df['species'].str.title(), - ) - df = self.cat_parser( - df, - 'diameter_min_in', - 'diameter_max_in', - 'DBH', - ['0-6', '07-12', '13-18', '19-24', '25-30', '31+'] - ) - df = self.cat_parser( - df, - 'height_min_feet', - 'height_max_feet', - 'height', - ['01-15', '15-30', '30-45', '45-60', '60+'] - ) - return df[ - [ - 'name_common', - 'name_botanical', - 'city', - 'tree_id', - 'address', - 'height_min_feet', - 'height_max_feet', - 'latitude', - 'longitude', - 'diameter_min_in', - 'diameter_max_in', - - ] - ] - - -class BeverlyHillsParser(CityParser): - - def __init__(self, path: Path): - super().__init__(path) - - def get_maximal_df(self): - df = super().get_maximal_df() - df = df.assign( - tree_id=df['TREEID'], - address=df['ADDRESS'].astype(str).str.cat(df['STREET'].str.title(), sep=' '), - name_botanical=df['BOTANICAL'].str.title(), - name_common=df['species'].str.title(), - ) - df = self.cat_parser( - df, - 'height_min_feet', - 'height_max_feet', - 'HEIGHT_RAN', - ['1-15', '16-30', '31-45', '46-60', '>60', '------', ''] - ) - return df[ - [ - 'name_common', - 'name_botanical', - 'city', - 'tree_id', - 'address', - 'height_min_feet', - 'height_max_feet', - 'latitude', - 'longitude', - ] - ] - - -class StilesDataParser(object): - - mapper = { - 'los-angeles-city': LosAngelesCityParser, - 'los-angeles-county': LosAngelesCountyParser, - 'agoura-hills': AgouraHillsParser, - 'alhambra' : AlhambraParser, - 'arcadia': ArcadiaParser, - 'artesia': ArtesiaParser, - 'bell-gardens': BellGardensParser, - 'bellflower': BellflowerParser, - 'beverly-hills': BeverlyHillsParser - } - - def __init__(self, data_path): - root_path = Path(data_path) - self.data_dirs = ([x for x in root_path.iterdir() if x.is_dir()]) - - def parse_all(self): - for data_dir in self.data_dirs: - city = data_dir.parts[-1] - if city != 'all': - if city in self.mapper: - city_parser = self.mapper[city](data_dir) - if city_parser.geo_json_path: - city_parser.get_maximal_df() - - -if __name__ 
== "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--datapath", required=True, type=str) - args = parser.parse_args() - - data_parser = StilesDataParser(args.datapath) - data_parser.parse_all() diff --git a/etl_http_listener.py b/etl_http_listener.py new file mode 100644 index 0000000..61db80e --- /dev/null +++ b/etl_http_listener.py @@ -0,0 +1,39 @@ +import os + +import pandas as pd +from flask import Flask +from sm_parser import parse_trees +import upload_trees +import parse_la_data +import download_images + +app = Flask(__name__) + + +def download_tree_images(): + img_download = download_images.ImageDownloader() + trees_df, hashed_urls = img_download.get_trees_without_images() + for idx, row in enumerate(trees_df.itertuples()): + img_download.get_tree_images(row.id, row.eol_id, hashed_urls) + + +@app.route("/") +def upload_sm_trees(): + df = pd.read_csv('https://data.smgov.net/resource/w8ue-6cnd.csv?$limit=50000') + df = parse_trees(df=df, stdout=False) + matcher = parse_la_data.SpeciesMatcher(df) + matched_df = matcher.match(how='left').drop('species_id', axis=1) + uploader = upload_trees.SMTreeUploader() + species_mapper = uploader.get_species_ids_mapper() + matched_df['species_id'] = matched_df['botanical_name'].map(species_mapper) + uploader.truncate_sm_trees() + uploader.prepare_df(matched_df) + uploader.upload_trees(uploader.df, batch_size=0) + + download_tree_images() + + return 'SUCCESS' + + +if __name__ == "__main__": + app.run(debug=False, host="0.0.0.0", port=int(os.environ.get("PORT", 8080))) \ No newline at end of file diff --git a/parse_la_data.py b/parse_la_data.py new file mode 100644 index 0000000..1dcb4b5 --- /dev/null +++ b/parse_la_data.py @@ -0,0 +1,431 @@ +import argparse +import getpass +import json +from pathlib import Path +from typing import List, Set + +import pandas as pd +import geopandas as gpd +from shapely.geometry import Point + +from upload_trees import DBTreeUploader + + +class CityParser(object): + + # 
def __init__(self, city, path: Path, geojson_path: Path=None):
    """Locate the input files for one city.

    Args:
        city: City label stored on the parser.
        path: Directory scanned for at most one ``.csv`` and one
            ``.geojson`` file; falsy means "no city directory".
        geojson_path: Fallback GeoJSON file, used only when ``path`` did not
            yield one.

    Sets ``self.csv_path`` and ``self.geo_json_path`` (each may be None).
    """
    self.city = city

    # One directory pass; both suffix filters work on the same listing.
    files = [entry for entry in path.iterdir() if entry.is_file()] if path else []
    csv_files = [entry for entry in files if entry.suffix == '.csv']
    geojson_files = [entry for entry in files if entry.suffix == '.geojson']

    assert len(geojson_files) <= 1
    assert len(csv_files) <= 1

    self.csv_path = csv_files[-1] if csv_files else None
    if geojson_files:
        self.geo_json_path = geojson_files[-1]
    else:
        self.geo_json_path = geojson_path if geojson_path else None
def get_column(self, df, potential_columns: List[str], exclude_col: str = None, titleize=False):
    """Copy the first matching source column into the canonical column.

    ``potential_columns[0]`` is the canonical (target) name; every entry is a
    lowercase candidate compared against the stripped, lowercased column
    names of ``df``. The first candidate that is present and is not
    ``exclude_col`` (case-insensitive) is copied, title-cased when
    ``titleize`` is true. ``df`` is modified in place and returned; it is
    returned untouched when nothing matches.
    """
    target = potential_columns[0]
    normalized = [name.strip().lower() for name in df.columns]
    excluded = None if exclude_col is None else exclude_col.lower()

    for candidate in potential_columns:
        if candidate not in normalized or candidate.lower() == excluded:
            continue
        # Map the normalized hit back to the column's real spelling.
        source = df.columns[normalized.index(candidate)]
        values = df[source]
        df[target] = values.str.title() if titleize else values
        break
    return df
@staticmethod
def cat_parser(df, min_field, max_field, og_field, cats=None):
    """Split a categorical range column into numeric min/max columns.

    Recognised category spellings (after stripping whitespace):
      * ``"a-b"``  -> ``min_field = a``, ``max_field = b``
      * ``"n+"``   -> ``min_field = n``
      * ``">n"``   -> ``min_field = n``
    Any other category leaves both fields as None for its rows.

    Args:
        df: Frame to annotate; modified in place and returned.
        min_field: Name of the lower-bound column to create.
        max_field: Name of the upper-bound column to create.
        og_field: Name of the existing string column holding the ranges.
        cats: Categories to parse. When None they are derived from the
            distinct values of ``og_field``, ignoring missing values and
            dash-only placeholders such as ``"---"``.

    Raises:
        AttributeError: if ``og_field`` holds non-string, non-missing values
            (e.g. a numeric column). Callers may rely on this to detect that
            the column is not a range column, so it is left to propagate.
            ``df`` is not modified in that case.
        ValueError: if a recognised category has a non-integer bound.
    """
    if cats is None:
        # Missing values (None *and* NaN) are skipped rather than passed to
        # .strip(); previously one NaN made the whole column look non-string.
        cats = [
            cat.strip()
            for cat in df[og_field].unique().tolist()
            if not pd.isnull(cat) and set(cat.strip()) != {'-'}
        ]

    # Loop-invariant; evaluated before touching df so a failure leaves it intact.
    stripped = df[og_field].str.strip()

    df[min_field] = None
    df[max_field] = None
    for cat in cats:
        mask = stripped == cat
        bounds = cat.split('-')
        if len(bounds) == 2:
            df.loc[mask, min_field] = int(bounds[0])
            df.loc[mask, max_field] = int(bounds[1])
        elif cat.endswith('+'):
            df.loc[mask, min_field] = int(cat[:-1])
        elif cat.startswith('>'):
            df.loc[mask, min_field] = int(cat[1:])

    return df
class BeverlyHillsParser(CityParser):
    """Parser for the Beverly Hills inventory."""

    def get_maximal_df(self, df=None):
        """Normalise the Beverly Hills columns, then run the generic pipeline.

        Args:
            df: Optional pre-loaded frame. When None the city's file is read
                via ``read_df()``. The parameter mirrors the base-class
                signature so the override can be called like any other
                ``CityParser``.

        Returns:
            Whatever ``CityParser.get_maximal_df`` returns for the adjusted
            frame.
        """
        if df is None:
            df = self.read_df()

        # 'height' is renamed up front so the generic pipeline sees it under
        # the canonical exact-height name.
        # NOTE(review): presumably this also keeps it from being tried as a
        # range column by the base class - confirm.
        df = df.rename(columns={'height': 'exact_height'})

        # House number + title-cased street name; the raw ADDRESS column is
        # dropped once the combined 'address' exists.
        df = df.assign(
            address=df['ADDRESS'].astype(str).str.cat(df['STREET'].str.title(), sep=' '),
        ).drop('ADDRESS', axis=1)
        return super().get_maximal_df(df=df)
class PasadenaParser(CityParser):
    """Parser for the Pasadena inventory (split genus/species and address parts)."""

    def get_maximal_df(self, df=None):
        """Assemble botanical name and street address, then run the generic pipeline.

        Args:
            df: Optional pre-loaded frame; when None the city's file is read
                via ``read_df()``.

        Returns:
            Whatever ``CityParser.get_maximal_df`` returns for the adjusted
            frame.
        """
        if df is None:
            df = self.read_df()

        df['Botanical'] = df['Genus'].str.cat(df['Species'], sep=' ').str.title()
        # NOTE(review): 'Species' is dropped presumably so the generic
        # common-name lookup does not pick up the species epithet - confirm.
        df = df.drop('Species', axis=1)

        # Nullable integer keeps house numbers from rendering as floats.
        df['House_Numb'] = df['House_Numb'].astype(pd.Int64Dtype())
        house_number = df['House_Numb'].astype(str)

        no_direction = df['Street_Dir'].isnull()
        direction = df['Street_Dir'].astype(str).str.strip()
        has_direction = ~no_direction & (direction != '')

        df['Street'] = df['Street_Nam'].astype(str).str.cat(df['Street_Typ'], sep=' ')
        # Previously Street_Dir was only tested for null and never used, so
        # the direction was silently lost; prefix it where it exists.
        df.loc[has_direction, 'Street'] = direction[has_direction].str.cat(
            df.loc[has_direction, 'Street'], sep=' '
        )

        df['Address'] = house_number.str.cat(df['Street'], sep=' ')
        # Rows without a direction keep the original number + name + type form.
        df.loc[no_direction, 'Address'] = house_number[no_direction].str.cat(
            df.loc[no_direction, 'Street_Nam'], sep=' '
        ).str.cat(df.loc[no_direction, 'Street_Typ'], sep=' ')

        return super().get_maximal_df(df=df)
def parse_all(self):
    """Parse every mapped city and return one concatenated frame.

    For each entry in ``self.mapper`` a parser is built from the city name,
    its data directory (if any) and its GeoJSON under ``all/`` (if any).
    Only parsers that ended up with a ``geo_json_path`` are run.

    Returns:
        The concatenation of all per-city frames, with the text columns
        (name_common, name_botanical, address, city, condition) converted to
        stripped strings. Missing values stay missing; text columns that no
        city produced are skipped.

    Raises:
        ValueError: from ``pd.concat`` when no city produced a frame.
    """
    frames = []
    for city, parser_cls in self.mapper.items():
        city_parser = parser_cls(
            city,
            self.data_dirs.get(city),
            self.geojsons.get(city),
        )
        if city_parser.geo_json_path:
            frames.append(city_parser.get_maximal_df())

    df = pd.concat(frames)
    str_cols = (
        'name_common',
        'name_botanical',
        'address',
        'city',
        'condition',
    )
    for col in str_cols:
        if col not in df.columns:
            continue
        # Convert only the non-null cells: a blanket astype(str) would turn
        # NaN into the literal string 'nan'. to_numpy() sidesteps index
        # alignment, since the concatenated index contains duplicates.
        present = df[col].notnull()
        df.loc[present, col] = df.loc[present, col].astype(str).str.strip().to_numpy()

    return df
self.df.assign(synonym=self.df['name_botanical'].str.lower()) + return pd.merge( + df, + self.synonym_df[['synonym', 'botanical_name']], + how=how, + on='synonym' + ).drop(['name_botanical', 'synonym'], axis=1) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--datapath", required=True, type=str) + parser.add_argument("--host", required=False, type=str, default=None) + args = parser.parse_args() + + data_parser = StilesDataParser(args.datapath) + # df = data_parser.parse_all() + # df['location'] = gpd.GeoSeries(df['geometry']).apply(lambda x: Point(x.y, x.x)).to_wkt() + # df.to_csv('stiles.trees.csv', index=False) + # assert False + df = pd.read_csv('stiles.trees.csv') + matcher = SpeciesMatcher(df) + matched_df = matcher.match() + uploader = DBTreeUploader() + uploader.truncate_trees() + uploader.update_species( + matcher.species_df.rename(columns={'Species ID': 'species_id'}).rename( + columns={col: col.lower() for col in matcher.species_df.columns} + ) + ) + species_mapper = uploader.get_species_ids_mapper() + matched_df['species_id'] = matched_df['botanical_name'].map(species_mapper) + assert matched_df['species_id'].notnull().all() + uploader.upload_trees( + matched_df.assign(state='CA').rename(columns={'Species ID': 'species_id'}).rename( + columns={col: col.lower() for col in matched_df.columns} + ) + ) diff --git a/pruning_planting.py b/pruning_planting.py index c23a98a..7617442 100644 --- a/pruning_planting.py +++ b/pruning_planting.py @@ -17,8 +17,8 @@ def load_dataset(name, line_to_points=False): reproject it into WGS84, and return the geodataframe. 
""" # Load the street planting shape data, reprojecting into WGS84 - gdf = gpd.read_file(name, crs='+init=epsg:2229') - gdf = gdf.to_crs({'init': 'epsg:4326', 'no_defs': True}) + gdf = gpd.read_file(name, crs='2229') + gdf = gdf.to_crs(4326) if line_to_points: dfs = [] for row in gdf.itertuples(): @@ -134,7 +134,7 @@ def collapse_years(tree): trees = gpd.GeoDataFrame( trees, geometry=gpd.points_from_xy(trees['longitude'], trees['latitude']), - crs={'init': 'epsg:4326'} + crs=4326 ) trees["pruning_year"] = trees.apply(collapse_years, axis=1) diff --git a/requirements.txt b/requirements.txt index a3e3678..2b9d1df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,9 @@ -geopandas==0.4.0 \ No newline at end of file +geopandas +pandas==1.1.5 +Flask==2.0.2 +gunicorn==20.1.0 +rtree +pymysql +Pillow +google-cloud-storage +cloud-sql-python-connector[pymysql] diff --git a/sm_parser.py b/sm_parser.py new file mode 100644 index 0000000..3bef796 --- /dev/null +++ b/sm_parser.py @@ -0,0 +1,42 @@ +import sys +from io import StringIO + +import pandas as pd + + +def parse_trees(df=None, stdout=False): + heritage_trees = pd.read_csv('data/heritage_trees.csv') + if df is None: + df = pd.read_csv(StringIO(''.join(sys.stdin.readlines()))).rename(columns={'Tree ID': 'tree_id'}) + else: + df = df.rename(columns={'Tree ID': 'tree_id'}) + df = pd.merge(df, heritage_trees, how='left', on='tree_id').rename( + columns={ + 'Species ID': 'species_id', + 'Name Botanical': 'name_botanical', + 'Height Min': 'height_min_ft', + 'Height Max': 'height_max_ft', + 'DBH Min': 'diameter_min_in', + 'DBH Max': 'diameter_max_in', + 'Latitude': 'latitude', + 'Longitude': 'longitude', + 'Location Description': 'location_description', + 'year_added': 'heritageYear', + 'heritage_number': 'heritageNumber', + 'text': 'heritageText' + } + ) + df = df.assign( + address=df['Address'].astype(str).str.cat(df['Street'], sep=' '), + city='Santa Monica', + state='CA', + heritage=df['heritageNumber'].notnull() + 
if __name__ == "__main__":
    # Script mode: read the tree CSV from stdin and print JSON records.
    # `stdout` must be passed by keyword - the first positional parameter of
    # parse_trees is `df`, so a bare `True` was treated as the data frame.
    parse_trees(stdout=True)
def init_connection_engine(local=False):
    """Create a SQLAlchemy engine for the ``publictrees`` MySQL database.

    The connection goes through a Cloud SQL unix socket named after
    ``TREE_DB_CONNECTION_STR``.

    Args:
        local: When true, connect as ``root`` through a socket under
            ``~/cloudsql`` (the directory used for the local proxy);
            otherwise use ``TREE_DB_USER`` and the ``/cloudsql`` mount.

    Returns:
        A new ``sqlalchemy`` engine. A fresh engine is built on every call.

    Raises:
        KeyError: if ``TREE_DB_PASS``, ``TREE_DB_CONNECTION_STR`` or (when not
            local) ``TREE_DB_USER`` is missing from the environment.
    """
    # Resolve the proxy directory from the current user's home instead of a
    # hard-coded developer path.
    socket_root = os.path.join(os.path.expanduser('~'), 'cloudsql') if local else '/cloudsql'
    socket_path = f"{socket_root}/{os.environ['TREE_DB_CONNECTION_STR']}"
    return sqlalchemy.create_engine(
        sqlalchemy.engine.url.URL.create(
            drivername="mysql+pymysql",
            username='root' if local else os.environ['TREE_DB_USER'],
            password=os.environ['TREE_DB_PASS'],
            database="publictrees",
            query={
                "unix_socket": socket_path
            }
        ),
    )
def get_tree_images(self, tree_id, eol_id, existing_images: Set[str]):
    """Fetch up to three EOL images for one species and store the new ones.

    Args:
        tree_id: Identifier stored as ``species_id`` on each ``TreeImage``.
        eol_id: Encyclopedia of Life page id; may arrive as int, float or a
            missing value.
        existing_images: Salted MD5 hashes of already-stored image URLs.
            Updated in place with every image uploaded by this call.

    Requires the ``TREE_SALT`` environment variable. Nothing is done when
    ``eol_id`` is missing or the EOL request is not successful.
    """
    assert os.environ.get('TREE_SALT') is not None

    try:
        page_id = int(eol_id)
    except (TypeError, ValueError):
        # None / NaN: no EOL page to query for this species.
        return

    # The id is interpolated bare; the previous "$" prefix was a leftover
    # from JavaScript template syntax and became part of the query value.
    url = f'http://eol.org/api/pages/1.0.json?id={page_id}&images_per_page=3&videos_per_page=0&sounds_per_page=0&maps_per_page=0&texts_per_page=0&details=true&taxonomy=false'
    r = requests.get(url)
    images_to_retrieve = []
    if r.ok:
        request_body = r.json()
        data_objects = request_body['taxonConcept'].get('dataObjects')
        for data_object in data_objects or []:
            media_url = data_object['eolMediaURL']
            hashed_url = hashlib.md5(
                f"{media_url}{os.environ['TREE_SALT']}".encode('utf-8')
            ).hexdigest()
            if hashed_url in existing_images:
                continue
            images_to_retrieve.append(
                TreeImage(
                    species_id=tree_id,
                    retrieval_url=media_url,
                    hashed_url=hashed_url,
                    img_type=data_object['dataSubtype'],
                    description=data_object['description'] if 'description' in data_object else None,
                    image=None,
                    author=data_object['rightsHolder'].strip() if 'rightsHolder' in data_object else None,
                    author_url=f'https://eol.org/pages/{page_id}/media',
                )
            )

    for image in images_to_retrieve:
        uploaded_tree = self.get_and_upload_image(image)
        if uploaded_tree:
            # Remember the hash so later species do not re-fetch the same URL.
            existing_images.add(uploaded_tree.hashed_url)
@app.get("/tree/{tree_id}")
async def get_tree(tree_id):
    """Return the detail record for one tree, including its species images.

    Args:
        tree_id: Value matched against ``trees.id``.

    Returns:
        A dict of the selected columns with ``images`` decoded to a list of
        ``{"url": ..., "author": {"name": ..., "url": ...}}`` objects, or
        None when no row matches.
    """
    sql = """
        SELECT
            botanical_name AS name_botanical,
            common_name AS name_common,
            family_botanical_name AS family_name_botanical,
            family_common_name AS family_name_common,
            address,
            city,
            state,
            diameter_min_in,
            diameter_max_in,
            exact_diameter,
            height_min_ft,
            height_max_ft,
            exact_height,
            native AS nativity,
            estimated_value,
            tree_condition,
            shade_production,
            irrigation_requirements,
            form,
            type,
            iucn_status,
            iucn_doi_or_url,
            ST_LATITUDE(location) AS latitude,
            ST_LONGITUDE(location) AS longitude,
            heritage,
            heritage_number AS heritageNumber,
            heritage_text AS heritageText,
            heritage_year AS heritageYear,
            JSON_ARRAYAGG(
                JSON_OBJECT(
                    'url',
                    CONCAT('https://storage.googleapis.com/public-tree-map-images/', hashed_original_url, '.',
                           extension),
                    'author', JSON_OBJECT(
                        'name', author,
                        'url', author_url
                    )
                )
            ) AS images,
            T.id AS tree_id
        FROM trees T
        INNER JOIN species s on T.species_id = s.id
        LEFT JOIN images i on s.id = i.species_id
        WHERE
            T.id = :tree_id
        GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27
    """
    with init_connection_engine(LOCAL).connect() as conn:
        resultset = conn.execute(sqlalchemy.text(sql), {'tree_id': tree_id}).mappings()
        result = resultset.fetchone()

    if result:
        result = dict(result)
        images = json.loads(result['images']) if result['images'] else []
        # The LEFT JOIN emits one all-NULL image object for species that have
        # no images; drop entries without a URL so clients get [] instead.
        result['images'] = [image for image in images if image.get('url')]
    return result
class DBCursor(object):
    """Context manager around a Cloud SQL connection to ``publictrees``.

    Despite the name, entering the context yields the *connection* (callers
    create their own cursors and commit explicitly). Leaving the context
    closes the connection and the connector that created it.
    """

    def __init__(self, password=None):
        """Open the connection.

        Args:
            password: Database password; falls back to the ``TREE_DB_PASS``
                environment variable when falsy.

        Raises:
            KeyError: if ``TREE_DB_CONNECTION_STR`` (or ``TREE_DB_PASS`` when
                no password is given) is not set.
        """
        # Keep a handle on the connector so it can be shut down on exit;
        # previously it was created inline and never closed.
        self.connector = Connector()
        try:
            self.connection = self.connector.connect(
                os.environ['TREE_DB_CONNECTION_STR'],
                'pymysql',
                user='root',
                password=password if password else os.environ['TREE_DB_PASS'],
                db='publictrees'
            )
        except Exception:
            self.connector.close()
            raise

    def __enter__(self):
        return self.connection

    def __exit__(self, exc_type, exc_value, exc_traceback):
        try:
            self.connection.close()
        finally:
            self.connector.close()
def get_species_ids_mapper(self) -> Dict[str, int]:
    """Return a ``botanical_name -> id`` lookup for every row in ``species``.

    Returns:
        A plain dict; empty when the table has no rows. If a botanical name
        occurs more than once, the last row fetched wins.
    """
    with DBCursor() as conn:
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        cursor.execute(
            """
            SELECT id, botanical_name
            FROM species
            """
        )
        results = cursor.fetchall()

    # Built directly from the row dicts: no DataFrame round-trip, and an
    # empty result set yields {} instead of failing on a missing column.
    return {row['botanical_name']: row['id'] for row in results}
row.height_min_ft, + row.height_max_ft, + row.exact_height if hasattr(row, 'exact_height') else None, + row.estimated_value if hasattr(row, 'estimated_value') else None, + row.location, + row.heritage if hasattr(row, 'heritage') else False, + row.heritage_year if hasattr(row, 'heritage_year') else None, + row.heritage_number if hasattr(row, 'heritage_number') else None, + row.heritage_text if hasattr(row, 'heritage_text') else None + ) for row in batch_df.itertuples() + ] + ) + conn.commit() + + def update_species(self, df): + with DBCursor() as conn: + cursor = conn.cursor(pymysql.cursors.DictCursor) + cursor.execute( + """ + SELECT botanical_name FROM species + """ + ) + botanical_names = set([row['botanical_name'] for row in cursor.fetchall()]) + update_df = df[df['botanical_name'].isin(botanical_names)] + cursor.executemany( + """ + UPDATE species + SET + common_name = %s, + family_botanical_name = %s, + family_common_name = %s, + native = %s, + eol_id = %s, + eol_overview_url = %s, + simplified_iucn_status = %s, + iucn_status = %s, + iucn_doi_or_url = %s, + shade_production = %s, + form = %s, + type = %s, + cal_ipc_url = %s, + irrigation_requirements = %s, + species_id = %s + WHERE + botanical_name = %s + """, + [ + ( + row.common_name, + row.family_botanical_name, + row.family_common_name, + row.native, + int(row.eol_id) if row.eol_id is not None else None, + row.eol_overview_url, + row.simplified_iucn_status, + row.iucn_status, + row.iucn_doi_or_url, + row.shade_production, + row.form, + row.type, + row.cal_ipc_url, + row.irrigation_requirements, + row.species_id, + row.botanical_name, + ) for row in update_df.itertuples() + ] + + ) + + write_df = df[~df['botanical_name'].isin(botanical_names)] + self.upload_species(write_df) + + @staticmethod + def upload_species(df: pd.DataFrame): + with DBCursor() as conn: + sql = """ + INSERT INTO species( + botanical_name, + common_name, + family_botanical_name, + family_common_name, + native, + eol_id, + 
eol_overview_url, + simplified_iucn_status, + iucn_status, + iucn_doi_or_url, + shade_production, + form, + type, + cal_ipc_url, + irrigation_requirements, + species_id + ) + VALUES ( + %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s + ) + """ + df = df.where((pd.notnull(df)), None) + conn.cursor().executemany( + sql, + [ + ( + row.botanical_name, + row.common_name, + row.family_botanical_name, + row.family_common_name, + row.native, + int(row.eol_id) if row.eol_id is not None else None, + row.eol_overview_url, + row.simplified_iucn_status, + row.iucn_status, + row.iucn_doi_or_url, + row.shade_production, + row.form, + row.type, + row.cal_ipc_url, + row.irrigation_requirements, + row.species_id + ) for row in df.itertuples() + ] + ) + + conn.commit() + + +class SMTreeUploader(DBTreeUploader): + + def __init__(self): + super().__init__() + + def prepare_df(self, df): + df['location'] = gpd.GeoSeries(gpd.points_from_xy(df['latitude'], df['longitude'])).to_wkt() + self.df = df.rename(columns={ + 'heritageYear': 'heritage_year', + 'heritageNumber': 'heritage_number', + 'heritageText': 'heritage_text', + }) \ No newline at end of file