Skip to content

Commit 899d8d4

Browse files
committed
Master merge conflict resolve
2 parents 16c2bb9 + cd6157b commit 899d8d4

File tree

109 files changed

+110003
-2457
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

109 files changed

+110003
-2457
lines changed

.github/workflows/tests.yml

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# run test suites
#
# Runs on every push and pull request. For each combination of Python version
# and domain sub-directory, builds the domain's conda environment and, when
# the domain has a 'tests' directory, runs its pytest suite.

name: Tests
on:
  - pull_request
  - push

jobs:
  # Exposes the 'should_skip' output consumed by the 'tests' job below.
  # see: https://github.com/fkirc/skip-duplicate-actions
  skip_duplicate:
    continue-on-error: true
    runs-on: ubuntu-latest
    outputs:
      should_skip: ${{ steps.skip_check.outputs.should_skip }}
    steps:
      - id: skip_check
        # pinned to a release tag rather than the moving 'master' branch
        uses: fkirc/skip-duplicate-actions@v5
        with:
          concurrent_skipping: "same_content"
          skip_after_successful_duplicate: "true"
          do_not_skip: '["pull_request", "workflow_dispatch", "schedule"]'

  # NOTE:
  #   Run all the steps even if there are no tests defined for a given domain sub-directory.
  #   This is to make sure that the environment definition is at the very least buildable.
  tests:
    needs: skip_duplicate
    if: ${{ needs.skip_duplicate.outputs.should_skip != 'true' }}
    runs-on: ${{ matrix.os }}
    continue-on-error: ${{ matrix.allow-failure }}
    env:
      CACHE_NUMBER: 0  # increment to reset cache

    # ensure conda env activation is performed automatically
    defaults:
      run:
        shell: bash -el {0}

    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        # somehow mamba with python 3.12 doesn't resolve spacy although available...
        python-version: ["3.9", "3.10", "3.11"]
        allow-failure: [false]
        domain: ["eo", "nlp"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: "0"

      - name: Setup Mamba
        uses: conda-incubator/setup-miniconda@v3
        with:
          auto-update-conda: true
          python-version: ${{ matrix.python-version }}
          # NOTE(review): conda-forge has deprecated the Mambaforge installers in
          # favour of Miniforge3 - confirm this variant still resolves with 'latest'.
          miniforge-variant: Mambaforge
          miniforge-version: latest
          activate-environment: github-ci-test-python${{ matrix.python-version }}-${{ matrix.domain }}
          use-mamba: true
          use-only-tar-bz2: true

      # The date is part of the cache key, so a new cache entry is used each day.
      - name: Set cache date
        run: echo "DATE=$(date +'%Y%m%d')" >> "$GITHUB_ENV"

      - uses: actions/cache@v4
        id: cache
        with:
          path: ${{ env.CONDA }}/envs/github-ci-test-python${{ matrix.python-version }}-${{ matrix.domain }}
          # '${{ }}' cannot be nested inside an expression: build the path with format() instead
          key: conda-python${{ matrix.python-version }}-${{ matrix.domain }}-${{ hashFiles(format('{0}/environment.yml', matrix.domain)) }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }}

      - name: Display Python
        run: which python

      # Only rebuild the environment when it was not restored from the cache.
      - name: Update environment
        if: steps.cache.outputs.cache-hit != 'true'
        run: |
          echo "python=${{ matrix.python-version }}" > ${{ env.CONDA }}/envs/github-ci-test-python${{ matrix.python-version }}-${{ matrix.domain }}/conda-meta/pinned
          mamba env update \
            -n github-ci-test-python${{ matrix.python-version }}-${{ matrix.domain }} \
            -f ${{ matrix.domain }}/environment.yml

      - name: Display Packages
        if: ${{ matrix.python-version != 'none' }}
        run: pip freeze

      - name: Display Environment Variables
        run: |
          hash -r
          env | sort

      # Sets HAS_TEST_DIR to 'true' or 'false' depending on whether '<domain>/tests' exists.
      - name: Check Tests
        id: check_tests
        run: |
          echo "HAS_TEST_DIR=$(test -d ${{ matrix.domain }}/tests && echo 'true' || echo 'false')" >> "$GITHUB_OUTPUT"

      - name: Install Tests Dependencies
        if: ${{ steps.check_tests.outputs.HAS_TEST_DIR == 'true' }}
        run: pip install -r requirements-dev.txt

      # Tests are launched from the domain's 'notebooks' directory.
      - name: Run Tests
        if: ${{ steps.check_tests.outputs.HAS_TEST_DIR == 'true' }}
        run: |
          cd ${{ matrix.domain }}/notebooks
          python -m pytest -vvv ../tests

.gitignore

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,28 @@
11
### IDE
22
**/.idea/
33
**/.vscode/
4+
**/*.code-workspace
5+
6+
### Caches
7+
**/__pycache__/
8+
**/cache/
9+
**/*tree-tagger-linux*
10+
**/.pytest_cache
11+
**/condaenv.*.requirements.txt
12+
13+
## Chroma VDB caches
14+
**/*.bin
15+
**/*.pickle
16+
**/*.sqlite3
17+
18+
### Binaries
19+
**/*.jar
420

521
### Notebooks
622
# expect examples per domain
23+
# disallow notebooks at root
724
# a leading '/' anchors the pattern to the directory of this .gitignore ('./' is not understood by git)
/*.ipynb
825
**/.ipynb_checkpoints/
26+
27+
### Outputs
28+
*.log

Makefile

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Included custom configs change the value of MAKEFILE_LIST
# Extract the required reference beforehand so we can use it for help target
MAKEFILE_NAME := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST))
# Include custom config if it is available
-include Makefile.config

# Application
APP_ROOT := $(abspath $(lastword $(MAKEFILE_NAME))/..)
APP_NAME := $(shell basename $(APP_ROOT))
APP_DOMAINS ?= eo nlp
DOCKER_REPO ?= crim-ca/pavics-jupyter-images

# One 'docker-build-<domain>' target per entry of APP_DOMAINS
DOCKER_BUILDS := $(addprefix docker-build-, $(APP_DOMAINS))

# None of these targets produce a file named after themselves
.PHONY: docker-build $(DOCKER_BUILDS)

# Build the image of one domain from its sub-directory and keep a copy of the
# output in 'make-docker-build-<domain>.log'.
# 'pipefail' is needed because the exit status of a pipeline is otherwise the
# one of 'tee', which would hide a failed 'docker build' from make.
$(DOCKER_BUILDS): docker-build-%:
	bash -o pipefail -c 'docker build -t $(DOCKER_REPO)/$*:latest "$(APP_ROOT)/$*" 2>&1 | tee "$(APP_ROOT)/make-$@.log"'

# Build the images of all domains
docker-build: $(DOCKER_BUILDS)

eo/environment.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ dependencies:
99
- intake-stac
1010
- pyproj
1111
- rasterio
12-
- sat-search
1312
- shapely
1413

1514
# TODO: These next packages could possibly be added to a more generic 'vision' image, from which 'eo' would be built

nlp/CHANGELOG.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,22 @@ Changes
33

44
Unreleased (latest)
55
===================
6-
0.4.1 (2023-11-24)
6+
7+
- n/a
8+
9+
0.5.0 (2023-12-13)
710
===================
11+
812
Changes:
913
--------
14+
- Update NLU demo notebook with latest pipeline improvements and a STAC wrapper to convert NL queries to STAC requests.
15+
- Add `duckling` installation in the Docker image to allow running it as a child process rather than as a sibling Docker service.
1016
- Update base image version in Dockerfile
1117
- Add `mamba` cache cleanup to reduce image size
1218

1319
Fixes:
1420
------
15-
- ...
21+
- Fix dependencies to make them functional across multiple Python versions.
1622

1723
0.4.0 (2022-12-23)
1824
===================
@@ -127,4 +133,4 @@ Changes:
127133

128134
Fixes:
129135
------
130-
- na
136+
- na

nlp/Dockerfile

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,16 @@ FROM birdhouse/pavics-jupyter-base:0.5.0
22

33
# must update conda env as root, because of a permission error when having pip dependencies in the input yml file
44
USER root
5+
# install pre-requirement for compiling some dependencies
6+
# refresh the package index first so the install does not depend on lists left in the base image;
# 'apt-get' is used because the 'apt' front-end is meant for interactive use
RUN apt-get update && apt-get install -y pkg-config libpcre++-dev
57

68
COPY environment.yml /environment.yml
7-
COPY notebook_config.yml /notebook_config.yml
89

910
# update env "birdy"
1011
# use umask 0000 so that package files for the updated environment are usable by the user for the jupyter-conda-extension
11-
RUN umask 0000 && mamba env update -f /environment.yml \
12-
&& mamba clean -a
12+
RUN umask 0000 && \
13+
mamba env update -f /environment.yml && \
14+
mamba clean -y -a
1315

1416
# Set the encoding to UTF-8, this is needed for heideltime to work properly
1517
ENV LANG=C.UTF-8
@@ -19,21 +21,27 @@ RUN python -m spacy download en_core_web_trf
1921

2022
# Downloading the ner-large flair model
2123
RUN mkdir flair_models && \
22-
curl -L -o flair_models/ner-large https://huggingface.co/flair/ner-english-large/resolve/main/pytorch_model.bin > /dev/null
24+
curl -L -o flair_models/ner-large \
25+
https://huggingface.co/flair/ner-english-large/resolve/main/pytorch_model.bin > /dev/null
2326

2427
# Heideltime Tree-tagger Installation
2528
RUN mkdir -p heideltime/tree-tagger-linux-3.2.3 && cd heideltime/tree-tagger-linux-3.2.3 && \
26-
curl -o tree-tagger-linux-3.2.3.tar.gz https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/tree-tagger-linux-3.2.3.tar.gz && \
27-
curl -o tagger-scripts.tar.gz https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/tagger-scripts.tar.gz && \
28-
curl -o english.par.gz https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/english.par.gz && \
29-
curl -o install-tagger.sh https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/install-tagger.sh && \
29+
curl -o tree-tagger-linux-3.2.3.tar.gz \
30+
https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/tree-tagger-linux-3.2.3.tar.gz && \
31+
curl -o tagger-scripts.tar.gz \
32+
https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/tagger-scripts.tar.gz && \
33+
curl -o english.par.gz \
34+
https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/english.par.gz && \
35+
curl -o install-tagger.sh \
36+
https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/install-tagger.sh && \
3037
tar -xvzf tree-tagger-linux-3.2.3.tar.gz && \
3138
sh install-tagger.sh && \
3239
rm tree-tagger-linux-3.2.3.tar.gz tagger-scripts.tar.gz english.par.gz install-tagger.sh
3340

3441
# Download and extract heideltime.standalone.jar
35-
RUN cd heideltime && \
36-
curl -L -o heideltime-standalone-2.2.1.tar.gz https://github.com/HeidelTime/heideltime/releases/download/VERSION2.2.1/heideltime-standalone-2.2.1.tar.gz && \
42+
RUN cd heideltime && \
43+
curl -L -o heideltime-standalone-2.2.1.tar.gz \
44+
https://github.com/HeidelTime/heideltime/releases/download/VERSION2.2.1/heideltime-standalone-2.2.1.tar.gz && \
3745
tar -xzvf heideltime-standalone-2.2.1.tar.gz heideltime-standalone/de.unihd.dbs.heideltime.standalone.jar && \
3846
mv heideltime-standalone/de.unihd.dbs.heideltime.standalone.jar . && \
3947
rmdir heideltime-standalone && \
@@ -42,9 +50,20 @@ RUN cd heideltime && \
4250
# Give read&write permission to jenkins for config
4351
RUN chown -R jenkins heideltime
4452

53+
# Setup Haskell for Duckling server
# Installs the Haskell 'stack' tool, builds Duckling from source and removes the checkout.
# The binaries are copied to /usr/local/bin instead of stack's default (/root/.local/bin),
# because the image later switches to the 'jenkins' user.
# NOTE(review): assumes /root is not accessible to 'jenkins' in the base image - confirm.
RUN curl -sSL https://get.haskellstack.org/ | bash && \
    git clone https://github.com/facebook/duckling && \
    cd duckling && \
    stack build && \
    stack install --local-bin-path /usr/local/bin && \
    cd .. && \
    rm -fr duckling
# kept so that anything relying on the previous install location on PATH keeps working
ENV PATH="/root/.local/bin:$PATH"
62+
4563
# Give ownership of the conda cache folder to jenkins, to enable installing packages by the user from JupyterLab
4664
RUN mkdir /opt/conda/pkgs/cache && chown -R 1000:1000 /opt/conda/pkgs/cache
4765

66+
COPY notebook_config.yml /notebook_config.yml
67+
4868
# specify user because of problem running start-notebook.sh when being root
4969
USER jenkins
50-
File renamed without changes.

nlp/environment.yml

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,26 @@ channels:
55
- conda-forge
66

77
dependencies:
8-
- intake-esm==2021.1.15
9-
- intake-stac==0.3.0
10-
- sat-search==0.3.0
8+
- intake-esm
9+
- intake-stac>=0.4.0
1110
- threddsclient==0.4.2
12-
- openjdk==8.0.152
13-
# python-flair=0.8 only works with numpy<=1.19.5
14-
- python-flair=0.8
15-
- numpy<=1.19.5
16-
- spacy==3.1.0
17-
- python-dateutil==2.7.5
18-
- python-levenshtein==0.12.2
19-
- requests=2.25.1
20-
- pip==20.3.3
11+
- openjdk==8.0.152
12+
- python-flair
13+
- numpy
14+
- pydantic<2
15+
- python-levenshtein
16+
- requests
17+
- pip>=22
2118
- pip:
2219
- textsearch==0.0.21
23-
- spacy==3.1.0
20+
- osmnx
21+
- langchain
22+
- spacy>=3.5,<4
23+
- spacy-transformers
24+
- transformers<4.31
25+
- sentence_transformers
26+
- chromadb
27+
- shapely
28+
- ipywidgets
29+
- nltk
30+
- pystac_client

nlp/notebooks/NLU_demo.ipynb

Lines changed: 27431 additions & 0 deletions
Large diffs are not rendered by default.
File renamed without changes.

0 commit comments

Comments
 (0)