Merge branch 'release' of https://github.com/sorrychoe/pyBigKinds into release
sorrychoe · Aug 20, 2023 · 9a9f18f
2 parents 6b0f807 + be2908a
commit 9a9f18f
Show file tree

Hide file tree

Showing 10 changed files with 56 additions and 20 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -9,6 +9,11 @@ repos:
         language: system
         entry: make format
         types: [python]
+      - id: lint
+        name: lint
+        language: system
+        entry: make lint
+        types: [python]
 
   - repo: https://github.com/asottile/add-trailing-comma
     rev: v2.4.0

diff --git a/Makefile b/Makefile
@@ -32,9 +32,11 @@ uninstall:
 wheel:
 	$(python) setup.py bdist_wheel
 
+lint:
+	$(python) -m isort --settings-file=setup.cfg pyBigKinds/ test/
+
 format:
-	$(python) -m isort --settings-file=setup.cfg pyBigKinds/
-	$(python) -m flake8 --config=setup.cfg pyBigKinds/
+	$(python) -m flake8 --config=setup.cfg pyBigKinds/ test/
 	$(python) -m pylint --rcfile=.pylintrc pyBigKinds/
 
 test:

diff --git a/README.md b/README.md
@@ -22,8 +22,21 @@ pyBigKinds는 한국 언론의 빅데이터 저장소인 BigKinds에서 추출
 
 ## Usage
 
-![](docs/example1.png)
-![](docs/example2.png)
+```python
+import pyBigKinds as bk
+
+bk.press_counter(df)
+```
+
+![](https://github.com/sorrychoe/pyBigKinds/blob/release/docs/example1.png)
+
+```python
+import pyBigKinds as bk
+
+bk.keywords_wordcloud(df, "중앙일보")
+```
+
+![](https://github.com/sorrychoe/pyBigKinds/blob/release/docs/example2.png)
 
 
 ## License

diff --git a/requirements.txt b/requirements.txt
@@ -3,7 +3,7 @@ isort>=5.7
 matplotlib>=3.5.3
 mlxtend>=0.22.0
 openpyxl>=3.1.2
-Pandas>=1.5.3
+Pandas>=2
 pre-commit>=2.13
 pylint>=2.9
 pytest>=7

diff --git a/setup.cfg b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = pyBigKinds
-version = 1.2.0
+version = 1.2.5
 description = BigKinds Data Analysis Toolkit for python
 author = Sorrychoe
 license = MIT
@@ -20,7 +20,7 @@ packages = find:
 python_requires = >=3.8
 install_requires =
     matplotlib>=3.5.3
-    pandas>=1.5.3
+    pandas>=2
     wordcloud>=1.8.2.2
     scikit-learn>=1.0.0
     tomotopy>=0.12.4
@@ -41,7 +41,9 @@ max-complexity = 18
 select = B,C,E,F,W,T4,B9
 extend-ignore = E203, W503
 ignore = E203,E226,E251,E501,E722,F821,W503,W605
-per-file-ignores = __init__.py:F401,F403,F405
+per-file-ignores =
+    __init__.py:F401,F403,F405
+    test/*:F403,F405
 exclude =
 
 [isort]

diff --git a/test/test.xlsx b/test/test.xlsx
diff --git a/test/test_base.py b/test/test_base.py
@@ -1,12 +1,18 @@
-import pytest
+# pylint: disable=F403
+# pylint: disable=F405
+
 import pandas as pd
+import pytest
+
 from pyBigKinds import *
 
+
 @pytest.fixture(scope="module")
 def dataframe():
     df = pd.read_excel("test/test.xlsx")
     return df
 
+
 def test_header_remover(dataframe):
     ans = header_remover(dataframe)
     assert ans[0] == " 한반도 긴장 높인 북한의 군사정찰위성 발사 규탄한다"

diff --git a/test/test_preprocessing.py b/test/test_preprocessing.py
@@ -1,8 +1,10 @@
-import pytest
-import pandas as pd
 import numpy as np
+import pandas as pd
+import pytest
+
 from pyBigKinds import *
 
+
 @pytest.fixture(scope="module")
 def dataframe():
     df = pd.read_excel("test/test.xlsx")
@@ -33,7 +35,7 @@ def test_tfidf(dataframe):
 def test_tfidf_vector(dataframe):
     vector = tfidf_vector(dataframe)
     assert type(vector) == np.ndarray
-    assert vector.shape == (31,2160)
+    assert vector.shape == (31, 2160)
 
 
 def test_normalize_vector(dataframe):

diff --git a/test/test_representation.py b/test/test_representation.py
@@ -1,57 +1,60 @@
-from pyBigKinds import *
-
 import numpy as np
 import pandas as pd
-
 import pytest
 
+from pyBigKinds import *
+
+
 @pytest.fixture(scope="module")
 def vector():
     df = pd.read_excel("test/test.xlsx")
     vector = tfidf_vector(df)
     return vector
 
+
 @pytest.fixture(scope="module")
 def dataframe():
     df = pd.read_excel("test/test.xlsx")
     return df
 
+
 def test_press_counter(dataframe):
     counter = press_counter(dataframe)
     assert counter.columns[0] == '언론사'
     assert counter.columns[1] == '기사'
     assert counter['기사'].max() == counter['기사'][0]
 
+
 def test_pca(vector):
     pca_df = pca(vector)
 
     assert pca_df.columns[0] == 'component 0'
     assert pca_df.columns[1] == 'component 1'
-    assert pca_df.shape == (31,2)
+    assert pca_df.shape == (31, 2)
 
 
 def test_nmf(vector):
     nmf_df = nmf(vector)
 
     assert nmf_df.columns[0] == 'component 0'
     assert nmf_df.columns[1] == 'component 1'
-    assert nmf_df.shape == (31,2)
+    assert nmf_df.shape == (31, 2)
 
 
 def test_t_sne(vector):
     tsne_df = t_sne(vector, 100)
 
     assert tsne_df.columns[0] == 'component 0'
     assert tsne_df.columns[1] == 'component 1'
-    assert tsne_df.shape == (31,2)
+    assert tsne_df.shape == (31, 2)
 
 
 def test_lsa(vector):
     lsa_df = lsa(vector)
 
     assert lsa_df.columns[0] == 'component 0'
     assert lsa_df.columns[1] == 'component 1'
-    assert lsa_df.shape == (31,2)
+    assert lsa_df.shape == (31, 2)
 
 
 def test_kmeans(vector):

diff --git a/test/test_visualization.py b/test/test_visualization.py
@@ -1,18 +1,21 @@
 import pandas as pd
+import pytest
+
 from pyBigKinds import *
 
-import pytest
 
 @pytest.fixture(scope="module")
 def vector():
     df = pd.read_excel("test/test.xlsx")
     vector = tfidf_vector(df)
     return vector
 
+
 def test_keywords_wordcloud(vector):
     with pytest.raises(TypeError):
         keywords_wordcloud(vector, "press")
 
+
 def test_top_words(vector):
     with pytest.raises(TypeError):
         top_words(vector, "press")