Skip to content

Commit

Permalink
Merge branch 'release' of https://github.com/sorrychoe/pyBigKinds into release
Browse files Browse the repository at this point in the history
  • Loading branch information
sorrychoe committed Aug 20, 2023
2 parents 6b0f807 + be2908a commit 9a9f18f
Show file tree
Hide file tree
Showing 10 changed files with 56 additions and 20 deletions.
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ repos:
language: system
entry: make format
types: [python]
- id: lint
name: lint
language: system
entry: make lint
types: [python]

- repo: https://github.com/asottile/add-trailing-comma
rev: v2.4.0
Expand Down
6 changes: 4 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@ uninstall:
wheel:
$(python) setup.py bdist_wheel

lint:
$(python) -m isort --settings-file=setup.cfg pyBigKinds/ test/

format:
$(python) -m isort --settings-file=setup.cfg pyBigKinds/
$(python) -m flake8 --config=setup.cfg pyBigKinds/
$(python) -m flake8 --config=setup.cfg pyBigKinds/ test/
$(python) -m pylint --rcfile=.pylintrc pyBigKinds/

test:
Expand Down
17 changes: 15 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,21 @@ pyBigKinds는 한국 언론의 빅데이터 저장소인 BigKinds에서 추출

## Usage

![](docs/example1.png)
![](docs/example2.png)
```python
import pyBigKinds as bk
bk.press_counter(df)
```

![](https://github.com/sorrychoe/pyBigKinds/blob/release/docs/example1.png)

```python
import pyBigKinds as bk
bk.keywords_wordcloud(df, "중앙일보")
```

![](https://github.com/sorrychoe/pyBigKinds/blob/release/docs/example2.png)


## License
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ isort>=5.7
matplotlib>=3.5.3
mlxtend>=0.22.0
openpyxl>=3.1.2
Pandas>=1.5.3
Pandas>=2
pre-commit>=2.13
pylint>=2.9
pytest>=7
Expand Down
8 changes: 5 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = pyBigKinds
version = 1.2.0
version = 1.2.5
description = BigKinds Data Analysis Toolkit for python
author = Sorrychoe
license = MIT
Expand All @@ -20,7 +20,7 @@ packages = find:
python_requires = >=3.8
install_requires =
matplotlib>=3.5.3
pandas>=1.5.3
pandas>=2
wordcloud>=1.8.2.2
scikit-learn>=1.0.0
tomotopy>=0.12.4
Expand All @@ -41,7 +41,9 @@ max-complexity = 18
select = B,C,E,F,W,T4,B9
extend-ignore = E203, W503
ignore = E203,E226,E251,E501,E722,F821,W503,W605
per-file-ignores = __init__.py:F401,F403,F405
per-file-ignores =
__init__.py:F401,F403,F405
test/*:F403,F405
exclude =

[isort]
Expand Down
Binary file modified test/test.xlsx
Binary file not shown.
8 changes: 7 additions & 1 deletion test/test_base.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
import pytest
# pylint: disable=F403
# pylint: disable=F405

import pandas as pd
import pytest

from pyBigKinds import *


@pytest.fixture(scope="module")
def dataframe():
    """Return the contents of ``test/test.xlsx``, read once per test module."""
    return pd.read_excel("test/test.xlsx")


def test_header_remover(dataframe):
ans = header_remover(dataframe)
assert ans[0] == " 한반도 긴장 높인 북한의 군사정찰위성 발사 규탄한다"
Expand Down
8 changes: 5 additions & 3 deletions test/test_preprocessing.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import pytest
import pandas as pd
import numpy as np
import pandas as pd
import pytest

from pyBigKinds import *


@pytest.fixture(scope="module")
def dataframe():
df = pd.read_excel("test/test.xlsx")
Expand Down Expand Up @@ -33,7 +35,7 @@ def test_tfidf(dataframe):
def test_tfidf_vector(dataframe):
    """Check that ``tfidf_vector`` returns a NumPy array of the expected shape.

    Args:
        dataframe: module-scoped fixture holding the data read from
            ``test/test.xlsx``.
    """
    vector = tfidf_vector(dataframe)
    # isinstance is the idiomatic type check; comparing type() objects with
    # == is flagged by pycodestyle (E721).
    assert isinstance(vector, np.ndarray)
    assert vector.shape == (31, 2160)


def test_normalize_vector(dataframe):
Expand Down
17 changes: 10 additions & 7 deletions test/test_representation.py
Original file line number Diff line number Diff line change
@@ -1,57 +1,60 @@
from pyBigKinds import *

import numpy as np
import pandas as pd

import pytest

from pyBigKinds import *


@pytest.fixture(scope="module")
def vector():
    """Return ``tfidf_vector`` applied to ``test/test.xlsx``, once per module."""
    frame = pd.read_excel("test/test.xlsx")
    return tfidf_vector(frame)


@pytest.fixture(scope="module")
def dataframe():
    """Return the contents of ``test/test.xlsx``, read once per test module."""
    return pd.read_excel("test/test.xlsx")


def test_press_counter(dataframe):
    """Check the column labels of ``press_counter`` and that entry 0 is the maximum."""
    result = press_counter(dataframe)
    labels = result.columns
    assert labels[0] == '언론사'
    assert labels[1] == '기사'
    # The largest article count is expected under index label 0.
    articles = result['기사']
    assert articles.max() == articles[0]


def test_pca(vector):
    """Check the result of ``pca``: two named component columns and shape (31, 2)."""
    reduced = pca(vector)

    for position, label in enumerate(('component 0', 'component 1')):
        assert reduced.columns[position] == label
    assert reduced.shape == (31, 2)


def test_nmf(vector):
    """Check the result of ``nmf``: two named component columns and shape (31, 2)."""
    reduced = nmf(vector)

    for position, label in enumerate(('component 0', 'component 1')):
        assert reduced.columns[position] == label
    assert reduced.shape == (31, 2)


def test_t_sne(vector):
    """Check the result of ``t_sne(vector, 100)``: two named columns, shape (31, 2)."""
    reduced = t_sne(vector, 100)

    for position, label in enumerate(('component 0', 'component 1')):
        assert reduced.columns[position] == label
    assert reduced.shape == (31, 2)


def test_lsa(vector):
    """Check the result of ``lsa``: two named component columns and shape (31, 2)."""
    reduced = lsa(vector)

    for position, label in enumerate(('component 0', 'component 1')):
        assert reduced.columns[position] == label
    assert reduced.shape == (31, 2)


def test_kmeans(vector):
Expand Down
5 changes: 4 additions & 1 deletion test/test_visualization.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
import pandas as pd
import pytest

from pyBigKinds import *

import pytest

@pytest.fixture(scope="module")
def vector():
    """Return ``tfidf_vector`` applied to ``test/test.xlsx``, once per module."""
    frame = pd.read_excel("test/test.xlsx")
    return tfidf_vector(frame)


def test_keywords_wordcloud(vector):
    """Expect ``keywords_wordcloud`` to raise TypeError when given the vector fixture."""
    label = "press"
    with pytest.raises(TypeError):
        keywords_wordcloud(vector, label)


def test_top_words(vector):
with pytest.raises(TypeError):
top_words(vector, "press")
Expand Down

0 comments on commit 9a9f18f

Please sign in to comment.