-
-
Notifications
You must be signed in to change notification settings - Fork 19
Make into proper lib add additional dependencies and CLI #32
base: master
Are you sure you want to change the base?
Changes from all commits
3dcc156
77f5041
e69238b
1153e37
cc119c5
4bb3fbf
f5437a9
12c5593
ef2bea1
c25e21e
92f0a60
b13c0da
f4fa5d9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,2 +1,3 @@ | ||
| *.pt | ||
| __pycache__ | ||
| .vscode |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| PyPDF2==1.26.0 | ||
| beautifulsoup4==4.9.3 | ||
| fastcore==1.4.2 | ||
| huggingface_hub==0.6.0 | ||
| nltk==3.5 | ||
| pandas==1.2.3 | ||
| protobuf==3.20.1 | ||
| requests==2.24.0 | ||
| sentencepiece==0.1.96 | ||
| tika==1.24 | ||
| torch==1.8.1 | ||
| tqdm==4.55.1 | ||
| transformers==4.19.1 | ||
| epub-conversion==1.0.15 | ||
| xml_cleaner==2.0.4 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| __version__ = "0.0.1" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
| from autocards.autocards import Autocards | ||
| from fastcore.script import * | ||
|
|
||
| @call_parse | ||
| def main( | ||
| mode: Param( | ||
| "The type of data that will be autocarded", | ||
| str, | ||
| choices = ["pdf", "epub", "raw", "textfile", "url"] | ||
| ), | ||
| output: Param( | ||
| "The method to output the file as", | ||
| str, | ||
| choices = ["anki", "csv", "json", "print"] | ||
| ), | ||
| content: Param("The content to be autocarded", str), | ||
| ): | ||
| "Accelerating learning through machine-generated flashcards." | ||
|
|
||
| auto = Autocards() | ||
| if mode == "pdf": | ||
| auto.consume_pdf(content) | ||
| elif mode == "epub": | ||
| auto.consume_epub(content) | ||
| elif mode == "raw": | ||
| auto.consume_var(content) | ||
| elif mode == "textfile": | ||
| auto.consume_textfile(content) | ||
| elif mode == "url": | ||
| auto.consume_web(content) | ||
|
|
||
| if output == "anki": | ||
| auto.to_anki(deckname="autocards_export", tags=[mode]) | ||
| elif output == "csv": | ||
| auto.to_csv("output.csv", prefix="") | ||
| elif output == "json": | ||
| auto.to_json("output.json", prefix="") | ||
| elif output == "print": | ||
| auto.pprint(prefix='', jeopardy=False) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,46 +1,23 @@ | ||
| #!/usr/bin/env python3 | ||
|
|
||
|
|
||
| import sys | ||
| sys.path.append("../../.") | ||
| from autocards import Autocards | ||
| # import sys | ||
| # sys.path.append("../../.") | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why comment it out instead of removing it? |
||
| from autocards.autocards import Autocards | ||
| from pathlib import Path | ||
|
|
||
|
|
||
| prefix = "On Napoléon : " | ||
| file = Path("./napoleon.txt") | ||
| file = Path("/path/to/Autocards/examples_script/napoleon/napoleon.txt") | ||
ncoop57 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| if not file.exists(): | ||
| print("File not found!") | ||
| raise SystemExit() | ||
| else: | ||
| full_text = file.read_text() | ||
| full_text = file.read_text()[:1_000] | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it would be best to reduce the size of the example text rather than silently reading only a portion, don't you think? Anyhow, why do you feel this is necessary? Is the text really that big?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I was doing it to get quick results since it is an example script. I think it would be easier for people to understand the output. I agree, it would be better to reduce the example text than to do this, since people might not look too closely at it.
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's not that big, so I'll go ahead and just undo this change here and leave the file alone. |
||
|
|
||
| auto = Autocards() | ||
| auto.clear() | ||
|
|
||
| sentence_list = full_text.split(".") | ||
| for i in sentence_list: # readds the final point | ||
| i = f"{i}.".strip() | ||
|
|
||
| output_file = Path(f"{file.parent}/output_file.txt") | ||
| output_file.touch() | ||
|
|
||
| print("Initialization complete.") | ||
|
|
||
| n = len(sentence_list) | ||
| for a in enumerate(sentence_list): | ||
| i = a[0] ; s = a[1] | ||
| print(s) | ||
| print(f"Progress : {i}/{n} ({round(i/n*100,1)}%)") | ||
| try : | ||
| auto.consume_text(s) | ||
| string = str('\"' + prefix + auto.qa_pairs[-1]['question'] + '\",\"' + auto.qa_pairs[-1]['answer'] + '\"' + "\n") | ||
| except IndexError: | ||
| print(f"Skipped sentence {s}") | ||
| string = str(f"\"Skipped sentence : \", \"{s}\n\"") | ||
| finally : | ||
| with open(output_file.absolute(), "a") as of: | ||
| of.write(string) | ||
| auto.consume_var(full_text) | ||
| auto.to_json("output.json", prefix="") | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Given that this is a substantial change, can you confirm that you have tested it?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, I've tested it. The other way kept failing since it was made with an outdated API. I'm betting the other examples also don't work, but I haven't looked at those. |
||
|
|
||
| auto.print(prefix) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| from setuptools import setup, find_packages | ||
|
|
||
| setup( | ||
| name = 'autocards', | ||
| packages = find_packages(exclude=[]), | ||
| include_package_data = True, | ||
| entry_points={ | ||
| 'console_scripts': [ | ||
| 'autocard = autocards.cli:main' | ||
| ], | ||
| }, | ||
| version = '0.0.1', | ||
| license='MIT', | ||
| description = 'Accelerating learning through machine-generated flashcards.', | ||
| author = 'Paul Bricman', | ||
| author_email = '[email protected]', | ||
| long_description_content_type = 'text/markdown', | ||
| url = 'https://github.com/paulbricman/autocards', | ||
| keywords = [ | ||
| 'artificial intelligence', | ||
| 'deep learning', | ||
| 'anki', | ||
| 'flashcards', | ||
| 'learning' | ||
| ], | ||
| install_requires=[ | ||
| 'PyPDF2 == 1.26.0', | ||
| 'beautifulsoup4 == 4.9.3', | ||
| 'fastcore == 1.4.2', | ||
| 'nltk == 3.5', | ||
| 'pandas == 1.2.3', | ||
| 'protobuf == 3.20.1', | ||
| 'requests == 2.24.0', | ||
| 'sentencepiece == 0.1.96', | ||
| 'tika == 1.24', | ||
| 'torch == 1.8.1', | ||
| 'tqdm == 4.55.1', | ||
| 'transformers == 4.19.1', | ||
| 'epub-conversion == 1.0.15', | ||
| 'xml_cleaner == 2.0.4', | ||
| 'lxml' | ||
| ], | ||
| classifiers=[ | ||
| 'Development Status :: 4 - Beta', | ||
| 'Intended Audience :: Developers', | ||
| 'Topic :: Scientific/Engineering :: Artificial Intelligence', | ||
| 'License :: OSI Approved :: MIT License', | ||
| 'Programming Language :: Python :: 3.6', | ||
| ], | ||
| ) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For clarity, I would prefer avoiding the star import, if you don't mind.