paulbricman · ncoop57 · May 14, 2022 · May 14, 2022 · May 14, 2022 · May 14, 2022
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
 *.pt
 __pycache__
+.vscode
diff --git a/autocards.egg-info/requires.txt b/autocards.egg-info/requires.txt
@@ -0,0 +1,15 @@
+PyPDF2==1.26.0
+beautifulsoup4==4.9.3
+fastcore==1.4.2
+huggingface_hub==0.6.0
+nltk==3.5
+pandas==1.2.3
+protobuf==3.20.1
+requests==2.24.0
+sentencepiece==0.1.96
+tika==1.24
+torch==1.8.1
+tqdm==4.55.1
+transformers==4.19.1
+epub-conversion==1.0.15
+xml_cleaner==2.0.4
diff --git a/autocards/__init__.py b/autocards/__init__.py
@@ -0,0 +1 @@
+__version__ = "0.0.1"
diff --git a/autocards.py → autocards/autocards.py b/autocards.py → autocards/autocards.py
@@ -1,4 +1,4 @@
-from pipelines import qg_pipeline
+from autocards.pipelines import qg_pipeline
 
 from tqdm import tqdm
 from pathlib import Path

diff --git a/autocards/cli.py b/autocards/cli.py
@@ -0,0 +1,44 @@
+from autocards.autocards import Autocards
+from fastcore.script import Param, call_parse
+
+# `call_parse` turns the annotated parameters below into command-line
+# arguments, so `main` doubles as the console-script entry point.
+@call_parse
+def main(
+    mode: Param(
+        "The type of data that will be autocarded",
+        str,
+        choices = ["pdf", "epub", "raw", "textfile", "url"]
+    ),
+    output: Param(
+        "The method to output the file as",
+        str,
+        choices = ["anki", "csv", "json", "print"]
+    ),
+    content: Param("The content to be autocarded", str),
+):
+    "Accelerating learning through machine-generated flashcards."
+
+    auto = Autocards()
+    # Hand `content` to the Autocards consumer selected by `mode`.
+    if mode == "pdf":
+        auto.consume_pdf(content)
+    elif mode == "epub":
+        auto.consume_epub(content)
+    elif mode == "raw":
+        auto.consume_var(content)
+    elif mode == "textfile":
+        auto.consume_textfile(content)
+    elif mode == "url":
+        auto.consume_web(content)
+
+    # Emit the result in the format selected by `output`; the csv and json
+    # exports use fixed file names, and anki cards are tagged with `mode`.
+    if output == "anki":
+        auto.to_anki(deckname="autocards_export", tags=[mode])
+    elif output == "csv":
+        auto.to_csv("output.csv", prefix="")
+    elif output == "json":
+        auto.to_json("output.json", prefix="")
+    elif output == "print":
+        auto.pprint(prefix='', jeopardy=False)
diff --git a/pipelines.py → autocards/pipelines.py b/pipelines.py → autocards/pipelines.py
@@ -1,5 +1,7 @@
 import itertools
 import logging
+import nltk
+
 from typing import Optional, Dict, Union
 
 from nltk import sent_tokenize
@@ -45,6 +47,8 @@ def __init__(
             self.model_type = "t5"
         else:
             self.model_type = "bart"
+
+        nltk.download('punkt')
 
     def __call__(self, inputs: str):
         inputs = " ".join(inputs.split())

diff --git a/examples_script/napoleon/napoleon.py b/examples_script/napoleon/napoleon.py
@@ -1,46 +1,24 @@
 #!/usr/bin/env python3
 
 
-import sys
-sys.path.append("../../.")
-from autocards import Autocards
+from autocards.autocards import Autocards
 from pathlib import Path
 
 
 prefix = "On Napoléon : "
-file = Path("./napoleon.txt")
+# Resolve the sample text relative to this script so the example runs from
+# any working directory without editing a machine-specific path.
+file = Path(__file__).resolve().parent / "napoleon.txt"
 
 if not file.exists():
     print("File not found!")
     raise SystemExit()
 else:
-    full_text = file.read_text()
+    # Only the first 1,000 characters are used (presumably to keep the run short).
+    full_text = file.read_text()[:1_000]
 
 auto = Autocards()
-auto.clear()
-
-sentence_list = full_text.split(".")
-for i in sentence_list:  # readds the final point
-    i = f"{i}.".strip()
-
-output_file = Path(f"{file.parent}/output_file.txt")
-output_file.touch()
-
-print("Initialization complete.")
-
-n = len(sentence_list)
-for a in enumerate(sentence_list):
-    i = a[0] ; s = a[1]
-    print(s)
-    print(f"Progress : {i}/{n} ({round(i/n*100,1)}%)")
-    try :
-        auto.consume_text(s)
-        string = str('\"' + prefix + auto.qa_pairs[-1]['question'] + '\",\"' + auto.qa_pairs[-1]['answer'] + '\"' + "\n")
-    except IndexError:
-        print(f"Skipped sentence {s}")
-        string = str(f"\"Skipped sentence : \", \"{s}\n\"")
-    finally :
-        with open(output_file.absolute(), "a") as of:
-            of.write(string)
+auto.consume_var(full_text)
+auto.to_json("output.json", prefix="")
 
 auto.print(prefix)
diff --git a/setup.py b/setup.py
@@ -0,0 +1,57 @@
+from setuptools import setup, find_packages
+
+# Packaging metadata for the `autocards` distribution; installs the
+# `autocard` console command, which runs `autocards.cli:main`.
+setup(
+  name = 'autocards',
+  packages = find_packages(exclude=[]),
+  include_package_data = True,
+  entry_points={
+    'console_scripts': [
+      'autocard = autocards.cli:main'
+    ],
+  },
+  version = '0.0.1',
+  license='MIT',
+  description = 'Accelerating learning through machine-generated flashcards.',
+  author = 'Paul Bricman',
+  author_email = '[email protected]',
+  # NOTE(review): no long_description is passed, so this content type has
+  # nothing to describe yet -- confirm whether the README should be wired in.
+  long_description_content_type = 'text/markdown',
+  url = 'https://github.com/paulbricman/autocards',
+  keywords = [
+    'artificial intelligence',
+    'deep learning',
+    'anki',
+    'flashcards',
+    'learning'
+  ],
+  # The pinned pandas release below does not support Python 3.6, so the
+  # minimum interpreter is declared explicitly instead of failing at install.
+  python_requires = '>=3.7.1',
+  install_requires=[
+    'PyPDF2 == 1.26.0',
+    'beautifulsoup4 == 4.9.3',
+    'fastcore == 1.4.2',
+    'nltk == 3.5',
+    'pandas == 1.2.3',
+    'protobuf == 3.20.1',
+    'requests == 2.24.0',
+    'sentencepiece == 0.1.96',
+    'tika == 1.24',
+    'torch == 1.8.1',
+    'tqdm == 4.55.1',
+    'transformers == 4.19.1',
+    'epub-conversion == 1.0.15',
+    'xml_cleaner == 2.0.4',
+    'lxml'
+  ],
+  classifiers=[
+    'Development Status :: 4 - Beta',
+    'Intended Audience :: Developers',
+    'Topic :: Scientific/Engineering :: Artificial Intelligence',
+    'License :: OSI Approved :: MIT License',
+    'Programming Language :: Python :: 3.7',
+  ],
+)