Skip to content
This repository was archived by the owner on May 5, 2023. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
*.pt
__pycache__
.vscode
15 changes: 15 additions & 0 deletions autocards.egg-info/requires.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
PyPDF2==1.26.0
beautifulsoup4==4.9.3
fastcore==1.4.2
huggingface_hub==0.6.0
nltk==3.5
pandas==1.2.3
protobuf==3.20.1
requests==2.24.0
sentencepiece==0.1.96
tika==1.24
torch==1.8.1
tqdm==4.55.1
transformers==4.19.1
epub-conversion==1.0.15
xml_cleaner==2.0.4
1 change: 1 addition & 0 deletions autocards/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = "0.0.1"
2 changes: 1 addition & 1 deletion autocards.py → autocards/autocards.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pipelines import qg_pipeline
from autocards.pipelines import qg_pipeline

from tqdm import tqdm
from pathlib import Path
Expand Down
39 changes: 39 additions & 0 deletions autocards/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from autocards.autocards import Autocards
from fastcore.script import *
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For clarity, I would prefer avoiding the star import, if you don't mind.


@call_parse
def main(
    mode: Param(
        "The type of data that will be autocarded",
        str,
        choices=["pdf", "epub", "raw", "textfile", "url"],
    ),
    output: Param(
        "The method to output the file as",
        str,
        choices=["anki", "csv", "json", "print"],
    ),
    content: Param("The content to be autocarded", str),
):
    "Accelerating learning through machine-generated flashcards."
    # NOTE(review): the docstring above is deliberately left untouched;
    # call_parse presumably surfaces it as the CLI description -- confirm
    # before rewording it.
    #
    # Flow: build an Autocards instance, feed it `content` using the
    # ingestion method selected by `mode`, then emit the result in the
    # format selected by `output`. Unrecognised values for either argument
    # fall through without doing anything.

    cards = Autocards()

    # Ingestion: name of the Autocards method that handles each mode.
    consumer_by_mode = {
        "pdf": "consume_pdf",
        "epub": "consume_epub",
        "raw": "consume_var",
        "textfile": "consume_textfile",
        "url": "consume_web",
    }
    consumer_name = consumer_by_mode.get(mode)
    if consumer_name is not None:
        getattr(cards, consumer_name)(content)

    # Export: each entry is deferred so only the selected exporter runs.
    # File-based exports go to fixed names in the working directory.
    exporter_by_output = {
        "anki": lambda: cards.to_anki(deckname="autocards_export", tags=[mode]),
        "csv": lambda: cards.to_csv("output.csv", prefix=""),
        "json": lambda: cards.to_json("output.json", prefix=""),
        "print": lambda: cards.pprint(prefix='', jeopardy=False),
    }
    run_export = exporter_by_output.get(output)
    if run_export is not None:
        run_export()
4 changes: 4 additions & 0 deletions pipelines.py → autocards/pipelines.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import itertools
import logging
import nltk

from typing import Optional, Dict, Union

from nltk import sent_tokenize
Expand Down Expand Up @@ -45,6 +47,8 @@ def __init__(
self.model_type = "t5"
else:
self.model_type = "bart"

nltk.download('punkt')

def __call__(self, inputs: str):
inputs = " ".join(inputs.split())
Expand Down
37 changes: 7 additions & 30 deletions examples_script/napoleon/napoleon.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,23 @@
#!/usr/bin/env python3


import sys
sys.path.append("../../.")
from autocards import Autocards
# import sys
# sys.path.append("../../.")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why comment it instead of removing it?

from autocards.autocards import Autocards
from pathlib import Path


prefix = "On Napoléon : "
file = Path("./napoleon.txt")
file = Path("/path/to/Autocards/examples_script/napoleon/napoleon.txt")

if not file.exists():
print("File not found!")
raise SystemExit()
else:
full_text = file.read_text()
full_text = file.read_text()[:1_000]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be best to reduce the size of the example text rather than silently reading only a portion, don't you think? Anyhow, why do you feel this is necessary? Is the text really that big?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was doing it to get quick results, since it is an example script. I think it would be easier for people to understand the output. I agree, it would be better to reduce the example text than to do this, since people might not look too closely at it.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not that big so I'll go ahead and just undo this change here and leave the file alone


auto = Autocards()
auto.clear()

sentence_list = full_text.split(".")
for i in sentence_list: # readds the final point
i = f"{i}.".strip()

output_file = Path(f"{file.parent}/output_file.txt")
output_file.touch()

print("Initialization complete.")

n = len(sentence_list)
for a in enumerate(sentence_list):
i = a[0] ; s = a[1]
print(s)
print(f"Progress : {i}/{n} ({round(i/n*100,1)}%)")
try :
auto.consume_text(s)
string = str('\"' + prefix + auto.qa_pairs[-1]['question'] + '\",\"' + auto.qa_pairs[-1]['answer'] + '\"' + "\n")
except IndexError:
print(f"Skipped sentence {s}")
string = str(f"\"Skipped sentence : \", \"{s}\n\"")
finally :
with open(output_file.absolute(), "a") as of:
of.write(string)
auto.consume_var(full_text)
auto.to_json("output.json", prefix="")
Copy link
Collaborator

@thiswillbeyourgithub thiswillbeyourgithub May 22, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that this is a substantial change, can you confirm you have tested it?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I've tested it. The other way kept failing since it was written against an outdated API. I'm betting the other examples also don't work, but I haven't looked at those.


auto.print(prefix)
50 changes: 50 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from setuptools import setup, find_packages

# Packaging metadata for the `autocards` distribution, handed to setuptools.

# Installs an `autocard` console command that calls `main` in autocards.cli.
_ENTRY_POINTS = {
    'console_scripts': [
        'autocard = autocards.cli:main'
    ],
}

_KEYWORDS = [
    'artificial intelligence',
    'deep learning',
    'anki',
    'flashcards',
    'learning'
]

# Runtime dependencies. Every entry except `lxml` is pinned to an exact
# version.
_INSTALL_REQUIRES = [
    'PyPDF2 == 1.26.0',
    'beautifulsoup4 == 4.9.3',
    'fastcore == 1.4.2',
    'nltk == 3.5',
    'pandas == 1.2.3',
    'protobuf == 3.20.1',
    'requests == 2.24.0',
    'sentencepiece == 0.1.96',
    'tika == 1.24',
    'torch == 1.8.1',
    'tqdm == 4.55.1',
    'transformers == 4.19.1',
    'epub-conversion == 1.0.15',
    'xml_cleaner == 2.0.4',
    'lxml'
]

_CLASSIFIERS = [
    'Development Status :: 4 - Beta',
    'Intended Audience :: Developers',
    'Topic :: Scientific/Engineering :: Artificial Intelligence',
    'License :: OSI Approved :: MIT License',
    'Programming Language :: Python :: 3.6',
]

setup(
    # Identity
    name='autocards',
    version='0.0.1',
    description='Accelerating learning through machine-generated flashcards.',
    # NOTE(review): a content type is declared but no `long_description` is
    # passed -- confirm whether a README was meant to be wired in.
    long_description_content_type='text/markdown',
    url='https://github.com/paulbricman/autocards',
    license='MIT',
    author='Paul Bricman',
    # NOTE(review): this value looks like a redaction placeholder rather
    # than a real address -- confirm against the upstream file.
    author_email='[email protected]',
    keywords=_KEYWORDS,
    classifiers=_CLASSIFIERS,
    # Contents: an empty exclude list means every discovered package ships.
    packages=find_packages(exclude=[]),
    include_package_data=True,
    entry_points=_ENTRY_POINTS,
    install_requires=_INSTALL_REQUIRES,
)