From d0024ca4f205517db0f9dc12dfd277d2424da911 Mon Sep 17 00:00:00 2001 From: Carmine DiMascio Date: Sun, 8 Dec 2019 15:24:01 -0500 Subject: [PATCH] Implement Spache Readability Scorer #5 --- CONTRIBUTING.md | 8 + README.md | 18 + readability/data/spache_easy.txt | 1064 +++++++++++++++++++ readability/data/spache_easy_porterstem.txt | 1064 +++++++++++++++++++ readability/data/{fix.py => stem.py} | 2 +- readability/readability.py | 6 +- readability/scorers/__init__.py | 1 + readability/scorers/spache.py | 35 + readability/text/analyzer.py | 20 + test/test_readability.py | 6 + 10 files changed, 2222 insertions(+), 2 deletions(-) create mode 100644 readability/data/spache_easy.txt create mode 100644 readability/data/spache_easy_porterstem.txt rename readability/data/{fix.py => stem.py} (91%) create mode 100644 readability/scorers/spache.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1ff9e80..7c69947 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,6 +7,14 @@ Need help? Reach out on [gitter](https://gitter.im/cdimascio-oss/community) +### Prerequisites + +1. Install deps +``` +pip install . +python -m nltk.downloader punkt +``` + ### Code 1. Fork the repo diff --git a/README.md b/README.md index adaacca..64e8939 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ r.dale_chall() r.ari() r.linsear_write() r.smog() +r.spache() ``` **\*Note:** `text` must contain >= 100 words\* @@ -47,6 +48,7 @@ r.smog() - [Coleman Liau Index](#coleman-liau-index) - [Gunning Fog](#gunning-fog) - [SMOG](#smog) +- [Spache](#spache) - [Linsear Write](#linsear-write) ## Readability Metric Details and Properties @@ -187,6 +189,22 @@ print(s.score) print(s.grade_level) ``` +The Spache Readability Formula, published in 1953 in The Elementary School Journal, is used for primary-grade reading materials. The Spache Formula is best used to calculate the difficulty of text that falls at the 3rd grade level or below. 
+ +**_call:_** + +```python +r.spache() +``` + +**_example:_** + +```python +s = r.spache() +print(s.score) +print(s.grade_level) +``` + ### Linsear Write Linsear Write is a readability metric for English text, purportedly developed for the United States Air Force to help them calculate the readability of their technical manuals. diff --git a/readability/data/spache_easy.txt b/readability/data/spache_easy.txt new file mode 100644 index 0000000..5d264d6 --- /dev/null +++ b/readability/data/spache_easy.txt @@ -0,0 +1,1064 @@ +a +able +about +above +across +act +add +afraid +after +afternoon +again +against +ago +air +airplane +alarm +all +almost +alone +along +already +also +always +am +among +an +and +angry +animal +another +answer +any +anyone +appear +apple +are +arm +around +arrow +as +ask +asleep +at +ate +attention +aunt +awake +away +b +baby +back +bad +bag +ball +balloon +bang +bank +bark +barn +basket +be +bean +bear +beat +beautiful +became +because +become +bed +bee +been +before +began +begin +behind +believe +bell +belong +bend +bent +beside +best +better +between +big +bird +birthday +bit +bite +black +blanket +blew +block +blow +blue +board +boat +book +boot +born +borrow +both +bother +bottle +bottom +bought +bow +box +boy +branch +brave +bread +break +breakfast +breath +brick +bridge +bright +bring +broke +broken +brother +brought +brown +brush +build +bump +burn +bus +busy +but +butter +button +buy +by +c +cabin +cage +cake +call +came +camp +can +candle +candy +can\t +cap +captain +car +card +care +careful +carrot +carry +case +castle +cat +catch +cattle +caught +cause +cent +certain +chair +chance +change +chase +chicken +chief +child +children +church +circle +circus +city +clap +clean +clever +cliff +climb +clock +close +cloth +clothes +clown +coat +cold +color +come +comfortable +company +contest +continue +cook +cool +corner +could +count +country +course +cover +cow +crawl +cream +cry +cup +curtain +cut +d +Dad +dance +danger +dangerous +dark 
+dash +daughter +day +dear +decide +deep +desk +did +didn\t +die +different +dig +dinner +direction +disappear +disappoint +discover +distance +do +doctor +does +dog +dollar +done +don\t +door +down +dragon +dream +dress +drink +drive +drop +drove +dry +duck +during +dust +e +each +eager +ear +early +earn +earth +easy +eat +edge +egg +eight +eighteen +either +elephant +else +empty +end +enemy +enough +enter +even +ever +every +everything +exact +except +excite +exclaim +explain +eye +face +fact +fair +fall +family +far +farm +farmer +farther +fast +fat +father +feather +feed +feel +feet +fell +fellow +felt +fence +few +field +fierce +fight +figure +fill +final +find +fine +finger +finish +fire +first +fish +five +flag +flash +flat +flew +floor +flower +fly +follow +food +for +forest +forget +forth +found +four +fourth +fox +fresh +friend +frighten +frog +from +front +fruit +full +fun +funny +fur +g +game +garden +gasp +gate +gave +get +giant +gift +girl +give +glad +glass +go +goat +gone +good +got +grandfather +grandmother +grass +gray +great +green +grew +grin +ground +group +grow +growl +guess +gun +h +had +hair +half +hall +hand +handle +hang +happen +happiness +happy +hard +harm +has +hat +hate +have +he +head +hear +heard +heavy +held +hello +help +hen +her +here +herself +he\s +hid +hide +high +hill +him +himself +his +hit +hold +hole +holiday +home +honey +hop +horn +horse +hot +hour +house +how +howl +hum +hundred +hung +hungry +hunt +hurry +hurt +husband +i +I +ice +idea +if +I\ll +I\m +imagine +important +in +inch +indeed +inside +instead +into +invite +is +it +it\s +its +j +jacket +jar +jet +job +join +joke +joy +jump +just +k +keep +kept +key +kick +kill +kind +king +kitchen +kitten +knee +knew +knock +know +l +ladder +lady +laid +lake +land +large +last +late +laugh +lay +lazy +lead +leap +learn +least +leave +left +leg +less +let +let\s +letter +lick +lift +light +like +line +lion +list +listen +little +live +load +long +look +lost +lot +loud +love 
+low +luck +lump +lunch +m +machine +made +magic +mail +make +man +many +march +mark +market +master +matter +may +maybe +me +mean +meant +meat +meet +melt +men +merry +met +middle +might +mile +milk +milkman +mind +mine +minute +miss +mistake +moment +money +monkey +month +more +morning +most +mother +mountain +mouse +mouth +move +much +mud +music +must +my +n +name +near +neck +need +needle +neighbor +neighborhood +nest +never +new +next +nibble +nice +night +nine +no +nod +noise +none +north +nose +not +note +nothing +notice +now +number +o +ocean +of +off +offer +often +oh +old +on +once +one +only +open +or +orange +order +other +our +out +outside +over +owl +own +p +pack +paid +pail +paint +pair +palace +pan +paper +parade +parent +park +part +party +pass +past +pasture +path +paw +pay +peanut +peek +pen +penny +people +perfect +perhaps +person +pet +pick +picnic +picture +pie +piece +pig +pile +pin +place +plan +plant +play +pleasant +please +plenty +plow +pocket +point +poke +pole +policeman +pond +poor +pop +postman +pot +potato +pound +pour +practice +prepare +present +pretend +pretty +princess +prize +probably +problem +promise +protect +proud +puff +pull +puppy +push +put +q +queen +queer +quick +quiet +quite +r +rabbit +raccoon +race +radio +rag +rain +raise +ran +ranch +rang +reach +read +ready +real +red +refuse +remember +reply +rest +return +reward +rich +ride +right +ring +river +road +roar +rock +rode +roll +roof +room +rope +round +row +rub +rule +run +rush +s +sad +safe +said +sail +sale +salt +same +sand +sang +sat +save +saw +say +scare +school +scold +scratch +scream +sea +seat +second +secret +see +seed +seem +seen +sell +send +sent +seven +several +sew +shadow +shake +shall +shape +she +sheep +shell +shine +ship +shoe +shone +shook +shoot +shop +shore +short +shot +should +show +sick +side +sight +sign +signal +silent +silly +silver +since +sing +sister +sit +six +size +skip +sky +sled +sleep +slid +slide +slow +small +smart +smell +smile 
+smoke +snap +sniff +snow +so +soft +sold +some +something +sometimes +son +song +soon +sorry +sound +speak +special +spend +spill +splash +spoke +spot +spread +spring +squirrel +stand +star +start +station +stay +step +stick +still +stone +stood +stop +store +story +straight +strange +street +stretch +strike +strong +such +sudden +sugar +suit +summer +sun +supper +suppose +sure +surprise +swallow +sweet +swim +swing +t +table +tail +take +talk +tall +tap +taste +teach +teacher +team +tear +teeth +telephone +tell +ten +tent +than +thank +that +that\s +the +their +them +then +there +these +they +thick +thin +thing +think +third +this +those +though +thought +three +threw +through +throw +tie +tiger +tight +time +tiny +tip +tire +to +today +toe +together +told +tomorrow +too +took +tooth +top +touch +toward +tower +town +toy +track +traffic +train +trap +tree +trick +trip +trot +truck +true +trunk +try +turkey +turn +turtle +twelve +twin +two +u +ugly +uncle +under +unhappy +until +up +upon +upstairs +us +use +usual +v +valley +vegetable +very +village +visit +voice +w +wag +wagon +wait +wake +walk +want +war +warm +was +wash +waste +watch +water +wave +way +we +wear +weather +week +well +went +were +wet +what +wheel +when +where +which +while +whisper +whistle +white +who +whole +whose +why +wide +wife +will +win +wind +window +wing +wink +winter +wire +wise +wish +with +without +woke +wolf +woman +women +wonder +won\t +wood +word +wore +work +world +worm +worry +worth +would +wrong +x +y +yard +year +yell +yellow +yes +yet +you +young +your +z +zoo \ No newline at end of file diff --git a/readability/data/spache_easy_porterstem.txt b/readability/data/spache_easy_porterstem.txt new file mode 100644 index 0000000..9212ba1 --- /dev/null +++ b/readability/data/spache_easy_porterstem.txt @@ -0,0 +1,1064 @@ +a +abl +about +abov +across +act +add +afraid +after +afternoon +again +against +ago +air +airplan +alarm +all +almost +alon +along +alreadi +also +alway +am +among 
+an +and +angri +anim +anoth +answer +ani +anyon +appear +appl +are +arm +around +arrow +as +ask +asleep +at +ate +attent +aunt +awak +away +b +babi +back +bad +bag +ball +balloon +bang +bank +bark +barn +basket +be +bean +bear +beat +beauti +becam +becaus +becom +bed +bee +been +befor +began +begin +behind +believ +bell +belong +bend +bent +besid +best +better +between +big +bird +birthday +bit +bite +black +blanket +blew +block +blow +blue +board +boat +book +boot +born +borrow +both +bother +bottl +bottom +bought +bow +box +boy +branch +brave +bread +break +breakfast +breath +brick +bridg +bright +bring +broke +broken +brother +brought +brown +brush +build +bump +burn +bu +busi +but +butter +button +buy +by +c +cabin +cage +cake +call +came +camp +can +candl +candi +can\t +cap +captain +car +card +care +care +carrot +carri +case +castl +cat +catch +cattl +caught +caus +cent +certain +chair +chanc +chang +chase +chicken +chief +child +children +church +circl +circu +citi +clap +clean +clever +cliff +climb +clock +close +cloth +cloth +clown +coat +cold +color +come +comfort +compani +contest +continu +cook +cool +corner +could +count +countri +cours +cover +cow +crawl +cream +cri +cup +curtain +cut +d +dad +danc +danger +danger +dark +dash +daughter +day +dear +decid +deep +desk +did +didn\t +die +differ +dig +dinner +direct +disappear +disappoint +discov +distanc +do +doctor +doe +dog +dollar +done +don\t +door +down +dragon +dream +dress +drink +drive +drop +drove +dri +duck +dure +dust +e +each +eager +ear +earli +earn +earth +easi +eat +edg +egg +eight +eighteen +either +eleph +els +empti +end +enemi +enough +enter +even +ever +everi +everyth +exact +except +excit +exclaim +explain +eye +face +fact +fair +fall +famili +far +farm +farmer +farther +fast +fat +father +feather +feed +feel +feet +fell +fellow +felt +fenc +few +field +fierc +fight +figur +fill +final +find +fine +finger +finish +fire +first +fish +five +flag +flash +flat +flew +floor +flower +fli 
+follow +food +for +forest +forget +forth +found +four +fourth +fox +fresh +friend +frighten +frog +from +front +fruit +full +fun +funni +fur +g +game +garden +gasp +gate +gave +get +giant +gift +girl +give +glad +glass +go +goat +gone +good +got +grandfath +grandmoth +grass +gray +great +green +grew +grin +ground +group +grow +growl +guess +gun +h +had +hair +half +hall +hand +handl +hang +happen +happi +happi +hard +harm +ha +hat +hate +have +he +head +hear +heard +heavi +held +hello +help +hen +her +here +herself +he\ +hid +hide +high +hill +him +himself +hi +hit +hold +hole +holiday +home +honey +hop +horn +hors +hot +hour +hous +how +howl +hum +hundr +hung +hungri +hunt +hurri +hurt +husband +i +I +ice +idea +if +i\ll +i\m +imagin +import +in +inch +inde +insid +instead +into +invit +is +it +it\ +it +j +jacket +jar +jet +job +join +joke +joy +jump +just +k +keep +kept +key +kick +kill +kind +king +kitchen +kitten +knee +knew +knock +know +l +ladder +ladi +laid +lake +land +larg +last +late +laugh +lay +lazi +lead +leap +learn +least +leav +left +leg +less +let +let\ +letter +lick +lift +light +like +line +lion +list +listen +littl +live +load +long +look +lost +lot +loud +love +low +luck +lump +lunch +m +machin +made +magic +mail +make +man +mani +march +mark +market +master +matter +may +mayb +me +mean +meant +meat +meet +melt +men +merri +met +middl +might +mile +milk +milkman +mind +mine +minut +miss +mistak +moment +money +monkey +month +more +morn +most +mother +mountain +mous +mouth +move +much +mud +music +must +my +n +name +near +neck +need +needl +neighbor +neighborhood +nest +never +new +next +nibbl +nice +night +nine +no +nod +nois +none +north +nose +not +note +noth +notic +now +number +o +ocean +of +off +offer +often +oh +old +on +onc +one +onli +open +or +orang +order +other +our +out +outsid +over +owl +own +p +pack +paid +pail +paint +pair +palac +pan +paper +parad +parent +park +part +parti +pass +past +pastur +path +paw +pay +peanut +peek 
+pen +penni +peopl +perfect +perhap +person +pet +pick +picnic +pictur +pie +piec +pig +pile +pin +place +plan +plant +play +pleasant +pleas +plenti +plow +pocket +point +poke +pole +policeman +pond +poor +pop +postman +pot +potato +pound +pour +practic +prepar +present +pretend +pretti +princess +prize +probabl +problem +promis +protect +proud +puff +pull +puppi +push +put +q +queen +queer +quick +quiet +quit +r +rabbit +raccoon +race +radio +rag +rain +rais +ran +ranch +rang +reach +read +readi +real +red +refus +rememb +repli +rest +return +reward +rich +ride +right +ring +river +road +roar +rock +rode +roll +roof +room +rope +round +row +rub +rule +run +rush +s +sad +safe +said +sail +sale +salt +same +sand +sang +sat +save +saw +say +scare +school +scold +scratch +scream +sea +seat +second +secret +see +seed +seem +seen +sell +send +sent +seven +sever +sew +shadow +shake +shall +shape +she +sheep +shell +shine +ship +shoe +shone +shook +shoot +shop +shore +short +shot +should +show +sick +side +sight +sign +signal +silent +silli +silver +sinc +sing +sister +sit +six +size +skip +sky +sled +sleep +slid +slide +slow +small +smart +smell +smile +smoke +snap +sniff +snow +so +soft +sold +some +someth +sometim +son +song +soon +sorri +sound +speak +special +spend +spill +splash +spoke +spot +spread +spring +squirrel +stand +star +start +station +stay +step +stick +still +stone +stood +stop +store +stori +straight +strang +street +stretch +strike +strong +such +sudden +sugar +suit +summer +sun +supper +suppos +sure +surpris +swallow +sweet +swim +swing +t +tabl +tail +take +talk +tall +tap +tast +teach +teacher +team +tear +teeth +telephon +tell +ten +tent +than +thank +that +that\ +the +their +them +then +there +these +they +thick +thin +thing +think +third +thi +those +though +thought +three +threw +through +throw +tie +tiger +tight +time +tini +tip +tire +to +today +toe +togeth +told +tomorrow +too +took +tooth +top +touch +toward +tower +town +toy +track 
+traffic +train +trap +tree +trick +trip +trot +truck +true +trunk +tri +turkey +turn +turtl +twelv +twin +two +u +ugli +uncl +under +unhappi +until +up +upon +upstair +us +use +usual +v +valley +veget +veri +villag +visit +voic +w +wag +wagon +wait +wake +walk +want +war +warm +wa +wash +wast +watch +water +wave +way +we +wear +weather +week +well +went +were +wet +what +wheel +when +where +which +while +whisper +whistl +white +who +whole +whose +whi +wide +wife +will +win +wind +window +wing +wink +winter +wire +wise +wish +with +without +woke +wolf +woman +women +wonder +won\t +wood +word +wore +work +world +worm +worri +worth +would +wrong +x +y +yard +year +yell +yellow +ye +yet +you +young +your +z +zoo diff --git a/readability/data/fix.py b/readability/data/stem.py similarity index 91% rename from readability/data/fix.py rename to readability/data/stem.py index 23824e3..d65e66b 100644 --- a/readability/data/fix.py +++ b/readability/data/stem.py @@ -3,7 +3,7 @@ porter_stemmer = PorterStemmer() -file = 'dale_chall_easy.txt' +file = 'spache_easy.txt' cur_path = os.path.dirname(os.path.realpath(__file__)) dale_chall_path = os.path.join(cur_path, file) words = None diff --git a/readability/readability.py b/readability/readability.py index f29c7f5..dbdda6e 100644 --- a/readability/readability.py +++ b/readability/readability.py @@ -1,6 +1,6 @@ from .text import Analyzer from .scorers import ARI, ColemanLiau, DaleChall, Flesch, \ - FleschKincaid, GunningFog, LinsearWrite, Smog + FleschKincaid, GunningFog, LinsearWrite, Smog, Spache class Readability: @@ -40,6 +40,10 @@ def smog(self): """SMOG Index.""" return Smog(self._statistics, self._analyzer.sentences).score() + def spache(self): + """Spache Index.""" + return Spache(self._statistics).score() + def statistics(self): return { 'num_letters': self._statistics.num_letters, diff --git a/readability/scorers/__init__.py b/readability/scorers/__init__.py index da4e187..df708e8 100644 --- 
a/readability/scorers/__init__.py +++ b/readability/scorers/__init__.py @@ -7,3 +7,4 @@ from .ari import ARI from .linsear_write import LinsearWrite from .smog import Smog +from .spache import Spache diff --git a/readability/scorers/spache.py b/readability/scorers/spache.py new file mode 100644 index 0000000..0f9a2c4 --- /dev/null +++ b/readability/scorers/spache.py @@ -0,0 +1,35 @@ +from readability.exceptions import ReadabilityException + + +class Result: + def __init__(self, score, grade_level): + self.score = score + self.grade_level = grade_level + + def __str__(self): + return "score: {}, grade_level: '{}'". \ + format(self.score, self.grade_level) + + +class Spache: + def __init__(self, stats): + self._stats = stats + if stats.num_words < 100: + raise ReadabilityException('100 words required.') + + def score(self): + score = self._score() + return Result( + score=score, + grade_level=self._grade_level(score)) + + def _score(self): + stats = self._stats + avg_sentence_len = stats.num_words / stats.num_sentences + percent_difficult_words = \ + stats.num_spache_complex / stats.num_words * 100 + + return (0.141 * avg_sentence_len) + (0.086 * percent_difficult_words) + 0.839 + + def _grade_level(self, score): + return str(round(score)) diff --git a/readability/text/analyzer.py b/readability/text/analyzer.py index b04b8b6..d90789d 100644 --- a/readability/text/analyzer.py +++ b/readability/text/analyzer.py @@ -37,6 +37,10 @@ def num_gunning_complex(self): def num_dale_chall_complex(self): return self.stats['num_dale_chall_complex'] + @property + def num_spache_complex(self): + return self.stats['num_spache_complex'] + @property def avg_words_per_sentence(self): return self.num_words / self.num_sentences @@ -57,6 +61,7 @@ def __init__(self): def analyze(self, text): self._dale_chall_set = self._load_dale_chall() + self._spache_set = self._load_spache() stats = self._statistics(text) self.sentences = stats['sentences'] # hack for smog return 
AnalyzerStatistics(stats) @@ -80,6 +85,7 @@ def _statistics(self, text): letters_count = 0 gunning_complex_count = 0 dale_chall_complex_count = 0 + spache_complex_count = 0 porter_stemmer = PorterStemmer() def is_gunning_complex(t, syllable_count): @@ -91,6 +97,10 @@ def is_dale_chall_complex(t): stem = porter_stemmer.stem(t.lower()) return stem not in self._dale_chall_set + def is_spache_complex(t): + stem = porter_stemmer.stem(t.lower()) + return stem not in self._spache_set + for t in tokens: if not self._is_punctuation(t): @@ -104,6 +114,8 @@ def is_dale_chall_complex(t): else 0 dale_chall_complex_count += \ 1 if is_dale_chall_complex(t) else 0 + spache_complex_count += \ + 1 if is_spache_complex(t) else 0 sentences = self._tokenize_sentences(text) sentence_count = len(sentences) @@ -116,6 +128,7 @@ def is_dale_chall_complex(t): 'num_letters': letters_count, 'num_gunning_complex': gunning_complex_count, 'num_dale_chall_complex': dale_chall_complex_count, + 'num_spache_complex': spache_complex_count, 'sentences': sentences, } @@ -133,3 +146,10 @@ def _load_dale_chall(self): dale_chall_path = os.path.join(cur_path, '..', 'data', file) with open(dale_chall_path) as f: return set(line.strip() for line in f) + + def _load_spache(self): + file = 'spache_easy_porterstem.txt' + cur_path = os.path.dirname(os.path.realpath(__file__)) + spache_path = os.path.join(cur_path, '..', 'data', file) + with open(spache_path) as f: + return set(line.strip() for line in f) diff --git a/test/test_readability.py b/test/test_readability.py index ef3f0d8..62e7201 100644 --- a/test/test_readability.py +++ b/test/test_readability.py @@ -68,6 +68,12 @@ def test_smog(self): self.assertEqual(12.516099999999998, r.score) self.assertEqual('13', r.grade_level) + def test_spache(self): + r = self.readability.spache() + print(r) + self.assertEqual(7.164945054945054, r.score) + self.assertEqual('7', r.grade_level) + def test_print_stats(self): stats = self.readability.statistics() 
self.assertEqual(562, stats['num_letters'])