import
spacy
from
textstat.textstat
import
textstatistics,legacy_round
def break_sentences(text):
    """Segment *text* into sentences with spaCy's English pipeline.

    Returns a list of spaCy ``Span`` objects, one per sentence.
    """
    pipeline = spacy.load('en_core_web_sm')
    parsed = pipeline(text)
    return [sentence for sentence in parsed.sents]
def word_count(text):
    """Return the total number of tokens across all sentences of *text*.

    Tokenization is delegated to spaCy via ``break_sentences``; every
    token the tokenizer emits (including punctuation tokens) is counted.
    """
    # Count lazily — the original built a list per sentence only to take
    # its len(), allocating a throwaway list each iteration.
    return sum(1 for sentence in break_sentences(text) for _token in sentence)
def sentence_count(text):
    """Return how many sentences spaCy segments *text* into."""
    return len(break_sentences(text))
def avg_sentence_length(text):
    """Return the mean number of words per sentence in *text*.

    Returns 0.0 for empty / whitespace-only input (no sentences) instead
    of raising ZeroDivisionError as the original did.
    """
    words = word_count(text)
    sentences = sentence_count(text)
    if sentences == 0:
        # Guard: empty text yields zero sentences; dividing would raise.
        return 0.0
    # True division already yields a float in Python 3.
    return words / sentences
def syllables_count(word):
    """Return the syllable count of *word* as computed by textstat."""
    stats = textstatistics()
    return stats.syllable_count(word)
def avg_syllables_per_word(text):
    """Return the average syllables per word in *text*, rounded to 1 dp.

    Returns 0.0 for empty input (zero words) instead of raising
    ZeroDivisionError as the original did.
    """
    syllable_total = syllables_count(text)
    words = word_count(text)
    if words == 0:
        # Guard: no words means the ratio is undefined; report 0.0.
        return 0.0
    ASPW = float(syllable_total) / float(words)
    return legacy_round(ASPW, 1)
def difficult_words(text):
    """Return the number of distinct 'difficult' words in *text*.

    A word counts as difficult when it is not a spaCy stop word and has
    two or more syllables.  Uniqueness is over exact surface forms
    (case-sensitive, via a set).
    """
    # The model is loaded only for its stop-word list; the original also
    # ran nlp(text) here and discarded the parse — dead work, removed
    # (break_sentences re-parses the text itself).
    nlp = spacy.load('en_core_web_sm')
    stop_words = nlp.Defaults.stop_words

    # Flatten every sentence into token strings, matching the original's
    # str(token) conversion.
    words = []
    for sentence in break_sentences(text):
        words += [str(token) for token in sentence]

    diff_words_set = set()
    for word in words:
        if word not in stop_words and syllables_count(word) >= 2:
            diff_words_set.add(word)
    return len(diff_words_set)
def poly_syllable_count(text):
    """Return the number of tokens in *text* with three or more syllables."""
    count = 0
    for sentence in break_sentences(text):
        for token in sentence:
            # BUG FIX: the original passed the spaCy Token object itself
            # to the syllable counter; textstat expects a plain string
            # (the sibling difficult_words() already converts via str()).
            if syllables_count(str(token)) >= 3:
                count += 1
    return count
def flesch_reading_ease(text):
    """Return the Flesch Reading Ease score of *text*, rounded to 2 dp.

    FRE = 206.835 - (1.015 * average sentence length)
                  - (84.6 * average syllables per word)
    """
    asl = avg_sentence_length(text)
    asw = avg_syllables_per_word(text)
    FRE = 206.835 - float(1.015 * asl) - float(84.6 * asw)
    return legacy_round(FRE, 2)
def gunning_fog(text):
    """Return the Gunning Fog index of *text*.

    grade = 0.4 * (average sentence length + percentage of difficult
    words + 5), as in the original implementation.
    """
    # Percentage of difficult words, plus the flat offset of 5 the
    # original applied.
    difficult_ratio = difficult_words(text) / word_count(text)
    per_diff_words = difficult_ratio * 100 + 5
    return 0.4 * (avg_sentence_length(text) + per_diff_words)
def smog_index(text):
    """Return the SMOG grade of *text*, or 0 for fewer than 3 sentences.

    SMOG = 1.043 * sqrt(30 * polysyllables / sentences) + 3.1291
    """
    n_sentences = sentence_count(text)
    # Guard clause instead of the original if/else pyramid; the formula
    # needs a minimum sample of sentences to be meaningful.
    if n_sentences < 3:
        return 0
    poly_syllab = poly_syllable_count(text)
    SMOG = (1.043 * (30 * (poly_syllab / n_sentences)) ** 0.5) + 3.1291
    return legacy_round(SMOG, 1)
def dale_chall_readability_score(text):
    """Return the Dale-Chall readability score of *text*, rounded to 2 dp.

    score = 0.1579 * (percent difficult words)
          + 0.0496 * (average sentence length),
    plus an adjustment of 3.6365 when the difficult-word percentage
    exceeds 5.  Returns 0.0 for empty input (zero words).
    """
    words = word_count(text)
    if words == 0:
        # Guard: the original fell through to undefined names (`per`,
        # `diff_words`) when there were no words.
        return 0.0
    # BUG FIX: the original computed `word_count - difficult_words(text)`,
    # subtracting from the *function object* (TypeError); use the count.
    easy_word_count = words - difficult_words(text)
    per_easy = float(easy_word_count) / float(words) * 100
    diff_words = 100 - per_easy
    raw_score = (0.1579 * diff_words) + (0.0496 * avg_sentence_length(text))
    if diff_words > 5:
        raw_score += 3.6365
    # BUG FIX: the original returned the undefined name `score` (NameError).
    return legacy_round(raw_score, 2)