diff --git a/ch2/2_2_Basic_webscraping_byusing_beautifulsuop.py b/ch2/2_2_Basic_webscraping_byusing_beautifulsuop.py index 36314bc..49d03af 100644 --- a/ch2/2_2_Basic_webscraping_byusing_beautifulsuop.py +++ b/ch2/2_2_Basic_webscraping_byusing_beautifulsuop.py @@ -12,10 +12,10 @@ def Get_the_page_by_beautibulsoup(): #print soup() #print(soup.prettify()) #display source of the html page in readable format. soup = BeautifulSoup(page.content, 'html.parser') - print soup.find_all('p')[0].get_text() - print soup.find_all('p')[1].get_text() - print soup.find_all('p')[2].get_text() - print soup.find_all('p')[3].get_text() + print(soup.find_all('p')[0].get_text()) + print(soup.find_all('p')[1].get_text()) + print(soup.find_all('p')[2].get_text()) + print(soup.find_all('p')[3].get_text()) if __name__ =="__main__": diff --git a/ch3/3_1_wordsteam.py b/ch3/3_1_wordsteam.py index 9b10cbb..5932b0a 100644 --- a/ch3/3_1_wordsteam.py +++ b/ch3/3_1_wordsteam.py @@ -21,32 +21,32 @@ def stemmer_porter(): port = PorterStemmer() - print "\nDerivational Morphemes" - print " ".join([port.stem(i) for i in text6.split()]) - print " ".join([port.stem(i) for i in text7.split()]) - print "\nInflectional Morphemes" - print " ".join([port.stem(i) for i in text8.split()]) - print " ".join([port.stem(i) for i in text9.split()]) - print "\nSome examples" - print " ".join([port.stem(i) for i in word.split()]) - print " ".join([port.stem(i) for i in text.split()]) - print " ".join([port.stem(i) for i in text1.split()]) - print " ".join([port.stem(i) for i in text2.split()]) - print " ".join([port.stem(i) for i in text3.split()]) - print " ".join([port.stem(i) for i in text4.split()]) - print " ".join([port.stem(i) for i in text5.split()]) + print("\nDerivational Morphemes") + print(" ".join([port.stem(i) for i in text6.split()])) + print(" ".join([port.stem(i) for i in text7.split()])) + print("\nInflectional Morphemes") + print(" ".join([port.stem(i) for i in text8.split()])) + print(" ".join([port.stem(i) for i in text9.split()])) + print("\nSome examples") + print(" ".join([port.stem(i) for i in word.split()])) + print(" ".join([port.stem(i) for i in text.split()])) + print(" ".join([port.stem(i) for i in text1.split()])) + print(" ".join([port.stem(i) for i in text2.split()])) + print(" ".join([port.stem(i) for i in text3.split()])) + print(" ".join([port.stem(i) for i in text4.split()])) + print(" ".join([port.stem(i) for i in text5.split()])) def polyglot_stem(): - print "\nDerivational Morphemes using polyglot library" + print("\nDerivational Morphemes using polyglot library") for w in words_derv: w = Word(w, language="en") print("{:<20}{}".format(w, w.morphemes)) - print "\nInflectional Morphemes using polyglot library" + print("\nInflectional Morphemes using polyglot library") for w in word_infle: w = Word(w, language="en") print("{:<20}{}".format(w, w.morphemes)) - print "\nSome Morphemes examples using polyglot library" + print("\nSome Morphemes examples using polyglot library") for w in word_infle: w = Word(w, language="en") print("{:<20}{}".format(w, w.morphemes)) diff --git a/ch3/3_2_tokenization_lemmatization.py b/ch3/3_2_tokenization_lemmatization.py index 0d86209..a744582 100644 --- a/ch3/3_2_tokenization_lemmatization.py +++ b/ch3/3_2_tokenization_lemmatization.py @@ -7,22 +7,22 @@ def wordtokenization(): content = """Stemming is funnier than a bummer says the sushi loving computer scientist. She really wants to buy cars. She told me angrily. It is better for you. Man is walking. We are meeting tomorrow. 
You really don't know..!""" - print word_tokenize(content) + print(word_tokenize(content)) def wordlemmatization(): wordlemma = WordNetLemmatizer() - print wordlemma.lemmatize('cars') - print wordlemma.lemmatize('walking',pos='v') - print wordlemma.lemmatize('meeting',pos='n') - print wordlemma.lemmatize('meeting',pos='v') - print wordlemma.lemmatize('better',pos='a') - print wordlemma.lemmatize('is',pos='v') - print wordlemma.lemmatize('funnier',pos='a') - print wordlemma.lemmatize('expected',pos='v') - print wordlemma.lemmatize('fantasized',pos='v') + print(wordlemma.lemmatize('cars')) + print(wordlemma.lemmatize('walking',pos='v')) + print(wordlemma.lemmatize('meeting',pos='n')) + print(wordlemma.lemmatize('meeting',pos='v')) + print(wordlemma.lemmatize('better',pos='a')) + print(wordlemma.lemmatize('is',pos='v')) + print(wordlemma.lemmatize('funnier',pos='a')) + print(wordlemma.lemmatize('expected',pos='v')) + print(wordlemma.lemmatize('fantasized',pos='v')) if __name__ =="__main__": wordtokenization() - print "\n" - print "----------Word Lemmatization----------" + print("\n") + print("----------Word Lemmatization----------") wordlemmatization() \ No newline at end of file diff --git a/ch3/3_3_parsingtree.py b/ch3/3_3_parsingtree.py index e5b927a..389bbff 100644 --- a/ch3/3_3_parsingtree.py +++ b/ch3/3_3_parsingtree.py @@ -23,7 +23,7 @@ def definegrammar_pasrereult(): parser = nltk.ChartParser(Grammar) trees = parser.parse(sent) for tree in trees: - print tree + print(tree) # Part 2: Draw the parse tree def draw_parser_tree(): @@ -50,9 +50,9 @@ def stanford_parsing_result(): if __name__ == "__main__": - print "\n--------Parsing result as per defined grammar-------" + print("\n--------Parsing result as per defined grammar-------") definegrammar_pasrereult() - print "\n--------Drawing Parse Tree-------" + print("\n--------Drawing Parse Tree-------") draw_parser_tree() - print "\n--------Stanford Parser result------" + print("\n--------Stanford Parser result------") stanford_parsing_result() diff --git a/ch4/4_1_processrawtext.py b/ch4/4_1_processrawtext.py index 9dc222e..d34b8a3 100644 --- a/ch4/4_1_processrawtext.py +++ b/ch4/4_1_processrawtext.py @@ -24,30 +24,30 @@ def readcorpus(): return raw_content_cg[0:1000] if __name__ == "__main__": - print "" - print "----------Output from Raw Text file-----------" - print "" + print("") + print("----------Output from Raw Text file-----------") + print("") filecontentdetails = fileread() - print filecontentdetails + print(filecontentdetails) # sentence tokenizer st_list_rawfile = st(filecontentdetails) - print len(st_list_rawfile) + print(len(st_list_rawfile)) - print "" - print "-------Output from assigned variable-------" - print "" + print("") + print("-------Output from assigned variable-------") + print("") localveriabledata = localtextvalue() - print localveriabledata + print(localveriabledata) # sentence tokenizer st_list_local = st(localveriabledata) - print len(st_list_local) - print st_list_local + print(len(st_list_local)) + print(st_list_local) - print "" - print "-------Output Corpus data--------------" - print "" + print("") + print("-------Output Corpus data--------------") + print("") fromcorpusdata = readcorpus() - print fromcorpusdata + print(fromcorpusdata) # sentence tokenizer st_list_corpus = st(fromcorpusdata) - print len(st_list_corpus) + print(len(st_list_corpus)) diff --git a/ch4/4_2_rawtext_Stemmers.py b/ch4/4_2_rawtext_Stemmers.py index db298da..43500e6 100644 --- a/ch4/4_2_rawtext_Stemmers.py +++ 
b/ch4/4_2_rawtext_Stemmers.py @@ -8,28 +8,28 @@ def stemmer_porter(): port = PorterStemmer() - print "\nStemmer" + print("\nStemmer") return " ".join([port.stem(i) for i in text.split()]) def lammatizer(): wordnet_lemmatizer = WordNetLemmatizer() ADJ, ADJ_SAT, ADV, NOUN, VERB = 'a', 's', 'r', 'n', 'v' # Pos = verb - print "\nVerb lemma" - print " ".join([wordnet_lemmatizer.lemmatize(i,pos="v") for i in text.split()]) + print("\nVerb lemma") + print(" ".join([wordnet_lemmatizer.lemmatize(i,pos="v") for i in text.split()])) # Pos = noun - print "\nNoun lemma" - print " ".join([wordnet_lemmatizer.lemmatize(i,pos="n") for i in text.split()]) + print("\nNoun lemma") + print(" ".join([wordnet_lemmatizer.lemmatize(i,pos="n") for i in text.split()])) # Pos = Adjective - print "\nAdjective lemma" - print " ".join([wordnet_lemmatizer.lemmatize(i, pos="a") for i in text.split()]) + print("\nAdjective lemma") + print(" ".join([wordnet_lemmatizer.lemmatize(i, pos="a") for i in text.split()])) # Pos = satellite adjectives - print "\nSatellite adjectives lemma" - print " ".join([wordnet_lemmatizer.lemmatize(i, pos="s") for i in text.split()]) - print "\nAdverb lemma" + print("\nSatellite adjectives lemma") + print(" ".join([wordnet_lemmatizer.lemmatize(i, pos="s") for i in text.split()])) + print("\nAdverb lemma") # POS = Adverb - print " ".join([wordnet_lemmatizer.lemmatize(i, pos="r") for i in text.split()]) + print(" ".join([wordnet_lemmatizer.lemmatize(i, pos="r") for i in text.split()])) if __name__ == "__main__": - print stemmer_porter() + print(stemmer_porter()) lammatizer() \ No newline at end of file diff --git a/ch4/4_3_stopwordremove.py b/ch4/4_3_stopwordremove.py index b575525..e5a78e1 100644 --- a/ch4/4_3_stopwordremove.py +++ b/ch4/4_3_stopwordremove.py @@ -3,24 +3,24 @@ def stopwordlist(): stopwordlist = stopwords.words('english') - print "" - print "------List of stop words-------" + print("") + print("------List of stop words-------") for s in stopwordlist: - print s + print(s) def customizedstopwordremove(): stop_words = set(["hi", "bye"]) line = """hi this is foo. bye""" - print "" - print "--------Customized stopword removal---------" - print " ".join(word for word in line.split() if word not in stop_words) + print("") + print("--------Customized stopword removal---------") + print(" ".join(word for word in line.split() if word not in stop_words)) def stopwordremove(): stop = set(stopwords.words('english')) sentence = "this is a test sentence. I am very happy today." - print "" - print "--------Stop word removal from raw text---------" - print " ".join([i for i in sentence.lower().split() if i not in stop]) + print("") + print("--------Stop word removal from raw text---------") + print(" ".join([i for i in sentence.lower().split() if i not in stop])) diff --git a/ch4/4_4_wordtokenization.py b/ch4/4_4_wordtokenization.py index 01a97f7..0d1ed58 100644 --- a/ch4/4_4_wordtokenization.py +++ b/ch4/4_4_wordtokenization.py @@ -5,25 +5,25 @@ def wordtokenization(): content = """Stemming is funnier than a bummer says the sushi loving computer scientist. She really wants to buy cars. She told me angrily. It is better for you. Man is walking. We are meeting tomorrow. 
You really don't know..!""" - print word_tokenize(content) + print(word_tokenize(content)) def wordlemmatization(): wordlemma = WordNetLemmatizer() - print wordlemma.lemmatize('cars') - print wordlemma.lemmatize('walking',pos='v') - print wordlemma.lemmatize('meeting',pos='n') - print wordlemma.lemmatize('meeting',pos='v') - print wordlemma.lemmatize('better',pos='a') + print(wordlemma.lemmatize('cars')) + print(wordlemma.lemmatize('walking',pos='v')) + print(wordlemma.lemmatize('meeting',pos='n')) + print(wordlemma.lemmatize('meeting',pos='v')) + print(wordlemma.lemmatize('better',pos='a')) def wordlowercase(): text= "I am a person. Do you know what is time now?" - print text.lower() + print(text.lower()) if __name__ =="__main__": wordtokenization() - print "\n" - print "----------Word Lemmatization----------" + print("\n") + print("----------Word Lemmatization----------") wordlemmatization() - print "\n" - print "----------converting data to lower case ----------" + print("\n") + print("----------converting data to lower case ----------") wordlowercase() \ No newline at end of file diff --git a/ch4/4_5_regualrexpression.py b/ch4/4_5_regualrexpression.py index f59f5b8..c13849b 100644 --- a/ch4/4_5_regualrexpression.py +++ b/ch4/4_5_regualrexpression.py @@ -5,75 +5,75 @@ def searchvsmatch(): matchObj = re.match(r'animals', line, re.M | re.I) if matchObj: - print "match: ", matchObj.group() + print("match: ", matchObj.group()) else: - print "No match!!" + print("No match!!") searchObj = re.search(r'animals', line, re.M | re.I) if searchObj: - print "search: ", searchObj.group() + print("search: ", searchObj.group()) else: - print "Nothing found!!" + print("Nothing found!!") def basicregex(): line = "This is test sentence and test sentence is also a sentence." contactInfo = 'Doe, John: 1111-1212' - print "-----------Output of re.findall()--------" + print("-----------Output of re.findall()--------") # re.findall() finds all occurences of sentence from line variable. findallobj = re.findall(r'sentence', line) - print findallobj + print(findallobj) # re.search() and group wise extraction groupwiseobj = re.search(r'(\w+), (\w+): (\S+)', contactInfo) - print "\n" - print "-----------Output of Groups--------" - print "1st group ------- " + groupwiseobj.group(1) - print "2nd group ------- " + groupwiseobj.group(2) - print "3rd group ------- " + groupwiseobj.group(3) + print("\n") + print("-----------Output of Groups--------") + print("1st group ------- " + groupwiseobj.group(1)) + print("2nd group ------- " + groupwiseobj.group(2)) + print("3rd group ------- " + groupwiseobj.group(3)) # re.sub() replace string phone = "1111-2222-3333 # This is Phone Number" # Delete Python-style comments num = re.sub(r'#.*$', "", phone) - print "\n" - print "-----------Output of re.sub()--------" - print "Phone Num : ", num + print("\n") + print("-----------Output of re.sub()--------") + print("Phone Num : ", num) # Replace John to Peter in contactInfo contactInforevised = re.sub(r'John', "Peter", contactInfo) - print "Revised contactINFO : ", contactInforevised + print("Revised contactINFO : ", contactInforevised) def advanceregex(): text = "I play on playground. It is the best ground." 
positivelookaheadobjpattern = re.findall(r'play(?=ground)',text,re.M | re.I) - print "Positive lookahead: " + str(positivelookaheadobjpattern) + print("Positive lookahead: " + str(positivelookaheadobjpattern)) positivelookaheadobj = re.search(r'play(?=ground)',text,re.M | re.I) - print "Positive lookahead character index: "+ str(positivelookaheadobj.span()) + print("Positive lookahead character index: "+ str(positivelookaheadobj.span())) possitivelookbehindobjpattern = re.findall(r'(?<=play)ground',text,re.M | re.I) - print "Positive lookbehind: " + str(possitivelookbehindobjpattern) + print("Positive lookbehind: " + str(possitivelookbehindobjpattern)) possitivelookbehindobj = re.search(r'(?<=play)ground',text,re.M | re.I) - print "Positive lookbehind character index: " + str(possitivelookbehindobj.span()) + print("Positive lookbehind character index: " + str(possitivelookbehindobj.span())) negativelookaheadobjpattern = re.findall(r'play(?!ground)', text, re.M | re.I) - print "Negative lookahead: " + str(negativelookaheadobjpattern) + print("Negative lookahead: " + str(negativelookaheadobjpattern)) negativelookaheadobj = re.search(r'play(?!ground)', text, re.M | re.I) - print "Negative lookahead character index: " + str(negativelookaheadobj.span()) + print("Negative lookahead character index: " + str(negativelookaheadobj.span())) negativelookbehindobjpattern = re.findall(r'(?2}".format(word, tag)) \ No newline at end of file + print("{:<16}{:>2}".format(word, tag)) \ No newline at end of file diff --git a/ch5/POStagdemo/snafordparserPOStags.py b/ch5/POStagdemo/snafordparserPOStags.py index 5fc9149..5779f79 100644 --- a/ch5/POStagdemo/snafordparserPOStags.py +++ b/ch5/POStagdemo/snafordparserPOStags.py @@ -9,7 +9,7 @@ def stnfordpostagdemofunction(text): }) for s in output["sentences"]: for t in s["tokens"]: - print str(t["word"])+ " --- postag --"+ str(t["pos"]) + print(str(t["word"])+ " --- postag --"+ str(t["pos"])) if __name__ == "__main__": diff --git a/ch5/TFIDFdemo/tfidf_scikitlearn.py b/ch5/TFIDFdemo/tfidf_scikitlearn.py index 510dc14..26960ae 100644 --- a/ch5/TFIDFdemo/tfidf_scikitlearn.py +++ b/ch5/TFIDFdemo/tfidf_scikitlearn.py @@ -78,15 +78,15 @@ def tokenize(text): feature_names = tfidf.get_feature_names() for col in response.nonzero()[1]: - print feature_names[col], ' - ', response[0, col] + print(feature_names[col], ' - ', response[0, col]) feature_array = np.array(tfidf.get_feature_names()) tfidf_sorting = np.argsort(response.toarray()).flatten()[::-1] n = 3 top_n = feature_array[tfidf_sorting][:n] -print top_n +print(top_n) n = 4 top_n = feature_array[tfidf_sorting][:n] -print top_n +print(top_n) diff --git a/ch5/TFIDFdemo/tfidf_textblob.py b/ch5/TFIDFdemo/tfidf_textblob.py index aed9bca..49109d0 100644 --- a/ch5/TFIDFdemo/tfidf_textblob.py +++ b/ch5/TFIDFdemo/tfidf_textblob.py @@ -26,7 +26,7 @@ def tfidf(word, blob, bloblist): tf_score = tf('short', blob) idf_score = idf('short', bloblist) tfidf_score = tfidf('short', blob, bloblist) -print "tf score for word short--- "+ str(tf_score)+"\n" -print "idf score for word short--- "+ str(idf_score)+"\n" -print "tf x idf score of word short--- "+str(tfidf_score) +print("tf score for word short--- "+ str(tf_score)+"\n") +print("idf score for word short--- "+ str(idf_score)+"\n") +print("tf x idf score of word short--- "+str(tfidf_score)) diff --git a/ch5/bagofwordsdemo/BOWdemo.py b/ch5/bagofwordsdemo/BOWdemo.py index a6f3209..8b377a1 100644 --- a/ch5/bagofwordsdemo/BOWdemo.py +++ b/ch5/bagofwordsdemo/BOWdemo.py @@ -7,4 +7,4 @@ 
counts = ngram_vectorizer.fit_transform(['words', 'wprds']) # this check weather the given word character is present in the above teo word which are documents here. ngram_vectorizer.get_feature_names() == ([' w', 'ds', 'or', 'pr', 'rd', 's ', 'wo', 'wp']) -print counts.toarray().astype(int) +print(counts.toarray().astype(int)) diff --git a/ch5/n_gram/nltkngrmdemo.py b/ch5/n_gram/nltkngrmdemo.py index 1148561..ac89635 100644 --- a/ch5/n_gram/nltkngrmdemo.py +++ b/ch5/n_gram/nltkngrmdemo.py @@ -3,6 +3,5 @@ n = 4 # you can give 4, 5, 1 or any number less than sentences length ngramsres = ngrams(sentence.split(), n) for grams in ngramsres: - print grams + print(grams) - \ No newline at end of file diff --git a/ch5/onehotencodingdemo/OHEdemo.py b/ch5/onehotencodingdemo/OHEdemo.py index 884f023..f5cc1b8 100644 --- a/ch5/onehotencodingdemo/OHEdemo.py +++ b/ch5/onehotencodingdemo/OHEdemo.py @@ -3,9 +3,9 @@ df = pd.DataFrame([['rick','young'],['phil','old']],columns=['name','age-group']) -print df -print "\n----By using Panda ----\n" -print pd.get_dummies(df) +print(df) +print("\n----By using Pandas ----\n") +print(pd.get_dummies(df)) X = pd.DataFrame({'income': [100000,110000,90000,30000,14000,50000], 'country':['US', 'CAN', 'US', 'CAN', 'MEX', 'US'], @@ -13,9 +13,9 @@ -print "\n----By using Sikit-learn ----\n" +print("\n----By using Scikit-learn ----\n") v = DictVectorizer() qualitative_features = ['country'] X_qual = v.fit_transform(X[qualitative_features].to_dict('records')) -print v.vocabulary_ -print X_qual.toarray() \ No newline at end of file +print(v.vocabulary_) +print(X_qual.toarray()) \ No newline at end of file diff --git a/ch5/parserexample/StanfordParserdemo.py b/ch5/parserexample/StanfordParserdemo.py index ad5caa1..9878d5f 100644 --- a/ch5/parserexample/StanfordParserdemo.py +++ b/ch5/parserexample/StanfordParserdemo.py @@ -1,3 +1,6 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function from pycorenlp import StanfordCoreNLP from nltk.tree import Tree from nltk.parse.stanford import StanfordDependencyParser @@ -11,18 +14,18 @@ def stanfordparserdemo(sentnece): 'outputFormat': 'json' }) - print "\n------------Stanford Parser Parseing Result------------" + print("\n------------Stanford Parser Parsing Result------------") parsetree = output['sentences'][0]['parse'] - print "\n------parsing------\n" - print parsetree - print "\n------ Words inside NP ------\n" + print("\n------parsing------\n") + print(parsetree) + print("\n------ Words inside NP ------\n") for i in Tree.fromstring(parsetree).subtrees(): if i.label() == 'NP': - print i.leaves(),i.label() - print "\n------ Words inside NP with POS tags ------\n" + print(i.leaves(), i.label()) + print("\n------ Words inside NP with POS tags ------\n") for i in Tree.fromstring(parsetree).subtrees(): if i.label() == 'NP': - print i + print(i) def NLTKparserfordependancies(sentnece): @@ -30,9 +33,9 @@ def NLTKparserfordependancies(sentnece): path_to_models_jar = '/home/jalaj/stanford-corenlp-full-2016-10-31/stanford-corenlp-3.7.0-models.jar' dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar) result = dependency_parser.raw_parse(sentnece) - dep = result.next() - print "\n------Dependencies------\n" - print list(dep.triples()) + dep = next(result) + print("\n------Dependencies------\n") + print(list(dep.triples())) if __name__ == "__main__": stanfordparserdemo('The boy put tortoise on the rug.') diff --git 
a/ch5/parserexample/scpacyparserdemo.py b/ch5/parserexample/scpacyparserdemo.py index 30dbca0..e7f1061 100644 --- a/ch5/parserexample/scpacyparserdemo.py +++ b/ch5/parserexample/scpacyparserdemo.py @@ -4,13 +4,12 @@ nlp = spacy.load('en') def spacyparserdemo(): - example = u"The boy with the spotted dog quickly ran after the firetruck." + example = "The boy with the spotted dog quickly ran after the firetruck." parsedEx = parser(example) # shown as: original token, dependency tag, head word, left dependents, right dependents - print "\n-----------original token, dependency tag, head word, left dependents, right dependents-------\n" + print("\n-----------original token, dependency tag, head word, left dependents, right dependents-------\n") for token in parsedEx: - print( - token.orth_, token.dep_, token.head.orth_, [t.orth_ for t in token.lefts], [t.orth_ for t in token.rights]) + print(token.orth_, token.dep_, token.head.orth_, [t.orth_ for t in token.lefts], [t.orth_ for t in token.rights]) if __name__ == "__main__": spacyparserdemo() diff --git a/ch6/doc2vecexample.py b/ch6/doc2vecexample.py index 5509c11..c50738e 100644 --- a/ch6/doc2vecexample.py +++ b/ch6/doc2vecexample.py @@ -36,9 +36,9 @@ # save model model.save(saved_path) -print "training is over....!\n" +print("training is over....!\n") -print "testing started....!\n" +print("testing started....!\n") #parameters model="/home/jalaj/PycharmProjects/NLPython/NLPython/ch6/doc2vecdata/model.bin" test_docs="/home/jalaj/PycharmProjects/NLPython/NLPython/ch6/doc2vecdata/test_docs.txt" @@ -50,7 +50,7 @@ #load model m = g.Doc2Vec.load(model) -print m.wv.most_similar(positive=['family', 'dog']) +print(m.wv.most_similar(positive=['family', 'dog'])) test_docs = [ x.strip().split() for x in codecs.open(test_docs, "r", "utf-8").readlines() ] #infer test vectors diff --git a/ch6/gloveexample.py b/ch6/gloveexample.py index 4e8b629..d82f892 100644 --- a/ch6/gloveexample.py +++ b/ch6/gloveexample.py @@ -15,6 +15,6 @@ glove.fit(corpus.matrix, epochs=30, no_threads=4, verbose=True) glove.add_dictionary(corpus.dictionary) -print glove.most_similar('frog', number=10) -print glove.most_similar('girl', number=10) -print glove.most_similar('car', number=10) \ No newline at end of file +print(glove.most_similar('frog', number=10)) +print(glove.most_similar('girl', number=10)) +print(glove.most_similar('car', number=10)) \ No newline at end of file diff --git a/ch6/kingqueenexample.py b/ch6/kingqueenexample.py index 497f468..33bc876 100644 --- a/ch6/kingqueenexample.py +++ b/ch6/kingqueenexample.py @@ -1,9 +1,9 @@ from gensim import models w = models.Word2Vec.load_word2vec_format('/home/jalaj/Downloads/GoogleNews-vectors-negative300.bin', binary=True) -print 'King - man + woman:' -print '' -print w.wv.most_similar(positive=['woman', 'king'], negative=['man']) +print('King - man + woman:') +print('') +print(w.wv.most_similar(positive=['woman', 'king'], negative=['man'])) -print 'Similarity between man and woman:' -print w.similarity('woman', 'man') +print('Similarity between man and woman:') +print(w.similarity('woman', 'man')) diff --git a/ch6/stopwordfacts.py b/ch6/stopwordfacts.py index 8fc573f..d27d5b6 100644 --- a/ch6/stopwordfacts.py +++ b/ch6/stopwordfacts.py @@ -1,12 +1,12 @@ from gensim import models w = models.Word2Vec.load_word2vec_format('/home/jalaj/Downloads/GoogleNews-vectors-negative300.bin', binary=True) if 'the' in w.wv.vocab: - print "Vector for word 'the' \n" - print w.wv['the'] + print("Vector for word 'the' \n") + print(w.wv['the']) 
else: - print "Vocabulary doesn't include word 'the'\n" + print("Vocabulary doesn't include word 'the'\n") if 'a' in w.wv.vocab: - print "Vector for word 'a' \n" - print w.wv['a'] + print("Vector for word 'a' \n") + print(w.wv['a']) else: - print "Vocabulary doesn't include word 'a'\n" \ No newline at end of file + print("Vocabulary doesn't include word 'a'\n") \ No newline at end of file diff --git a/ch7/7_1_simplerule.py b/ch7/7_1_simplerule.py index 9b0b93b..d822d95 100644 --- a/ch7/7_1_simplerule.py +++ b/ch7/7_1_simplerule.py @@ -1,5 +1,9 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function from bs4 import BeautifulSoup import requests +import six def savedatainfile(filecontent): file = open("/home/jalaj/PycharmProjects/NLPython/NLPython/data/simpleruledata.txt", "a+") @@ -8,7 +12,7 @@ def savedatainfile(filecontent): def rulelogic(filecontent): programminglanguagelist = [] - with open(filecontent)as file: + with open(filecontent) as file: for line in file: if 'languages' in line or 'language' in line: # print line @@ -17,7 +21,7 @@ def rulelogic(filecontent): if word[0].isupper(): programminglanguagelist.append(word) # print programminglanguagelist - print programminglanguagelist + print(programminglanguagelist) def scrapdata(): url = 'https://en.wikipedia.org/wiki/Programming_language' @@ -26,7 +30,7 @@ def scrapdata(): tag = soup.find('div', {'class': 'mw-content-ltr'}) paragraphs = tag.findAll('p') for para in paragraphs: - paraexport = para.text.encode('utf-8') + paraexport = six.ensure_binary(para.text, 'utf-8') savedatainfile(paraexport) rulelogic("/home/jalaj/PycharmProjects/NLPython/NLPython/data/simpleruledata.txt") diff --git a/ch7/7_2_basicpythonrule.py b/ch7/7_2_basicpythonrule.py index 20c3eba..164f083 100644 --- a/ch7/7_2_basicpythonrule.py +++ b/ch7/7_2_basicpythonrule.py @@ -10,9 +10,9 @@ if findpattern_am: #print findpattern_am.group() - print re.sub(r'\b([1-9]|0[1-9]|1[0-2]{1,2})(am)\b', r'\1 a.m.', inputstring) + print(re.sub(r'\b([1-9]|0[1-9]|1[0-2]{1,2})(am)\b', r'\1 a.m.', inputstring)) elif findpattern_pm: #print findpattern_pm.group() - print re.sub(r'\b([1-9]|0[1-9]|1[0-2]{1,2})(pm)\b', r'\1 p.m.', inputstring) + print(re.sub(r'\b([1-9]|0[1-9]|1[0-2]{1,2})(pm)\b', r'\1 p.m.', inputstring)) else: - print "Not matched...!" + print("Not matched...!") diff --git a/ch7/7_3_SVArule.py b/ch7/7_3_SVArule.py index eb6da21..a854264 100644 --- a/ch7/7_3_SVArule.py +++ b/ch7/7_3_SVArule.py @@ -24,13 +24,13 @@ def rulelogic(sentnece): #print leaves_list if (any("We" in x for x in leaves_list) or any("I" in x for x in leaves_list) or any( "You" in x for x in leaves_list) or any("They" in x for x in leaves_list)) and any("VBZ" in x for x in leaves_list): - print "Alert: \nPlease check Subject and verb in the sentence.\nYou may have plural subject and singular verb. " + print("Alert: \nPlease check Subject and verb in the sentence.\nYou may have plural subject and singular verb. ") elif(any("He" in x for x in leaves_list) or any("She" in x for x in leaves_list) or any( "It" in x for x in leaves_list)) and any("VBP" in x for x in leaves_list): - print "Alert: \nPlease check subject and verb in the sentence.\n" \ - "You may have singular subject and plural verb." + print("Alert: \nPlease check subject and verb in the sentence.\n" \ + "You may have singular subject and plural verb.") else: - print "You have correct sentence." 
+ print("You have correct sentence.") if __name__ == "__main__": rulelogic('We know cooking.') diff --git a/ch7/chatbot/flaskengin.py b/ch7/chatbot/flaskengin.py index 508894f..a5193fc 100644 --- a/ch7/chatbot/flaskengin.py +++ b/ch7/chatbot/flaskengin.py @@ -168,7 +168,7 @@ def mobilenumber_asking(): # db_handler.update({"request_user_id": request_user_id}, { # '$set': {"request_user_id": request_user_id, "conversation": conversation_list_history, "time": now_india.strftime(fmt)}, # "$currentDate": {"lastModified": True}}, upsert=True) - except Exception: + except Exception : conversation_list_history.append(mobilenumber_asking_status) # db_handler.insert({"request_user_id": request_user_id, "conversation": conversation_list_history, # "time": now_india.strftime(fmt)}) @@ -194,7 +194,7 @@ def loan_ammount_asking_coversation(): # db_handler.update({"request_user_id": request_user_id}, { # '$set': {"request_user_id": request_user_id, "conversation": conversation_list_history, "time": now_india.strftime(fmt)}, # "$currentDate": {"lastModified": True}}, upsert=True) - print post_data_lead_create + print(post_data_lead_create) resp = Response(loan_conv_msg, status=200, mimetype='application/json') return resp diff --git a/ch8/Own_Logistic_Regression/logistic.py b/ch8/Own_Logistic_Regression/logistic.py index b9fd661..b76d89b 100644 --- a/ch8/Own_Logistic_Regression/logistic.py +++ b/ch8/Own_Logistic_Regression/logistic.py @@ -1,3 +1,6 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function """ This program performs two different logistic regression implementations on two different datasets of the format [float,float,boolean], one @@ -11,12 +14,12 @@ import math import numpy as np import pandas as pd -from pandas import DataFrame from sklearn import preprocessing from sklearn.linear_model import LogisticRegression from sklearn.cross_validation import train_test_split -from numpy import loadtxt, where +from numpy import where from pylab import scatter, show, legend, xlabel, ylabel +from six.moves import range # scale larger positive and values to between -1,1 depending on the largest # value in the data @@ -48,7 +51,7 @@ # train scikit learn model clf = LogisticRegression() clf.fit(X_train,Y_train) -print 'score Scikit learn: ', clf.score(X_test,Y_test) +print('score Scikit learn: ', clf.score(X_test,Y_test)) # visualize data, uncomment "show()" to run it pos = where(Y == 1) @@ -69,7 +72,7 @@ def Sigmoid(z): ##This hypothesis will be used to calculate each instance of the Cost Function def Hypothesis(theta, x): z = 0 - for i in xrange(len(theta)): + for i in range(len(theta)): z += x[i]*theta[i] return Sigmoid(z) @@ -78,7 +81,7 @@ def Hypothesis(theta, x): ##The "punishment" makes the change in the gradient of ThetaCurrent - Average(CostFunction(Dataset)) greater def Cost_Function(X,Y,theta,m): sumOfErrors = 0 - for i in xrange(m): + for i in range(m): xi = X[i] hi = Hypothesis(theta,xi) if Y[i] == 1: @@ -88,7 +91,7 @@ def Cost_Function(X,Y,theta,m): sumOfErrors += error const = -1/m J = const * sumOfErrors - print 'cost is ', J + print('cost is ', J ) return J ##This function creates the gradient component for each Theta value @@ -97,7 +100,7 @@ def Cost_Function(X,Y,theta,m): ##For each Theta there is a cost function calculated for each member of the dataset def Cost_Function_Derivative(X,Y,theta,j,m,alpha): sumErrors = 0 - for i in xrange(m): + for i in range(m): xi = X[i] xij = xi[j] hi = Hypothesis(theta,X[i]) @@ -114,7 +117,7 @@ 
def Cost_Function_Derivative(X,Y,theta,j,m,alpha): def Gradient_Descent(X,Y,theta,m,alpha): new_theta = [] constant = alpha/m - for j in xrange(len(theta)): + for j in range(len(theta)): CFDerivative = Cost_Function_Derivative(X,Y,theta,j,m,alpha) new_theta_value = theta[j] - CFDerivative new_theta.append(new_theta_value) @@ -125,14 +128,14 @@ def Gradient_Descent(X,Y,theta,m,alpha): ##set of values best representing the system in a linear combination model def Logistic_Regression(X,Y,alpha,theta,num_iters): m = len(Y) - for x in xrange(num_iters): + for x in range(num_iters): new_theta = Gradient_Descent(X,Y,theta,m,alpha) theta = new_theta if x % 100 == 0: #here the cost function is used to present the final hypothesis of the model in the same form for each gradient-step iteration Cost_Function(X,Y,theta,m) - print 'theta ', theta - print 'cost is ', Cost_Function(X,Y,theta,m) + print('theta ', theta ) + print('cost is ', Cost_Function(X,Y,theta,m)) Declare_Winner(theta) ##This method compares the accuracy of the model generated by the scikit library with the model generated by this implementation @@ -143,7 +146,7 @@ def Declare_Winner(theta): #if the prediction is the same as the dataset measured value it counts as a point for thie scikit version of LR scikit_score = clf.score(X_test,Y_test) length = len(X_test) - for i in xrange(length): + for i in range(length): prediction = round(Hypothesis(X_test[i],theta)) answer = Y_test[i] if prediction == answer: @@ -151,13 +154,13 @@ def Declare_Winner(theta): #the same process is repeated for the implementation from this module and the scores compared to find the higher match-rate my_score = float(score) / float(length) if my_score > scikit_score: - print 'You won!' + print('You won!') elif my_score == scikit_score: - print 'Its a tie!' + print('Its a tie!') else: - print 'Scikit won.. :(' - print 'Your score: ', my_score - print 'Scikits score: ', scikit_score + print('Scikit won.. :(') + print('Your score: ', my_score) + print('Scikits score: ', scikit_score ) # These are the initial guesses for theta as well as the learning rate of the algorithm # A learning rate too low will not close in on the most accurate values within a reasonable number of iterations diff --git a/ch8/sentimentanalysis/sentimentanalysis_NB.py b/ch8/sentimentanalysis/sentimentanalysis_NB.py index dbe4b20..37839ca 100644 --- a/ch8/sentimentanalysis/sentimentanalysis_NB.py +++ b/ch8/sentimentanalysis/sentimentanalysis_NB.py @@ -63,8 +63,8 @@ def usage(): print("Results for NaiveBayes (MultinomialNB) ") print("Training time: %fs; Prediction time: %fs" % (time_train, time_predict)) print(classification_report(test_labels, prediction)) - print "Reviews Prediction" - print prediction[10] + "----"+test_data[10] + print("Reviews Prediction") + print(prediction[10] + "----"+test_data[10]) - print "\nReviews Prediction" - print prediction[100] + "----" + test_data[100] + print("\nReviews Prediction") + print(prediction[100] + "----" + test_data[100]) diff --git a/ch9/gradientdescentexample/gradient_descent_example.py b/ch9/gradientdescentexample/gradient_descent_example.py index 2746fd1..2e26e37 100644 --- a/ch9/gradientdescentexample/gradient_descent_example.py +++ b/ch9/gradientdescentexample/gradient_descent_example.py @@ -2,7 +2,10 @@ # I've merely created a wrapper to get readers started. 
# This is the amazing video by Siraj Raval : https://www.youtube.com/watch?v=XdM6ER7zTLk -from numpy import * +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from six.moves import range # y = mx + b # m is slope, b is y-intercept @@ -67,15 +70,15 @@ def run(): # Here we are printing the initial values of b, m and error. # As well as there is the function compute_error_for_line_given_points() # which compute the errors for given point - print "Starting gradient descent at b = {0}, m = {1}, error = {2}".format(initial_b, initial_m, - compute_error_for_line_given_points(initial_b, initial_m, points)) - print "Running..." + print("Starting gradient descent at b = {0}, m = {1}, error = {2}".format(initial_b, initial_m, + compute_error_for_line_given_points(initial_b, initial_m, points))) + print("Running...") # By using this gradient_descent_runner() function we will actually calculate gradient descent [b, m] = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations) # Here we are printing the values of b, m and error after getting the line of best fit for the given dataset. - print "After {0} iterations b = {1}, m = {2}, error = {3}".format(num_iterations, b, m, compute_error_for_line_given_points(b, m, points)) + print("After {0} iterations b = {1}, m = {2}, error = {3}".format(num_iterations, b, m, compute_error_for_line_given_points(b, m, points))) if __name__ == '__main__': run() diff --git a/ch9/make_neural_net/singlelayerANN.py b/ch9/make_neural_net/singlelayerANN.py index 50c49ed..db3fbd1 100644 --- a/ch9/make_neural_net/singlelayerANN.py +++ b/ch9/make_neural_net/singlelayerANN.py @@ -1,7 +1,11 @@ #The credits for this code go to Milo Harper. # I've merely created a wrapper to get people started. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function from numpy import exp, array, random, dot +from six.moves import range class NeuralNetwork(): @@ -30,7 +34,7 @@ def __sigmoid_derivative(self, x): # We train the neural network through a process of trial and error. # Adjusting the synaptic weights each time. def train(self, training_set_inputs, training_set_outputs, number_of_training_iterations): - for iteration in xrange(number_of_training_iterations): + for iteration in range(number_of_training_iterations): # Pass the training set through our neural network (a single neuron). output = self.think(training_set_inputs) @@ -57,8 +61,8 @@ def think(self, inputs): #Intialise a single neuron neural network. neural_network = NeuralNetwork() - print "Random starting synaptic weights: " - print neural_network.synaptic_weights + print("Random starting synaptic weights: ") + print(neural_network.synaptic_weights) # The training set. We have 4 examples, each consisting of 3 input values # and 1 output value. @@ -70,9 +74,9 @@ def think(self, inputs): # Do it 10,000 times and make small adjustments each time. neural_network.train(training_set_inputs, training_set_outputs, 10000) - print "New synaptic weights after training: " - print neural_network.synaptic_weights + print("New synaptic weights after training: ") + print(neural_network.synaptic_weights) # Test the neural network with a new situation. 
- print "Considering new situation [1, 0, 0] -> ?: " - print neural_network.think(array([1, 0, 0])) \ No newline at end of file + print("Considering new situation [1, 0, 0] -> ?: ") + print(neural_network.think(array([1, 0, 0]))) \ No newline at end of file diff --git a/ch9/make_neural_net/threelayersANN.py b/ch9/make_neural_net/threelayersANN.py index b1aa4ed..1c15a5b 100644 --- a/ch9/make_neural_net/threelayersANN.py +++ b/ch9/make_neural_net/threelayersANN.py @@ -1,7 +1,11 @@ #The credits for this code go to Ludo Bouan. # I've merely created a wrapper to get people started. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function import numpy as np +from six.moves import range np.seterr(over='ignore') class NeuralNetwork(): diff --git a/solution/ch4_solution/stopwordremovefromfile.py b/solution/ch4_solution/stopwordremovefromfile.py index 5848cca..cbe505a 100644 --- a/solution/ch4_solution/stopwordremovefromfile.py +++ b/solution/ch4_solution/stopwordremovefromfile.py @@ -7,7 +7,7 @@ def readfileandremovestopword(): words = file_contents.lower().split() stops = set(stopwords.words("english")) preprocessed_words = [w for w in words if not w in stops] - print "" + print("") return (" ".join(preprocessed_words)) @@ -22,9 +22,9 @@ def fileloadandremovestopwords(): return processedword if __name__ == "__main__": - print "---------------" - print readfileandremovestopword() - print "\n" - print "---------------" - print "\n" - print fileloadandremovestopwords() \ No newline at end of file + print("---------------") + print(readfileandremovestopword()) + print("\n") + print("---------------") + print("\n") + print(fileloadandremovestopwords()) \ No newline at end of file