diff --git a/scraping/new_mexico_tech_banweb.py b/scraping/new_mexico_tech_banweb.py index 0d650b9..3269a47 100755 --- a/scraping/new_mexico_tech_banweb.py +++ b/scraping/new_mexico_tech_banweb.py @@ -159,7 +159,7 @@ def getTerm(semester, subjects, parser): url = "http://banweb7.nmt.edu/pls/PROD/hwzkcrof.P_UncgSrchCrsOff?p_term="+t.getSemester()+"&p_subj="+subjectName.replace(" ", "%20") print url page = urllib2.urlopen(url) - soup = BeautifulSoup(page, "html.parser") + soup = BeautifulSoup(page, "lxml") trs = soup.findAll("tr") trs = trs[1:] #discard the retarded row that banweb is retarded about print_verbose("adding subject "+subjectName) @@ -179,7 +179,7 @@ def main(parser): page = urllib2.urlopen(url) - soup = BeautifulSoup(page, "html.parser") + soup = BeautifulSoup(page, "lxml") path = "" if (type(parser.path) == type("")):