from getpass import getpass
from html.entities import name2codepoint
from html.parser import HTMLParser

import requests


class lesezeichenParser(HTMLParser):
    """Collect watchlist and readlist entries from a bookmarks HTML page.

    The parser looks for ``<h4>`` headings whose text is
    ``'Watchlist (Anime)'`` or ``'Readlist (Manga)'``.  Every table row that
    follows such a heading (until the closing ``</table>``) and contains no
    ``<th>`` cell is stored as a dict in ``watchlist`` or ``readlist``.

    Keys an entry may contain:

    * ``'link'`` -- ``href`` of the anchor found in the second ``<td>``
    * ``'name'`` -- text content of that anchor
    * ``'new'``  -- ``True`` when the first attribute value of an ``<img>``
      inside a ``<td>`` contains ``'online'``, otherwise ``False``

    Usage: create an instance, call ``feed(html)``, then read ``watchlist``
    and ``readlist``.
    """

    def __init__(self, *, convert_charrefs=True):
        """Create a parser with its own, empty result lists.

        ``convert_charrefs`` is forwarded unchanged to ``HTMLParser``.
        """
        super().__init__(convert_charrefs=convert_charrefs)
        # All state is per instance.  Keeping the lists on the class would
        # make every parser append to the same shared result lists.
        self.watchlist = []
        self.readlist = []
        self.anime = dict()     # entry for the row currently being parsed
        self.inHeader = False   # current row contains a <th> cell
        self.inh4 = False       # inside an <h4> heading
        self.inWatchlist = False
        self.inReadlist = False
        self.inRow = False      # inside a <tr> of a tracked table
        self.inData = False     # inside a <td> of a tracked row
        self.tdCount = 0        # 1-based index of the current <td> in the row
        self.inName = False     # inside the title anchor of the second <td>

    def _in_tracked_table(self):
        """Return True while inside the watchlist or the readlist section."""
        return self.inWatchlist or self.inReadlist

    @staticmethod
    def _link_from(attrs):
        """Return the link target described by an anchor's attribute list."""
        href = dict(attrs).get('href')
        if href is not None:
            return href
        # No href attribute: fall back to the third attribute, which is
        # where this parser historically expected the link, if it exists.
        return attrs[2][1] if len(attrs) > 2 else None

    def handle_starttag(self, tag, attrs):
        """Track headings, rows and cells; record link and 'new' flag."""
        # Headline used to tell the readlist and the watchlist apart.
        if tag == 'h4':
            self.inh4 = True

        # A table row starts a fresh entry.
        if tag == 'tr' and self._in_tracked_table():
            self.inRow = True
            self.tdCount = 0
            self.anime = dict()

        # Table data cells carry the entry's details.
        if tag == 'td' and self.inRow:
            self.tdCount += 1
            self.inData = True

        # The anchor in the second cell holds the title and its link.
        if tag == 'a' and self.inData and self.tdCount == 2:
            self.inName = True
            if self._in_tracked_table():
                self.anime['link'] = self._link_from(attrs)
                # Drop any title left over from an earlier anchor in this
                # row so handle_data() accumulates this anchor's text only.
                self.anime.pop('name', None)

        # The status icon marks entries as new via its first attribute.
        if tag == 'img' and self.inData:
            # Attribute-less tags and value-less attributes (value None)
            # count as "not new" instead of raising.
            first_value = attrs[0][1] if attrs else None
            self.anime['new'] = (
                first_value is not None and 'online' in first_value
            )

        # Rows containing a header cell are not entries.
        if tag == 'th' and self._in_tracked_table():
            self.inHeader = True

    def handle_endtag(self, tag):
        """Close the current heading/table/row/cell and store finished rows."""
        if tag == 'h4' and self.inh4:
            self.inh4 = False

        # The end of the table ends the section it belongs to.
        if tag == 'table' and self.inReadlist:
            self.inReadlist = False
        if tag == 'table' and self.inWatchlist:
            self.inWatchlist = False

        if tag == 'tr' and self.inRow:
            self.inRow = False
            if self.inHeader:
                # Header row: nothing to store, just re-arm for data rows.
                self.inHeader = False
            else:
                if self.inWatchlist:
                    self.watchlist.append(self.anime)
                if self.inReadlist:
                    self.readlist.append(self.anime)

        if tag == 'td' and self.inData:
            self.inData = False

        if tag == 'a' and self.inName:
            self.inName = False

    def handle_data(self, data):
        """Detect section headings and collect the entry title."""
        if self.inh4:
            # Surrounding whitespace in the heading must not hide a section.
            heading = data.strip()
            if heading == 'Watchlist (Anime)':
                self.inWatchlist = True
            if heading == 'Readlist (Manga)':
                self.inReadlist = True

        if self.inName and self._in_tracked_table():
            # The text of one anchor can arrive in several pieces (nested
            # inline tags, or entity references when convert_charrefs is
            # False), so append instead of overwriting.
            self.anime['name'] = self.anime.get('name', '') + data