|
|
@@ -0,0 +1,82 @@
|
|
|
+from getpass import getpass
|
|
|
+import requests
|
|
|
+from html.parser import HTMLParser
|
|
|
+from html.entities import name2codepoint
|
|
|
+
|
|
|
class lesezeichenParser(HTMLParser):
    """Collect watchlist and readlist entries from a bookmarks HTML page.

    The parser looks for ``<h4>`` headings whose text is
    ``'Watchlist (Anime)'`` or ``'Readlist (Manga)'``.  Every ``<tr>`` seen
    after such a heading, up to the next ``</table>``, is turned into a dict
    and appended to ``watchlist`` or ``readlist`` respectively.  Rows that
    contain a ``<th>`` cell are treated as header rows and skipped.

    Each entry dict may contain:

    * ``'link'`` -- ``href`` of the anchor found in the second ``<td>``.
    * ``'name'`` -- text of that anchor.
    * ``'new'``  -- ``True`` when the first attribute value of an ``<img>``
      inside a cell contains ``'online'``, otherwise ``False``.

    Usage: create an instance, call ``feed(html)``, then read ``watchlist``
    and ``readlist``.
    """

    def __init__(self, *, convert_charrefs=True):
        """Initialise per-instance parser state.

        All state lives on the instance (not the class) so that several
        parsers never share the same result lists.
        """
        super().__init__(convert_charrefs=convert_charrefs)

        # Results.
        self.watchlist = []
        self.readlist = []

        # Entry currently being assembled for the open table row.
        self.anime = {}

        # Position flags describing where in the document we currently are.
        self.inh4 = False
        self.inWatchlist = False
        self.inReadlist = False
        self.inRow = False
        self.inHeader = False
        self.inData = False
        self.inName = False

        # 1-based index of the current <td> within the current row.
        self.tdCount = 0

    def handle_starttag(self, tag, attrs):
        """Track entry into headings, rows and cells; capture link/new flag.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``; ``value`` is ``None`` for value-less attributes.
        """
        in_list = self.inWatchlist or self.inReadlist

        # A heading tells us (via handle_data) which list follows.
        if tag == 'h4':
            self.inh4 = True

        # Each table row inside a list starts a fresh entry.
        if in_list and tag == 'tr':
            self.inRow = True
            self.tdCount = 0
            self.anime = {}

        # Count the cells so the name/link column can be recognised.
        if self.inRow and tag == 'td':
            self.tdCount += 1
            self.inData = True

        # The anchor in the second cell carries the entry's name and link.
        if self.inData and tag == 'a' and self.tdCount == 2:
            self.inName = True
            if in_list:
                # Look the link up by attribute name rather than by position
                # so that anchors with fewer or reordered attributes work.
                link = dict(attrs).get('href')
                if link is None and len(attrs) > 2:
                    # Fall back to the historical "third attribute" position.
                    link = attrs[2][1]
                self.anime['link'] = link

        # The status icon marks whether the entry has something new.
        if self.inData and tag == 'img':
            # Guard against attribute-less images and value-less attributes.
            first_value = attrs[0][1] if attrs else None
            self.anime['new'] = bool(first_value) and 'online' in first_value

        # A <th> marks the current row as a header row (no entry to store).
        if in_list and tag == 'th':
            self.inHeader = True

    def handle_endtag(self, tag):
        """Leave headings, tables, rows and cells; store finished entries."""
        if tag == 'h4' and self.inh4:
            self.inh4 = False

        # The end of the table is the end of the current list.
        if tag == 'table':
            self.inReadlist = False
            self.inWatchlist = False

        if tag == 'tr' and self.inRow:
            self.inRow = False
            if self.inHeader:
                # Header rows carry no entry; just clear the marker.
                self.inHeader = False
            else:
                if self.inWatchlist:
                    self.watchlist.append(self.anime)
                if self.inReadlist:
                    self.readlist.append(self.anime)

        if tag == 'td' and self.inData:
            self.inData = False

        if tag == 'a' and self.inName:
            self.inName = False

    def handle_data(self, data):
        """Detect list headings and capture the entry name."""
        if self.inh4:
            # Ignore surrounding whitespace so formatted markup still matches.
            heading = data.strip()
            if heading == 'Watchlist (Anime)':
                self.inWatchlist = True
            if heading == 'Readlist (Manga)':
                self.inReadlist = True

        if self.inName and (self.inWatchlist or self.inReadlist):
            self.anime['name'] = data
|
|
|
+
|