| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485 |
- from getpass import getpass
- import requests
- from html.parser import HTMLParser
- from html.entities import name2codepoint
class lesezeichenParser(HTMLParser):
    """Collect bookmark rows from an HTML page into two lists.

    The parser looks for ``<h4>`` headlines whose text is exactly
    ``'Watchlist (Anime)'`` or ``'Readlist (Manga)'``.  After such a
    headline, every ``<tr>`` that does not contain a ``<th>`` is turned
    into a dict and appended to ``watchlist`` or ``readlist``; the
    section ends at the next ``</table>``.

    Each entry dict may contain these keys:

    * ``'new'``  -- ``True`` if the first attribute value of an ``<img>``
      inside a cell contains the substring ``'online'``, else ``False``.
    * ``'link'`` -- value of the third attribute of the ``<a>`` found in
      the second cell (presumably the ``href``; verify against the
      actual markup).
    * ``'name'`` -- text inside that ``<a>``.
    * ``'num'``  -- text of the third cell.

    Attributes:
        watchlist: entries parsed from the watchlist table.
        readlist: entries parsed from the readlist table.
    """

    def __init__(self, *, convert_charrefs=True):
        """Create a parser with its own, empty result lists.

        Args:
            convert_charrefs: forwarded unchanged to ``HTMLParser``.
        """
        super().__init__(convert_charrefs=convert_charrefs)
        # State lives on the instance: as class attributes the lists and
        # the entry dict would be shared between all parser objects.
        self.watchlist = []
        self.readlist = []
        self.anime = {}
        self.inHeader = False
        self.inh4 = False
        self.inWatchlist = False
        self.inReadlist = False
        self.inRow = False
        self.inData = False
        self.inName = False
        self.tdCount = 0

    def _inList(self):
        """Return True while inside the watchlist or readlist section."""
        return self.inWatchlist or self.inReadlist

    def handle_starttag(self, tag, attrs):
        """Update the parse state for an opening tag.

        Args:
            tag: lower-cased tag name.
            attrs: list of ``(name, value)`` pairs as given by HTMLParser.
        """
        # A headline decides whether a watch- or readlist follows.
        if tag == 'h4':
            self.inh4 = True

        # A table row starts a fresh entry.
        if self._inList() and tag == 'tr':
            self.inRow = True
            self.tdCount = 0
            self.anime = {}

        # Table cells are counted so columns can be told apart.
        if self.inRow and tag == 'td':
            self.tdCount += 1
            self.inData = True

        # The anchor in the second cell carries name and link.
        if self.inData and tag == 'a' and self.tdCount == 2:
            self.inName = True
            # The link is read by position (third attribute); anchors
            # with fewer attributes are skipped instead of raising.
            if self._inList() and len(attrs) > 2:
                self.anime['link'] = attrs[2][1]

        # An image whose first attribute value mentions 'online' marks
        # the entry as new.
        if self.inData and tag == 'img':
            # Missing or valueless first attribute counts as "not new".
            firstValue = attrs[0][1] if attrs else None
            self.anime['new'] = bool(firstValue) and 'online' in firstValue

        # Rows containing header cells are not entries.
        if self._inList() and tag == 'th':
            self.inHeader = True

    def handle_endtag(self, tag):
        """Update the parse state for a closing tag and store finished rows.

        Args:
            tag: lower-cased tag name.
        """
        if tag == 'h4' and self.inh4:
            self.inh4 = False

        # Remember the active section before a closing table may end it,
        # matching the order of checks below.
        if tag == 'table' and self.inReadlist:
            self.inReadlist = False
        if tag == 'table' and self.inWatchlist:
            self.inWatchlist = False

        if tag == 'tr' and self.inRow:
            self.inRow = False
            if self.inHeader:
                # Header row: nothing to store, just clear the marker.
                self.inHeader = False
            else:
                if self.inWatchlist:
                    self.watchlist.append(self.anime)
                if self.inReadlist:
                    self.readlist.append(self.anime)

        if tag == 'td' and self.inData:
            self.inData = False
        if self.inName and tag == 'a':
            self.inName = False

    def handle_data(self, data):
        """Pick up headline text, entry number and entry name.

        Args:
            data: text content reported by HTMLParser.
        """
        # Headline text switches the corresponding section on.
        if self.inh4 and data == 'Watchlist (Anime)':
            self.inWatchlist = True
        if self.inh4 and data == 'Readlist (Manga)':
            self.inReadlist = True

        # Text of the third cell is stored as 'num'.
        if self._inList() and self.inData and self.tdCount == 3:
            self.anime['num'] = data

        # Text inside the anchor of the second cell is the name.
        if self.inName and self._inList():
            self.anime['name'] = data
|