lesezeichen.py 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. from getpass import getpass
  2. import requests
  3. from html.parser import HTMLParser
  4. from html.entities import name2codepoint
  5. class lesezeichenParser(HTMLParser):
  6. inHeader = False
  7. watchlist = []
  8. readlist = []
  9. inh4 = False
  10. inWatchlist = False
  11. inReadlist = False
  12. inRow = False
  13. inData = False
  14. tdCount = 0
  15. inName = False
  16. anime = dict()
  17. def handle_starttag(self, tag, attrs):
  18. #headline to diff read/watchlist
  19. if tag == 'h4':
  20. self.inh4 = True
  21. #tablerow for parsing entry
  22. if (self.inWatchlist or self.inReadlist) and tag == 'tr':
  23. self.inRow = True
  24. self.tdCount = 0
  25. if self.inWatchlist or self.inReadlist:
  26. self.anime = dict()
  27. #table data for parsing info
  28. if self.inRow and tag == 'td':
  29. self.tdCount +=1
  30. self.inData = True
  31. if self.inData and tag == 'a' and self.tdCount == 2:
  32. self.inName = True
  33. if self.inWatchlist or self.inReadlist:
  34. self.anime['link'] = attrs[2][1]
  35. if self.inData and tag == 'img':
  36. if 'online' in attrs[0][1]:
  37. self.anime['new'] = True
  38. else:
  39. self.anime['new'] = False
  40. if (self.inWatchlist or self.inReadlist) and tag == 'th':
  41. self.inHeader = True
  42. def handle_endtag(self, tag):
  43. if tag == 'h4' and self.inh4:
  44. self.inh4 = False
  45. if tag == 'table' and self.inReadlist:
  46. self.inReadlist = False
  47. if tag == 'table' and self.inWatchlist:
  48. self.inWatchlist = False
  49. if tag == 'tr' and self.inRow:
  50. self.inRow = False
  51. if self.inHeader:
  52. self.inHeader = False
  53. else:
  54. if self.inWatchlist:
  55. self.watchlist.append(self.anime)
  56. if self.inReadlist:
  57. self.readlist.append(self.anime)
  58. if tag == 'td' and self.inData:
  59. self.inData = False
  60. if self.inName and tag == 'a':
  61. self.inName = False
  62. def handle_data(self, data):
  63. if self.inh4 == True and data == 'Watchlist (Anime)':
  64. self.inWatchlist = True
  65. if self.inh4 and data == 'Readlist (Manga)':
  66. self.inReadlist = True
  67. if (self.inWatchlist or self.inReadlist) and self.inData and self.tdCount == 3:
  68. self.anime['num'] = data
  69. if self.inName:
  70. if self.inWatchlist or self.inReadlist:
  71. self.anime['name'] = data