# chapter.py
import json
import logging
from html.entities import name2codepoint
from html.parser import HTMLParser
  3. class chapterParser(HTMLParser):
  4. inScript = False
  5. serverurl = ""
  6. images = []
  7. token = ""
  8. def handle_starttag(self, tag, attrs):
  9. if tag=='script':
  10. self.inScript = True
  11. if tag == 'input' and len(attrs) > 2:
  12. if attrs[2][1] == 'proxerToken':
  13. self.token = attrs[1][1]
  14. def handle_data(self, data):
  15. if self.inScript:
  16. if 'var serverurl' in data:
  17. self.serverurl = data[data.index('serverurl')+16:len(data)-5]
  18. self.serverurl = self.serverurl.replace('\\','').replace(';','')
  19. print("serverurl " + self.serverurl)
  20. arrString = data[data.index('var pages')+ 14:]
  21. file = ""
  22. tempStr = ""
  23. width = 0
  24. height = 0
  25. fileDict = dict()
  26. commaCount = 0
  27. arrString = arrString[:arrString.index(';') - 1]
  28. print(arrString)
  29. for c in arrString:
  30. if c == '[':
  31. fileDict = dict()
  32. if commaCount == 0 and c != '[' and c != ',':
  33. if c != '"':
  34. file += c
  35. if c == ',':
  36. commaCount += 1
  37. tempStr = ''
  38. if commaCount == 1 and c != ',' and c != ']':
  39. tempStr += c
  40. height = int(tempStr)
  41. if commaCount == 2 and c != ',' and c != ']':
  42. tempStr += c
  43. width = int(tempStr)
  44. if c == ']':
  45. fileDict['file'] = file
  46. fileDict['width'] = width
  47. fileDict['height'] = height
  48. self.images.append(fileDict)
  49. file = ""
  50. tempStr = ""
  51. commaCount = -1
  52. print(str(self.images))
  53. def handle_endtag(self, tag):
  54. if self.inScript and tag=='script':
  55. self.inScript = False