# chapter.py
import logging
from html.entities import name2codepoint
from html.parser import HTMLParser
  3. class chapterParser(HTMLParser):
  4. inScript = False
  5. serverurl = ""
  6. images = []
  7. def handle_starttag(self, tag, attrs):
  8. if tag=='script':
  9. self.inScript = True
  10. def handle_data(self, data):
  11. if self.inScript:
  12. if 'var serverurl' in data:
  13. self.serverurl = data[data.index('serverurl')+16:len(data)-5]
  14. self.serverurl = self.serverurl.replace('\\','').replace(';','')
  15. #print("serverurl " + self.serverurl)
  16. arrString = data[data.index('var pages')+ 14:]
  17. file = ""
  18. tempStr = ""
  19. width = 0
  20. height = 0
  21. fileDict = dict()
  22. commaCount = 0
  23. arrString = arrString[:arrString.index(';') - 1]
  24. print(arrString)
  25. for c in arrString:
  26. if c == '[':
  27. fileDict = dict()
  28. if commaCount == 0 and c != '[' and c != ',':
  29. if c != '"':
  30. file += c
  31. if c == ',':
  32. commaCount += 1
  33. tempStr = ''
  34. if commaCount == 1 and c != ',' and c != ']':
  35. tempStr += c
  36. height = int(tempStr)
  37. if commaCount == 2 and c != ',' and c != ']':
  38. tempStr += c
  39. width = int(tempStr)
  40. if c == ']':
  41. fileDict['file'] = file
  42. fileDict['width'] = width
  43. fileDict['height'] = height
  44. self.images.append(fileDict)
  45. file = ""
  46. tempStr = ""
  47. commaCount = -1
  48. print(str(self.images))
  49. def handle_endtag(self, tag):
  50. if self.inScript and tag=='script':
  51. self.inScript = False