from html.parser import HTMLParser
from html.entities import name2codepoint
class chapterParser(HTMLParser):
    """HTML parser that pulls chapter metadata out of a reader page.

    While the page is fed through the parser it collects:

    * ``token``     - value of the second attribute of an ``<input>`` tag
                      whose third attribute value is ``'proxerToken'``;
    * ``serverurl`` - text sliced out of the inline script that contains
                      ``var serverurl``;
    * ``images``    - one ``{'file', 'width', 'height'}`` dict per entry of
                      the ``var pages`` array found in that same script.

    NOTE(review): all slice offsets below are tied to the exact layout of
    the page's inline script, which is not visible from this file - confirm
    against a real page before changing them.
    """

    # Immutable defaults may safely live on the class; each instance shadows
    # them on first assignment.
    inScript = False
    serverurl = ""
    token = ""

    def __init__(self, *args, **kwargs):
        """Create a parser with its own, initially empty, ``images`` list.

        ``images`` used to be a class-level list, which made every parser
        instance append to (and read from) the same shared list. Arguments
        are forwarded unchanged to ``HTMLParser``.
        """
        super().__init__(*args, **kwargs)
        self.images = []

    def handle_starttag(self, tag, attrs):
        """Track ``<script>`` nesting and capture the token input field.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``; the token lookup is positional.
        """
        if tag == 'script':
            self.inScript = True
        # Positional lookup: third attribute identifies the field, second
        # attribute carries the token value.
        if tag == 'input' and len(attrs) > 2 and attrs[2][1] == 'proxerToken':
            self.token = attrs[1][1]

    def handle_data(self, data):
        """Extract ``serverurl`` and the page list from inline script text.

        Only text seen inside a ``<script>`` element that contains
        ``var serverurl`` is processed; everything else is ignored.

        Raises:
            ValueError: if ``var pages`` or a terminating ``;`` is missing
                from the script text, or a page dimension is not an integer.
        """
        if not self.inScript or 'var serverurl' not in data:
            return

        # Skip 16 characters from the start of 'serverurl' and drop the last
        # five characters of the script text.
        # presumably this strips the assignment prefix and the trailing
        # quote/terminator - TODO confirm against the real markup.
        url_start = data.index('serverurl') + 16
        raw_url = data[url_start:len(data) - 5]
        self.serverurl = raw_url.replace('\\', '').replace(';', '')
        print("serverurl " + self.serverurl)

        # Skip 14 characters from the start of 'var pages', then cut one
        # character before the first ';'.
        # presumably removes the outer brackets of the array literal -
        # TODO confirm.
        array_text = data[data.index('var pages') + 14:]
        array_text = array_text[:array_text.index(';') - 1]
        print(array_text)

        self._collect_pages(array_text)
        print(str(self.images))

    def _collect_pages(self, array_text):
        """Scan ``array_text`` character by character into ``self.images``.

        Commas advance the field counter: field 0 is the file name (quotes
        and ``[`` are dropped), field 1 is stored as ``height`` and field 2
        as ``width``. Each ``]`` closes the current entry and appends a new
        dict to ``self.images``.
        """
        filename = ""
        digits = ""
        width = 0
        height = 0
        comma_count = 0

        for ch in array_text:
            if ch == ',':
                comma_count += 1
                digits = ""
            elif comma_count == 0:
                if ch not in '["':
                    filename += ch
            elif ch != ']' and comma_count in (1, 2):
                digits += ch
                # Re-parsed on every digit so the value is current when the
                # field ends; non-numeric characters raise ValueError.
                if comma_count == 1:
                    height = int(digits)
                else:
                    width = int(digits)

            if ch == ']':
                # A fresh dict per entry, so records never alias each other.
                self.images.append(
                    {'file': filename, 'width': width, 'height': height}
                )
                filename = ""
                digits = ""
                # The ',' separating two entries brings the counter back to 0.
                comma_count = -1

    def handle_endtag(self, tag):
        """Leave script mode when the closing ``</script>`` tag is seen."""
        if self.inScript and tag == 'script':
            self.inScript = False