| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758 |
- from html.parser import HTMLParser
- from html.entities import name2codepoint
class chapterParser(HTMLParser):
    """Collect reader data embedded in a chapter page.

    Feeding a document through the parser fills three attributes:

    * ``token``     - value of the second attribute of an ``<input>`` tag
      whose third attribute has the value ``proxerToken``.
    * ``serverurl`` - text sliced out of the ``var serverurl`` assignment
      found inside a ``<script>`` element, with backslashes and semicolons
      removed.
    * ``images``    - one ``{'file', 'width', 'height'}`` dict per entry of
      the ``var pages`` array found in that same script.  Each entry is read
      as ``[file, height, width]``.

    ``images`` accumulates across scripts and ``feed`` calls on the same
    instance, but is no longer shared between different instances.
    """

    # Immutable class-level defaults, kept so attribute reads on the class
    # itself keep working.  ``images`` is deliberately NOT defined here: a
    # class-level list would be shared (and appended to) by every instance.
    inScript = False
    serverurl = ""
    token = ""

    # Slice offsets used to cut the URL out of the script text.  They are
    # taken unchanged from the original implementation; presumably they match
    # the exact layout of the target site's script - TODO confirm.
    _SERVERURL_SKIP = 16  # chars skipped, counted from the 's' of 'serverurl'
    _SERVERURL_TAIL = 5   # chars dropped from the end of the script text

    def __init__(self, *, convert_charrefs=True):
        """Create a parser with its own, empty result state.

        Args:
            convert_charrefs: forwarded unchanged to ``HTMLParser``.
        """
        super().__init__(convert_charrefs=convert_charrefs)
        self.inScript = False
        self.serverurl = ""
        self.token = ""
        self.images = []

    def handle_starttag(self, tag, attrs):
        """Track entry into ``<script>`` and pick up the token input.

        Args:
            tag: lower-cased tag name supplied by ``HTMLParser``.
            attrs: list of ``(name, value)`` pairs in document order.
        """
        if tag == 'script':
            self.inScript = True
        # The lookup is positional: the third attribute must carry the value
        # 'proxerToken' and the second attribute's value is stored as token.
        if tag == 'input' and len(attrs) > 2:
            if attrs[2][1] == 'proxerToken':
                self.token = attrs[1][1]

    def handle_data(self, data):
        """Extract ``serverurl`` and the page list from script text.

        Text outside a ``<script>`` element, and script text that does not
        contain ``var serverurl``, is ignored.

        Args:
            data: text content supplied by ``HTMLParser``.

        Raises:
            ValueError: if a ``var pages`` array is present but is not valid
                JSON (``json.JSONDecodeError`` is a ``ValueError``).
        """
        if not self.inScript or 'var serverurl' not in data:
            return

        begin = data.index('serverurl') + self._SERVERURL_SKIP
        raw_url = data[begin:len(data) - self._SERVERURL_TAIL]
        self.serverurl = raw_url.replace('\\', '').replace(';', '')
        print("serverurl " + self.serverurl)

        self._collect_pages(data)

    def _collect_pages(self, data):
        """Append one dict per ``var pages`` entry found in *data*.

        Does nothing when *data* holds no ``var pages`` array.
        """
        import json  # local import keeps this class self-contained

        marker = data.find('var pages')
        if marker == -1:
            return
        opening = data.find('[', marker)
        if opening == -1:
            return

        # raw_decode parses exactly one JSON value starting at ``opening`` and
        # ignores whatever follows, so the array no longer has to be cut out
        # by searching for ';' (which broke on file names containing ';',
        # ',' or ']').
        pages, closing = json.JSONDecoder().raw_decode(data, opening)
        print(data[opening:closing])

        for entry in pages:
            if not isinstance(entry, list) or not entry:
                continue
            # Entry layout is [file, height, width]; a missing dimension
            # becomes 0 instead of inheriting the previous entry's value.
            self.images.append({
                'file': str(entry[0]),
                'width': int(entry[2]) if len(entry) > 2 else 0,
                'height': int(entry[1]) if len(entry) > 1 else 0,
            })
        print(str(self.images))

    def handle_endtag(self, tag):
        """Leave script mode when the closing ``</script>`` tag is seen."""
        if self.inScript and tag == 'script':
            self.inScript = False
|