import json
import logging
import re
from html.entities import name2codepoint  # noqa: F401 -- unused here; kept in case other modules import it from this file
from html.parser import HTMLParser

_logger = logging.getLogger(__name__)

# Matches the JavaScript assignment that introduces the page list, ending
# right before the first character of the assigned value.
_PAGES_ASSIGNMENT = re.compile(r'var pages\s*=\s*')


class chapterParser(HTMLParser):
    """Collect the server URL, page images and form token from a chapter page.

    Feed the HTML of a page to the parser; afterwards the results are
    available as instance attributes:

    * ``serverurl`` -- text sliced out of the ``var serverurl`` script
      statement, with backslashes and semicolons removed.
    * ``images`` -- list of ``{'file': ..., 'width': ..., 'height': ...}``
      dicts, one per entry of the ``var pages`` script array.  Results of
      successive parses on the same instance accumulate.
    * ``token`` -- value of the second attribute of an ``<input>`` tag whose
      third attribute value is ``'proxerToken'``.
    * ``inScript`` -- True while the parser is inside a ``<script>`` element.
    """

    # Class-level defaults for the immutable attributes.  ``images`` is
    # deliberately NOT defined here: a list at class level would be shared
    # (and appended to) by every parser instance.
    inScript = False
    serverurl = ""
    token = ""

    # Fixed offsets used to slice the URL out of the script text.
    # NOTE(review): these mirror the layout of the page the parser was
    # written against (characters between the start of "serverurl" and the
    # URL, and trailing characters after it) -- confirm against a live page.
    _SERVERURL_SKIP = 16
    _SERVERURL_TAIL = 5

    def __init__(self, *args, **kwargs):
        """Create a parser with its own, empty result state.

        All arguments are forwarded unchanged to ``HTMLParser.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.inScript = False
        self.serverurl = ""
        self.images = []
        self.token = ""

    def handle_starttag(self, tag, attrs):
        """Track ``<script>`` nesting and pick up the form token."""
        if tag == 'script':
            self.inScript = True
        # The token input is recognised purely by attribute position: the
        # third attribute carries the marker, the second one the token.
        if tag == 'input' and len(attrs) > 2 and attrs[2][1] == 'proxerToken':
            self.token = attrs[1][1]

    def handle_data(self, data):
        """Extract the server URL and page list from script text.

        Only text inside a ``<script>`` element that contains
        ``var serverurl`` is examined; everything else is ignored.

        Raises:
            ValueError: if a ``var pages`` assignment is present but its
                value is not a well-formed array of
                ``[file, height, width]`` entries.
        """
        if not self.inScript or 'var serverurl' not in data:
            return
        self.serverurl = self._extract_serverurl(data)
        self.images.extend(self._extract_pages(data))
        _logger.debug("serverurl=%r images=%r", self.serverurl, self.images)

    def handle_endtag(self, tag):
        """Leave script mode when the ``<script>`` element closes."""
        if self.inScript and tag == 'script':
            self.inScript = False

    @classmethod
    def _extract_serverurl(cls, data):
        """Slice the server URL out of *data* and strip ``\\`` and ``;``."""
        start = data.index('serverurl') + cls._SERVERURL_SKIP
        raw = data[start:len(data) - cls._SERVERURL_TAIL]
        return raw.replace('\\', '').replace(';', '')

    @staticmethod
    def _extract_pages(data):
        """Return the image dicts described by the ``var pages`` array.

        Returns an empty list when *data* holds no ``var pages`` assignment.
        """
        assignment = _PAGES_ASSIGNMENT.search(data)
        if assignment is None:
            return []
        # raw_decode parses exactly one JSON value and ignores whatever
        # follows it, so the rest of the script does not need to be cut off.
        pages, _ = json.JSONDecoder().raw_decode(data, assignment.end())
        if not isinstance(pages, list):
            raise ValueError("'var pages' is not an array: %r" % (pages,))

        images = []
        for entry in pages:
            try:
                # Entry layout is [file, height, width]; extras are ignored.
                name, height, width = entry[:3]
                images.append(
                    {'file': name, 'width': int(width), 'height': int(height)}
                )
            except (TypeError, ValueError) as err:
                raise ValueError(
                    "malformed page entry: %r" % (entry,)
                ) from err
        return images