|
|
@@ -4,7 +4,7 @@ from html.entities import name2codepoint
|
|
|
class chapterParser(HTMLParser):
|
|
|
inScript = False
|
|
|
serverurl = ""
|
|
|
- imageCount = 0
|
|
|
+ images = []
|
|
|
def handle_starttag(self, tag, attrs):
|
|
|
if tag=='script':
|
|
|
self.inScript = True
|
|
|
@@ -14,11 +14,39 @@ class chapterParser(HTMLParser):
|
|
|
self.serverurl = data[data.index('serverurl')+16:len(data)-5]
|
|
|
self.serverurl = self.serverurl.replace('\\','').replace(';','')
|
|
|
#print("serverurl " + self.serverurl)
|
|
|
- imageString = data[data.rindex('[')+2:data.rindex(']')]
|
|
|
- imageString = imageString[:imageString.index('"')]
|
|
|
- imageString = imageString.replace('.jpg','')
|
|
|
- self.imageCount = int(imageString)
|
|
|
- #print(imageCount)
|
|
|
+ arrString = data[data.index('var pages')+ 14:]
|
|
|
+ file = ""
|
|
|
+ tempStr = ""
|
|
|
+ width = 0
|
|
|
+ height = 0
|
|
|
+ fileDict = dict()
|
|
|
+ commaCount = 0
|
|
|
+ arrString = arrString[:arrString.index(';') - 1]
|
|
|
+ print(arrString)
|
|
|
+ for c in arrString:
|
|
|
+ if c == '[':
|
|
|
+ fileDict = dict()
|
|
|
+ if commaCount == 0 and c != '[' and c != ',':
|
|
|
+ if c != '"':
|
|
|
+ file += c
|
|
|
+ if c == ',':
|
|
|
+ commaCount += 1
|
|
|
+ tempStr = ''
|
|
|
+ if commaCount == 1 and c != ',' and c != ']':
|
|
|
+ tempStr += c
|
|
|
+ height = int(tempStr)
|
|
|
+ if commaCount == 2 and c != ',' and c != ']':
|
|
|
+ tempStr += c
|
|
|
+ width = int(tempStr)
|
|
|
+ if c == ']':
|
|
|
+ fileDict['file'] = file
|
|
|
+ fileDict['width'] = width
|
|
|
+ fileDict['height'] = height
|
|
|
+ self.images.append(fileDict)
|
|
|
+ file = ""
|
|
|
+ tempStr = ""
|
|
|
+ commaCount = -1
|
|
|
+ print(str(self.images))
|
|
|
|
|
|
def handle_endtag(self, tag):
|
|
|
if self.inScript and tag=='script':
|