Bladeren bron

parsing pages array and saving to manga/title/chapter | lesezeichen parser chapter/episode item added

Phil 5 jaar geleden
bovenliggende
commit
7170c276fa
3 gewijzigde bestanden met 45 toevoegingen en 7 verwijderingen
  1. 8 1
      main.py
  2. 34 6
      parsers/chapter.py
  3. 3 0
      parsers/lesezeichen.py

+ 8 - 1
main.py

@@ -54,6 +54,9 @@ def chapter_dummy():
     f = open('dummys/chapter.html','r')
     content = f.readlines()
     f.close()
+    chapPars = chapterParser()
+    chapPars.feed(str(content))
+
     
     
 def chapter(sess,ep):
@@ -62,8 +65,12 @@ def chapter(sess,ep):
     content = response.content
     chapPars = chapterParser()
     chapPars.feed(str(content))
-    print(chapPars.imageCount)
+    path = 'manga/'+ep['name']+'/'+ep['num']+'/'
+    if not os.path.exists(path):
+        os.makedirs(path)
     print(chapPars.serverurl)
+    for p in chapPars.images:
+        call(['curl',chapPars.serverurl + p['file'],'-o',path+p['file']])
 
 
 def episode(sess,episode):

+ 34 - 6
parsers/chapter.py

@@ -4,7 +4,7 @@ from html.entities import name2codepoint
 class chapterParser(HTMLParser):
     inScript = False
     serverurl = ""
-    imageCount = 0
+    images = []
     def handle_starttag(self, tag, attrs):
         if tag=='script':
             self.inScript = True
@@ -14,11 +14,39 @@ class chapterParser(HTMLParser):
                 self.serverurl = data[data.index('serverurl')+16:len(data)-5]
                 self.serverurl = self.serverurl.replace('\\','').replace(';','')
                 #print("serverurl " + self.serverurl)
-                imageString = data[data.rindex('[')+2:data.rindex(']')]                
-                imageString = imageString[:imageString.index('"')]
-                imageString = imageString.replace('.jpg','')
-                self.imageCount = int(imageString)
-                #print(imageCount)
+                arrString = data[data.index('var pages')+ 14:]
+                file = ""
+                tempStr = ""
+                width = 0
+                height = 0
+                fileDict = dict()
+                commaCount = 0
+                arrString = arrString[:arrString.index(';') - 1]
+                print(arrString)
+                for c in arrString:
+                    if c == '[':
+                        fileDict = dict()
+                    if commaCount == 0 and c != '[' and c != ',':
+                        if c != '"':
+                            file += c
+                    if c == ',':
+                        commaCount += 1
+                        tempStr = ''
+                    if commaCount == 1 and c != ',' and c != ']':
+                        tempStr += c
+                        height = int(tempStr)
+                    if commaCount == 2 and c != ',' and c != ']':
+                        tempStr += c
+                        width = int(tempStr)
+                    if c == ']':
+                        fileDict['file'] = file
+                        fileDict['width'] = width
+                        fileDict['height'] = height
+                        self.images.append(fileDict)
+                        file = ""
+                        tempStr = ""
+                        commaCount = -1
+                print(str(self.images))
                 
     def handle_endtag(self, tag):
         if self.inScript and tag=='script':

+ 3 - 0
parsers/lesezeichen.py

@@ -76,6 +76,9 @@ class lesezeichenParser(HTMLParser):
         if self.inh4 and data == 'Readlist (Manga)':
            self.inReadlist = True
 
+        if (self.inWatchlist or self.inReadlist) and self.inData and self.tdCount == 3:
+            self.anime['num'] = data
+
         if self.inName:
             if self.inWatchlist or self.inReadlist:
                 self.anime['name'] = data