浏览代码

added chapter parser

Phil 5 年之前
父节点
当前提交
97fea18bcd
共有 2 个文件被更改,包括 47 次插入 和 6 次删除
  1. 22 6
      main.py
  2. 25 0
      parsers/chapter.py

+ 22 - 6
main.py

@@ -6,6 +6,7 @@ from subprocess import call
 import subprocess
 from parsers.episode import episodeParser
 from parsers.lesezeichen import lesezeichenParser
+from parsers.chapter import chapterParser
 
 def main():
     while True:
@@ -16,6 +17,7 @@ def main():
         print('4 - Lesezeichen - Manga')
         print('5 - Lesezeichen - dummy')
         print('6 - Episode test')
+        print('7 - Chapter test')
         print('99 - exit')
 
         uin = input('$>: ')
@@ -39,16 +41,30 @@ def main():
         if uin == '6':
             episode_test()
 
+        if uin == '7':
+            chapter_dummy()
+
         if uin =='99':
             exit()
 
-def chapter_dummy(sess,ep):
+def chapter_dummy():  # Run from menu option '7'; loads the saved page dummys/chapter.html from disk instead of fetching it
+    #url = 'https://proxer.me' + ep['link'].replace('chapter','read').replace('#top','')+'/1'
+    #print(url)
+    #response = sess.get(url)
+    f = open('dummys/chapter.html','r')
+    content = f.readlines()  # NOTE(review): content is never used in the visible code; presumably meant to be fed to chapterParser, confirm
+    f.close()
+    
+    
+def chapter(sess,ep):  # Fetches the reader page derived from ep['link'] via sess.get, parses it, and prints imageCount and serverurl
     url = 'https://proxer.me' + ep['link'].replace('chapter','read').replace('#top','')+'/1'
-    print(url)
     response = sess.get(url)
-    f = open('dummys/chapter.html','w')
-    f.write(str(response.content))
-    f.close()
+    content = response.content
+    chapPars = chapterParser()
+    chapPars.feed(str(content))  # NOTE(review): if content is bytes, str() yields its "b'...'" repr rather than decoded text; confirm this is intended
+    print(chapPars.imageCount)
+    print(chapPars.serverurl)
+
 
 def episode(sess,episode):
     response = sess.get('https://proxer.me'+ep['link'])
@@ -105,7 +121,7 @@ def LesezeichenAll(sess):
         episode(sess,ep)
     else:
         ep = readlist[int(uin.replace('r',''))]
-        chapter_dummy(sess,ep)
+        chapter(sess,ep)
     print(link)
     
 

+ 25 - 0
parsers/chapter.py

@@ -0,0 +1,25 @@
+from html.parser import HTMLParser
+from html.entities import name2codepoint
+
+class chapterParser(HTMLParser):  # Records serverurl and imageCount parsed from the inline script text that contains 'var serverurl'
+    inScript = False  # True while the parser is between a <script> start tag and its end tag
+    serverurl = ""  # class-level default; handle_data assigns the parsed value on the instance
+    imageCount = 0  # class-level default; handle_data assigns the parsed value on the instance
+    def handle_starttag(self, tag, attrs):
+        if tag=='script':
+            self.inScript = True
+    def handle_data(self, data):
+        if self.inScript:
+            if 'var serverurl' in data:
+                self.serverurl = data[data.index('serverurl')+16:len(data)-5]  # fixed offsets: start 16 chars after 'serverurl' begins, drop the last 5 chars; presumably tuned to the page's exact script layout, verify
+                self.serverurl = self.serverurl.replace('\\','').replace(';','')  # strip backslashes and semicolons
+                #print("serverurl " + self.serverurl)
+                imageString = data[data.rindex('[')+2:data.rindex(']')]  # from 2 chars after the last '[' up to the last ']'; rindex raises ValueError if either is missing
+                imageString = imageString[:imageString.index('"')]  # keep only what precedes the first double quote
+                imageString = imageString.replace('.jpg','')
+                self.imageCount = int(imageString)  # raises ValueError unless the remaining text is an integer
+                #print(imageCount)
+                
+    def handle_endtag(self, tag):
+        if self.inScript and tag=='script':
+            self.inScript = False