Phil 5 gadi atpakaļ
vecāks
revīzija
5be7ee052a

BIN
parsers/__pycache__/episode.cpython-38.pyc


BIN
parsers/__pycache__/lesezeichen.cpython-38.pyc


+ 32 - 0
parsers/episode.py

@@ -0,0 +1,32 @@
+from getpass import getpass
+import requests
+from html.parser import HTMLParser
+from html.entities import name2codepoint
+    
class episodeParser(HTMLParser):
    """Pull a stream code out of the inline ``<script>`` data of a page.

    Feed HTML with ``feed()``. For every inline script, the first
    ``;``-terminated statement is searched for a list literal; the text up to
    the first ``}`` is treated as the first stream entry. If that entry's
    ``type`` value starts with ``proxer-stream``, the value of its ``code``
    field is stored in ``code``. Scripts that do not match this shape are
    ignored instead of aborting the parse.
    """

    # Distance from the start of a 4-letter key name to the first character
    # of its value in the serialized entry, i.e. len('type":"').
    _VALUE_OFFSET = 7
    _STREAM_TYPE = 'proxer-stream'

    inScript = False  # True while the parser is inside a <script> element
    code = ""         # most recently extracted stream code

    def handle_starttag(self, tag, attrs):
        """Remember that a ``<script>`` element has been entered."""
        if tag == 'script':
            self.inScript = True

    def handle_data(self, data):
        """Inspect script text and store the stream code when one is found.

        Text outside ``<script>`` elements is ignored. Nothing is stored (and
        no exception is raised) when the script has no list literal, no
        ``type``/``code`` field, or an unterminated code value.
        """
        if not self.inScript:
            return

        # Literal backslash-n sequences are dropped, then only the first
        # statement of the script is looked at.
        statement = data.replace('\\n', '').split(';')[0]
        list_start = statement.find('[')
        if list_start == -1:
            # Unrelated script: nothing that looks like a list literal.
            return

        # Everything before the first '}' is the first entry of the list.
        entry = statement[list_start:].split('}')[0]
        print(entry)  # debug output kept for backward compatibility

        type_pos = entry.find('type')
        if type_pos == -1:
            return
        if not entry.startswith(self._STREAM_TYPE,
                                type_pos + self._VALUE_OFFSET):
            return
        print('proxer-stream')

        code_pos = entry.find('code')
        if code_pos == -1:
            return
        code_start = code_pos + self._VALUE_OFFSET
        code_end = entry.find('"', code_start)
        if code_end == -1:
            # Value is not terminated by a quote; treat as no match.
            return

        code = entry[code_start:code_end]
        print(code)
        self.code = code

    def handle_endtag(self, tag):
        """Leave script mode when the ``<script>`` element is closed."""
        if tag == 'script' and self.inScript:
            self.inScript = False

+ 82 - 0
parsers/lesezeichen.py

@@ -0,0 +1,82 @@
+from getpass import getpass
+import requests
+from html.parser import HTMLParser
+from html.entities import name2codepoint    
+
class lesezeichenParser(HTMLParser):
    """Collect the rows of the watchlist and readlist tables of a page.

    An ``<h4>`` whose text is ``Watchlist (Anime)`` or ``Readlist (Manga)``
    switches the parser into the matching list; the closing ``</table>``
    switches it off again. Every non-header ``<tr>`` seen while a list is
    active produces one dict that is appended to ``watchlist`` or
    ``readlist``. A row dict may contain:

    * ``'link'`` - value taken from the ``<a>`` in the second ``<td>``
    * ``'name'`` - text of that ``<a>``
    * ``'new'``  - True when the first attribute value of an ``<img>`` in
      the row contains ``online``
    """

    # Scalar state flags. They are safe as class-level defaults because
    # assigning through ``self`` creates a per-instance attribute.
    inHeader = False
    inh4 = False
    inWatchlist = False
    inReadlist = False
    inRow = False
    inData = False
    tdCount = 0
    inName = False

    def __init__(self, *args, **kwargs):
        """Create a parser with its own, empty result lists.

        Arguments are passed through unchanged to ``HTMLParser``.
        """
        super().__init__(*args, **kwargs)
        # Mutable containers must live on the instance; as class attributes
        # they would be shared (and keep growing) across all parsers.
        self.watchlist = []
        self.readlist = []
        self.anime = {}

    @staticmethod
    def _link_from(attrs):
        """Return the link value of an anchor's attribute list.

        The third attribute is used when present (the position the page
        markup is expected to use); otherwise the ``href`` attribute, or
        ``None`` when the anchor has none.
        """
        if len(attrs) > 2:
            return attrs[2][1]
        return dict(attrs).get('href')

    def handle_starttag(self, tag, attrs):
        """Update the parser state for an opening tag."""
        in_list = self.inWatchlist or self.inReadlist

        if tag == 'h4':
            # Headline that tells watchlist and readlist apart.
            self.inh4 = True
        elif tag == 'tr' and in_list:
            # A new table row starts a fresh entry.
            self.inRow = True
            self.tdCount = 0
            self.anime = {}
        elif tag == 'td' and self.inRow:
            self.tdCount += 1
            self.inData = True
        elif tag == 'a' and self.inData and self.tdCount == 2:
            # The anchor in the second cell carries the title and its link.
            self.inName = True
            if in_list:
                self.anime['link'] = self._link_from(attrs)
        elif tag == 'img' and self.inData:
            # Status icon: decided by the first attribute's value; a missing
            # or valueless attribute counts as "not new".
            first_value = attrs[0][1] if attrs else None
            self.anime['new'] = bool(first_value) and 'online' in first_value
        elif tag == 'th' and in_list:
            # Header rows are recognised by their <th> cells and skipped.
            self.inHeader = True

    def handle_endtag(self, tag):
        """Update the parser state for a closing tag and store finished rows."""
        if tag == 'h4':
            self.inh4 = False
        elif tag == 'table':
            # The end of a table ends whichever list was being read.
            self.inReadlist = False
            self.inWatchlist = False
        elif tag == 'tr' and self.inRow:
            self.inRow = False
            if self.inHeader:
                self.inHeader = False
                return
            if self.inWatchlist:
                self.watchlist.append(self.anime)
            if self.inReadlist:
                self.readlist.append(self.anime)
        elif tag == 'td':
            self.inData = False
        elif tag == 'a':
            self.inName = False

    def handle_data(self, data):
        """Detect list headlines and record the name of the current entry."""
        if self.inh4:
            if data == 'Watchlist (Anime)':
                self.inWatchlist = True
            elif data == 'Readlist (Manga)':
                self.inReadlist = True

        if self.inName and (self.inWatchlist or self.inReadlist):
            self.anime['name'] = data
+