# -*- coding: utf-8 -*-
"""Print monthly-revenue rows scraped from a TWSE MOPS report page.

The page is fetched with urllib3, decoded as Big5 and fed to
``MyHTMLParser``, which prints every table row that begins with a
4-digit stock code.
"""
from html.parser import HTMLParser
import logging
import re

# Only plain 4-digit stock codes are wanted; warrants and the like are skipped.
_STOCK_CODE_RE = re.compile(r'^\d{4}$')

# Report page fetched by main() when no other URL is given.
REPORT_URL = 'http://mops.twse.com.tw/nas/t21/sii/t21sc03_106_6_0.html'


class MyHTMLParser(HTMLParser):
    """Collect and print rows of text fields that start with a stock code.

    Every text node has its spaces and commas removed.  A node consisting
    of exactly four digits starts a row; that node and the text nodes that
    follow it are collected until ``FIELDS_PER_ROW`` fields are present,
    at which point the row is printed and collection stops until the next
    stock code is seen.
    """

    # Number of text fields (stock code included) that make up one row.
    FIELDS_PER_ROW = 10

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Per-instance state: a class-level list would be shared by every
        # parser instance.
        self.test = []          # fields collected for the current row
        self.startFlag = False  # True while a row is being collected

    def handle_data(self, data):
        """Handle one text node delivered by ``HTMLParser``.

        Args:
            data: Raw text found between two tags.
        """
        new_data = data.replace(' ', '').replace(',', '')

        if not self.startFlag:
            # Outside a row only a stock code is of interest.
            if _STOCK_CODE_RE.match(new_data) is None:
                return
            self.startFlag = True

        self.test.append(new_data)

        # Emit the row as soon as it is complete.  Waiting for the next
        # text node would swallow that node (possibly the next stock code)
        # and would never print the last row of the document.
        if len(self.test) >= self.FIELDS_PER_ROW:
            self._emit_row()

    def _emit_row(self):
        """Print the collected row and reset the collection state."""
        # Each field is followed by '*'; the line ends with a single space.
        print('*'.join(self.test) + '* ')
        print('===============')
        self.startFlag = False
        self.test.clear()


def main(url=REPORT_URL):
    """Fetch the report page and print its stock rows.

    Logging is configured to write bare messages to ``system.log``.

    Args:
        url: Address of the report page; defaults to ``REPORT_URL``.
    """
    # Imported here so that MyHTMLParser stays importable on systems
    # where the third-party urllib3 package is not installed.
    import urllib3

    http = urllib3.PoolManager()
    r = http.request('GET', url)

    logging.basicConfig(level=logging.DEBUG, format='%(message)s',
                        filename='system.log')
    if r.status != 200:
        logging.warning('unexpected HTTP status %s for %s', r.status, url)

    parser = MyHTMLParser()
    # The page is served as Big5; undecodable bytes are dropped.
    parser.feed(r.data.decode('big5', 'ignore'))
    # Flush any text HTMLParser is still buffering.
    parser.close()


if __name__ == '__main__':
    main()
效果:
1213*大飲*46321*49849*70686*-7.07*-34.46*295094*312894*-5.68*
沒有留言:
張貼留言