# -*- coding: utf-8 -*-
from html.parser import HTMLParser
import logging
import urllib3
import re
class MyHTMLParser(HTMLParser):
    """Print runs of text nodes that begin with a four-digit stock code.

    Every text node handed to ``handle_data`` has spaces and commas removed.
    A node that is exactly four digits opens a row; that node and the text
    nodes that follow it are collected until ``FIELDS_PER_ROW`` fields have
    been gathered. The row is then printed with ``*`` after each field,
    followed by a separator line, and collection stops until the next
    four-digit node appears.

    Attributes:
        test: Fields collected so far for the row in progress (per instance).
        startFlag: True while a row is being collected.
    """

    # Number of text fields (the stock code included) that make up one row.
    FIELDS_PER_ROW = 10

    # Only four-digit stock codes open a row; warrants and similar
    # instruments with other code lengths are deliberately skipped.
    _STOCK_CODE = re.compile(r'^\d{4}$')

    def __init__(self, *args, **kwargs):
        """Create a parser with its own empty row buffer.

        Arguments are passed through unchanged to ``HTMLParser.__init__``.
        """
        super().__init__(*args, **kwargs)
        # State lives on the instance: a class-level list would be shared by
        # every parser and would mix rows from different documents.
        self.test = []
        self.startFlag = False

    def handle_data(self, data):
        """Collect one text node and print the row once it is complete.

        Args:
            data: Text content passed in by ``HTMLParser``.
        """
        new_data = data.replace(' ', '').replace(',', '')

        if not self.startFlag:
            if self._STOCK_CODE.match(new_data) is None:
                # Not inside a row and not a stock code: nothing to collect.
                return
            self.startFlag = True

        self.test.append(new_data)

        # Flush as soon as the row is full. Waiting for the next text node
        # would swallow that node (possibly the next row's stock code) and
        # would never print a row that ends the document.
        if len(self.test) >= self.FIELDS_PER_ROW:
            self._flush_row()

    def _flush_row(self):
        """Print the collected fields and reset the parser for the next row."""
        for field in self.test:
            print(field, end='*')
        print(' ')
        print('===============')
        self.startFlag = False
        self.test.clear()
def main(url='http://mops.twse.com.tw/nas/t21/sii/t21sc03_106_6_0.html'):
    """Download an HTML page and print the stock rows found in it.

    The response body is decoded as Big5 (undecodable bytes are dropped)
    and fed to ``MyHTMLParser``, which prints the rows to standard output.
    Logging is configured to write bare messages to ``system.log``.

    Args:
        url: Address of the page to fetch. Defaults to the page the script
            was originally written for.
    """
    http = urllib3.PoolManager()
    r = http.request('GET', url)

    FORMAT = '%(message)s'
    logging.basicConfig(level=logging.DEBUG, format=FORMAT, filename='system.log')

    # An error page would otherwise be parsed as if it were the table and
    # simply produce no output; record the failure instead.
    if r.status != 200:
        logging.error('GET %s returned HTTP status %s', url, r.status)
        return

    parser = MyHTMLParser()
    parser.feed(r.data.decode('big5', 'ignore'))
    # Force any text still buffered by the parser to be processed.
    parser.close()


if __name__ == '__main__':
    main()
# 效果 (sample output):
# 1213*大飲*46321*49849*70686*-7.07*-34.46*295094*312894*-5.68*
# 沒有留言: (blog page residue: "no comments")
# 張貼留言 (blog page residue: "post a comment")