1. 封装获取单个节目的epg成函数,保存成文件tvmao.py
#!/usr/bin/python
# coding: utf-8
# by 黑鸟博客 (Heiniao blog)
"""Append the programme guide (EPG) of one tvmao.com channel page to a text file."""
import urllib3
import requests
import datetime
import logging
import time
import base64
import ssl
import json
#import http.cookiejar
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)

# Alphabet from which sub_req() picks the first character of the token.
_KEY_STR = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="

# Headers sent with every request to the site.
_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0',
    'Connection': 'keep-alive',
    'Cache-Control': 'no-cache',
}

# Seconds to wait for the server; without a timeout requests may block forever.
_TIMEOUT = 30

# Text of the "now playing" marker span, which is not part of the schedule.
_NOW_PLAYING = '正在播出'


def _iso_weekday():
    """Return today's local weekday as 1 (Monday) .. 7 (Sunday)."""
    return datetime.date.today().isoweekday()


def is_valid_date(strdate):
    """Return True if *strdate* contains ":" and parses as an ``HH:MM`` time.

    Any value that is not such a string (including non-strings) yields False.
    """
    try:
        if ":" not in strdate:
            return False
        time.strptime(strdate, "%H:%M")
    except (TypeError, ValueError):
        return False
    return True


def sub_req(a, q, id):
    """Build the ``p`` query value for the ``/api/pg`` request.

    The result is one character of the key alphabet, chosen at index
    ``weekday * weekday`` (weekday 1..7, Sunday = 7), followed by
    base64(``id|a``) and base64(``|q``).

    Args:
        a: value of the form's ``a`` attribute.
        q: value of the form's ``q`` attribute.
        id: value of the form button's ``id`` attribute.

    Returns:
        The token as a str.
    """
    wday = _iso_weekday()
    v = base64.b64encode(("|" + q).encode('utf-8'))
    w = base64.b64encode((id + "|" + a).encode('utf-8'))
    return _KEY_STR[wday * wday] + str(w, 'utf-8') + str(v, 'utf-8')


def _write_programs(f, spans):
    """Write the text of each span: times end with a space, titles with a newline."""
    for tagprogram in spans:
        text = tagprogram.text
        try:
            if is_valid_date(text):
                f.write(text)
                f.write(" ")
            elif text != _NOW_PLAYING:
                f.write(text)
                f.write("\n")
        except UnicodeError:
            # Best effort: skip an entry that cannot be encoded instead of
            # aborting the whole page.
            continue


def _fetch_remaining_spans(link, soup):
    """Fetch the spans served by ``/api/pg`` for the page parsed in *soup*.

    Returns an empty list (and logs a warning) when the page lacks the form
    or the attributes needed to build the request token.
    """
    form = soup.find(name='form')
    button = form.button if form is not None else None
    if button is None:
        logger.warning("no form/button found on the page; skipping /api/pg request")
        return []
    a, q, button_id = form.get("a"), form.get("q"), button.get("id")
    if a is None or q is None or button_id is None:
        logger.warning("form lacks a/q/id attributes; skipping /api/pg request")
        return []

    website = '%s%s' % (link, "/api/pg?p=" + sub_req(a, q, button_id))
    sub_r = requests.get(website, headers=_HEADERS, timeout=_TIMEOUT)
    # The JSON payload is a sequence whose second element is an HTML fragment.
    fragment = BeautifulSoup(sub_r.json()[1], 'lxml')
    return fragment.find_all(name='span')


def get_program_info(link, sublink, week_day, epg_file_name):
    """Append one day's schedule from ``link + sublink`` to *epg_file_name*.

    The output is a ``YYYY/MM/DD Weekday`` header, a blank line, one
    ``HH:MM title`` line per programme, and a trailing blank line.

    Args:
        link: site root, e.g. ``https://www.tvmao.com``.
        sublink: path of the channel page for the requested day.
        week_day: day of the current week, 1 = Monday .. 7 = Sunday
            (assumed to match the ``-wN`` suffix of *sublink*).
        epg_file_name: text file to append to; written as UTF-8.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    # Use the same Monday=1..Sunday=7 numbering as sub_req(); with "%w"
    # (Sunday = 0) every header computed on a Sunday was one week late.
    today = datetime.date.today()
    day = today + datetime.timedelta(days=week_day - today.isoweekday())
    with open(epg_file_name, "a", encoding="utf-8") as f:
        f.write(day.strftime("%Y/%m/%d %A"))
        f.write("\n\n")

    website = '%s%s' % (link, sublink)
    r = requests.get(website, headers=_HEADERS, timeout=_TIMEOUT)
    soup = BeautifulSoup(r.text, 'lxml')

    # First part of the schedule is embedded in the page itself.
    epg_div = soup.find(name='div', attrs={"class": "epg"})
    if epg_div is None:
        logger.warning("no div.epg found on %s", website)
        list_program_div = []
    else:
        list_program_div = epg_div.find_all(name='span')
    with open(epg_file_name, "a", encoding="utf-8") as f:
        _write_programs(f, list_program_div)

    # The rest of the schedule is fetched from the /api/pg endpoint.
    list_program_div = _fetch_remaining_spans(link, soup)
    with open(epg_file_name, "a", encoding="utf-8") as f:
        _write_programs(f, list_program_div)
        f.write("\n\n")


def get_program(link, sublink, week_day, epg_file_name):
    """Public entry point; forwards to :func:`get_program_info`."""
    get_program_info(link, sublink, week_day, epg_file_name)
2. 调用 tvmao 封装的函数,实现多节目epg 获取。
#!/usr/bin/python
# coding: utf-8
# by 黑鸟博客 (Heiniao blog)
import os
import tvmao

link = "https://www.tvmao.com"


def _fetch_group(channels, out_dir, sublink_prefix):
    """Write one EPG text file per channel into *out_dir*.

    For each channel name the file ``<out_dir><name>.txt`` is emptied and
    then filled by calling ``tvmao.get_program`` for days 1 through 7, using
    the page path ``<sublink_prefix><name>-w<day>.html``.
    """
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    for channel in channels:
        target = out_dir + channel + '.txt'
        # Truncate any previous run's output before the per-day calls.
        with open(target, "w+") as handle:
            handle.write("")
        print(channel)
        for day in range(1, 8):
            page = "%s%s-w%s.html" % (sublink_prefix, channel, str(day))
            tvmao.get_program(link, page, day, target)


# CCTV channels; the names are copied from the site's URLs, https://www.guihet.com/
CCTV_prog = ['CCTV1', 'CCTV2', 'CCTV3', 'CCTV4', 'CCTV5', 'CCTV6']
_fetch_group(CCTV_prog, 'epg/cctv/', "/program/CCTV-")

# Provincial channels; the names are copied from the site's URLs, https://www.guihet.com/
province_prog = ['AHTV1', 'BTV1', 'CCQTV1', 'FJTV2', 'XMTV5', 'HUNANTV1']
_fetch_group(province_prog, 'epg/province/', "/program_satellite/")
剩下的就是根据需求对 EPG 文件的处理了。
说明:当前的节目路径是手动从网页上复制的,需要哪个节目,就把它添加到列表中,并需要人工确认路径的正确性。
下一步要完成的是实现从网页获取节目的路径, 实现全自动化。
算法好像变了,能更新一下吗
寻求 能解析国外直播电视网的高手 有偿解析 qq1376214911 http://onair.kbs.co.kr/
老实说,电视猫很不错