Files
a/PY1/啪啪视频.py
2026-03-24 18:40:17 +08:00

107 lines
4.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# coding=utf-8
import re
from urllib.parse import quote, urljoin

from base.spider import Spider
class Spider(Spider):
    """Spider for the "PaPa视频" site, which uses the stui-vodlist page layout.

    The class keeps the name ``Spider`` and subclasses the imported
    ``base.spider.Spider``.  ``self.fetch``, ``self.html`` and
    ``self.cleanText`` are not defined in this file; they are presumably
    inherited from that base class (TODO confirm against base/spider.py).
    """

    # Path prefix shared by every page URL built in this class.
    _SITE_ROOT = "/6/index.php"

    # Captures the numeric video ID from hrefs such as
    # "/6/index.php/vod/play/id/116255...".
    _VOD_ID_RE = re.compile(r"id/(\d+)")

    # (display name, category ID) pairs; per the original author's note the
    # IDs were read off screenshot 1000202100.jpg.
    _CATEGORIES = (
        ("国产", "20"),
        ("无码", "21"),
        ("字幕", "22"),
        ("欧美", "23"),
        ("三级", "24"),
        ("动漫", "25"),
    )

    def getName(self):
        """Return the display name of this source."""
        return "PaPa视频"

    def init(self, extend=""):
        """Set the site host and the request headers used by every fetch.

        Args:
            extend: Unused by this spider; kept for interface compatibility.
        """
        # The host is hard-coded; the original author suggests configuring it
        # dynamically or verifying that it is still correct.
        self.host = "http://202601.duduo.vip"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1",
            "Referer": self.host
        }

    def homeContent(self, filter):
        """Return the static category list.

        Args:
            filter: Unused by this spider; kept for interface compatibility.

        Returns:
            ``{'class': [...]}`` where each entry has ``type_name`` and
            ``type_id`` keys.  A fresh list is built on every call so callers
            may mutate the result safely.
        """
        classes = [
            {'type_name': name, 'type_id': type_id}
            for name, type_id in self._CATEGORIES
        ]
        return {'class': classes}

    def _page_url(self, path=""):
        """Build an absolute site URL from a path below the site root."""
        return f"{self.host}{self._SITE_ROOT}{path}"

    def _fetch_videos(self, url):
        """Fetch ``url`` with the spider's headers and parse its video list."""
        rsp = self.fetch(url, headers=self.headers)
        root = self.html(self.cleanText(rsp.text))
        return self._parse_vod_list(root)

    def _parse_vod_item(self, item):
        """Convert one ``stui-vodlist__item`` element into a video dict.

        Returns:
            A dict with ``vod_id``, ``vod_name``, ``vod_pic`` and
            ``vod_remarks`` keys, or ``None`` when the item has no title
            link or its href carries no numeric ID.
        """
        # 1. Title and link both come from the <a> inside the title <h4>.
        titles = item.xpath(".//h4[contains(@class,'title')]/a/text()")
        hrefs = item.xpath(".//h4[contains(@class,'title')]/a/@href")
        if not titles or not hrefs:
            return None

        # 2. The ID is the digit run after "id/" in the href.
        id_match = self._VOD_ID_RE.search(hrefs[0])
        if id_match is None:
            return None

        # 3. Thumbnail: prefer the lazy-load attribute, then fall back to src.
        pics = (
            item.xpath(".//a[contains(@class,'thumb')]/@data-original")
            or item.xpath(".//a[contains(@class,'thumb')]/@src")
        )
        try:
            pic_url = urljoin(self.host, pics[0]) if pics else ""
        except ValueError:
            # A malformed thumbnail URL should not hide an otherwise valid
            # entry; keep the item without a picture.
            pic_url = ""

        # 4. Optional badge text (e.g. a "click to play" label or a duration).
        remarks = item.xpath(".//span[contains(@class,'pic-text')]/text()")

        return {
            "vod_id": id_match.group(1),
            "vod_name": titles[0].strip(),
            "vod_pic": pic_url,
            "vod_remarks": remarks[0].strip() if remarks else ""
        }

    def _parse_vod_list(self, root):
        """Extract every video entry from a parsed list page.

        Shared by the home, category and search pages; per the original
        author's note the structure follows screenshot 1000202101.jpg.

        Args:
            root: Parsed document exposing ``xpath``.

        Returns:
            A list of video dicts in document order.  Items missing a title,
            a link or a numeric ID are skipped.
        """
        items = root.xpath("//li[contains(@class,'stui-vodlist__item')]")
        parsed = (self._parse_vod_item(item) for item in items)
        return [video for video in parsed if video is not None]

    def homeVideoContent(self):
        """Return the videos listed on the site's index page.

        The original author notes the home page usually shows the latest
        updates.
        """
        return {'list': self._fetch_videos(self._page_url())}

    def categoryContent(self, tid, pg, filter, extend):
        """Return one page of videos for a category.

        Args:
            tid: Category ID (one of the ``type_id`` values from homeContent).
            pg: Page number; echoed back unchanged in the result.
            filter: Unused by this spider.
            extend: Unused by this spider.

        Returns:
            ``{'list': [...], 'page': pg}``.
        """
        # Base link format, e.g. /6/index.php/vod/type/id/20.html, with the
        # page number inserted as an extra path segment.
        url = self._page_url(f"/vod/type/id/{tid}/page/{pg}.html")
        return {'list': self._fetch_videos(url), 'page': pg}

    def detailContent(self, array):
        """Build a minimal detail record that points straight at the play page.

        No detail page is requested; the play URL is constructed from the ID
        and the title is a fixed placeholder.

        Args:
            array: Sequence whose first element is the video ID.

        Returns:
            ``{'list': [vod]}``, or ``{'list': []}`` when ``array`` is empty.
        """
        if not array:
            return {'list': []}

        vid = array[0]
        # Per the original author's note, Apple CMS play pages usually live at
        # /vod/play/id/{vid}/sid/1/nid/1.html.
        play_url = self._page_url(f"/vod/play/id/{vid}/sid/1/nid/1.html")
        vod = {
            "vod_id": vid,
            "vod_name": "视频详情",
            "vod_play_from": "PaPa线路",
            "vod_play_url": f"点击播放${play_url}"
        }
        return {'list': [vod]}

    def searchContent(self, key, quick, pg):
        """Return one page of search results for ``key``.

        Args:
            key: Search keyword as typed by the user.
            quick: Unused by this spider.
            pg: Page number; echoed back unchanged in the result.

        Returns:
            ``{'list': [...], 'page': pg}``.
        """
        # The keyword is a path segment, so reserved characters ("/", "?",
        # "#", spaces, ...) must be percent-encoded or they corrupt the route.
        keyword = quote(str(key), safe="")
        url = self._page_url(f"/vod/search/page/{pg}/wd/{keyword}.html")
        return {'list': self._fetch_videos(url), 'page': pg}

    def playerContent(self, flag, id, vipFlags):
        """Return the playback descriptor for a play-page URL.

        Args:
            flag: Unused by this spider.
            id: The play-page URL produced by detailContent.
            vipFlags: Unused by this spider.

        Returns:
            A dict with ``parse`` set to 1, the URL, and the request headers.
            The original author notes that sites like this usually need web
            sniffing; presumably ``parse: 1`` is what requests it (TODO
            confirm against the client).
        """
        return {
            "parse": 1,
            "url": id,
            "header": self.headers
        }