xxxx
This commit is contained in:
251
PY1/JAV目录.py
Normal file
251
PY1/JAV目录.py
Normal file
@@ -0,0 +1,251 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import re
|
||||
import sys
|
||||
from pyquery import PyQuery as pq
|
||||
from base64 import b64decode, b64encode
|
||||
from requests import Session
|
||||
|
||||
sys.path.append('..')
|
||||
from base.spider import Spider
|
||||
|
||||
|
||||
class Spider(Spider):
|
||||
def init(self, extend=""):
|
||||
self.headers['referer'] = f'{self.host}/'
|
||||
self.session = Session()
|
||||
self.session.headers.update(self.headers)
|
||||
|
||||
def getName(self):
|
||||
return "JAV目录大全"
|
||||
|
||||
def isVideoFormat(self, url):
|
||||
pass
|
||||
|
||||
def manualVideoCheck(self):
|
||||
pass
|
||||
|
||||
def destroy(self):
|
||||
pass
|
||||
|
||||
host = "https://javmenu.com"
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
|
||||
'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'sec-ch-ua-full-version': '"133.0.6943.98"',
|
||||
'sec-ch-ua-arch': '"x86"',
|
||||
'sec-ch-ua-platform': '"Windows"',
|
||||
'sec-ch-ua-platform-version': '"19.0.0"',
|
||||
'sec-ch-ua-model': '""',
|
||||
'sec-ch-ua-full-version-list': '"Not(A:Brand";v="99.0.0.0", "Google Chrome";v="133.0.6943.98", "Chromium";v="133.0.6943.98"',
|
||||
'dnt': '1',
|
||||
'upgrade-insecure-requests': '1',
|
||||
'sec-fetch-site': 'none',
|
||||
'sec-fetch-mode': 'navigate',
|
||||
'sec-fetch-user': '?1',
|
||||
'sec-fetch-dest': 'document',
|
||||
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||||
'priority': 'u=0, i'
|
||||
}
|
||||
|
||||
# -------------------- 业务接口 --------------------
|
||||
def homeContent(self, filter):
|
||||
cateManual = {
|
||||
"FC2在线": "/zh/fc2/online",
|
||||
"成人动画": "/zh/hanime/online",
|
||||
"国产在线": "/zh/chinese/online",
|
||||
"有码在线": "/zh/censored/online",
|
||||
"无码在线": "/zh/uncensored/online",
|
||||
"欧美在线": "/zh/western/online"
|
||||
}
|
||||
classes = [{'type_name': k, 'type_id': v} for k, v in cateManual.items()]
|
||||
return {'class': classes}
|
||||
|
||||
def homeVideoContent(self):
|
||||
data = self.getpq("/zh")
|
||||
return {'list': self.getlist(data(".video-list-item"))}
|
||||
|
||||
def categoryContent(self, tid, pg, filter, extend):
|
||||
url = f"{self.host}{tid}" if pg == '1' else f"{self.host}{tid}?page={pg}"
|
||||
data = self.getpq(url)
|
||||
return {
|
||||
'list': self.getlist(data(".video-list-item")),
|
||||
'page': pg,
|
||||
'pagecount': 9999,
|
||||
'limit': 90,
|
||||
'total': 999999
|
||||
}
|
||||
|
||||
def detailContent(self, ids):
|
||||
vod_id = ids[0]
|
||||
if not vod_id.startswith('http'):
|
||||
url = f"{self.host}{vod_id}"
|
||||
else:
|
||||
url = vod_id
|
||||
vod_id = vod_id.replace(self.host, '')
|
||||
data = self.getpq(url)
|
||||
vod = {
|
||||
'vod_id': vod_id,
|
||||
'vod_name': data('h1').text() or data('title').text().split(' - ')[0],
|
||||
'vod_pic': self.getCover(data),
|
||||
'vod_content': data('.card-text').text() or '',
|
||||
'vod_director': '',
|
||||
'vod_actor': self.getActors(data),
|
||||
'vod_area': '日本',
|
||||
'vod_year': self.getYear(data('.text-muted').text()),
|
||||
'vod_remarks': self.getRemarks(data),
|
||||
'vod_play_from': 'JAV在线',
|
||||
'vod_play_url': self.getPlaylist(data, url)
|
||||
}
|
||||
return {'list': [vod]}
|
||||
|
||||
def searchContent(self, key, quick, pg="1"):
|
||||
url = f"{self.host}/zh/search?wd={key}&page={pg}"
|
||||
data = self.getpq(url)
|
||||
return {'list': self.getlist(data(".video-list-item"))}
|
||||
|
||||
def playerContent(self, flag, id, vipFlags):
|
||||
return {'parse': 0, 'url': self.d64(id), 'header': self.headers}
|
||||
|
||||
# -------------------- 私有工具 --------------------
|
||||
def getlist(self, data):
|
||||
vlist = []
|
||||
for item in data.items():
|
||||
link = item('a').attr('href')
|
||||
if not link or '/zh/' not in link:
|
||||
continue
|
||||
link = link.replace(self.host, '') if link.startswith(self.host) else link
|
||||
name = item('.card-title').text() or item('img').attr('alt') or ''
|
||||
if not name:
|
||||
continue
|
||||
vlist.append({
|
||||
'vod_id': link,
|
||||
'vod_name': name.split(' - ')[0].strip(),
|
||||
'vod_pic': self.getListPicture(item),
|
||||
'vod_remarks': (item('.text-muted').text() or '').strip(),
|
||||
'style': {'ratio': 1.5, 'type': 'rect'}
|
||||
})
|
||||
return vlist
|
||||
|
||||
# ******** 修复版本:支持LazyLoad和正确过滤 ********
|
||||
def getListPicture(self, item):
|
||||
"""
|
||||
获取列表中的图片
|
||||
支持LazyLoad延迟加载机制
|
||||
过滤水印、占位符和无预览图
|
||||
"""
|
||||
# 获取所有img标签
|
||||
imgs = item('img')
|
||||
|
||||
for img in imgs.items():
|
||||
# 优先级:先从data-src获取(LazyLoad属性),再从src获取
|
||||
pic = img.attr('data-src') or img.attr('src')
|
||||
|
||||
# 过滤条件:排除水印、占位符、加载图片
|
||||
if pic and not any(keyword in pic for keyword in ['button_logo', 'no_preview', 'loading.gif', 'loading.png']):
|
||||
return pic
|
||||
|
||||
return ''
|
||||
|
||||
def getCover(self, data):
|
||||
"""
|
||||
获取详情页的图片
|
||||
支持LazyLoad延迟加载机制
|
||||
过滤水印、占位符和无预览图
|
||||
"""
|
||||
# 获取所有img标签
|
||||
imgs = data('img')
|
||||
|
||||
for img in imgs.items():
|
||||
# 优先级:先从data-src获取(LazyLoad属性),再从src获取
|
||||
pic = img.attr('data-src') or img.attr('src')
|
||||
|
||||
# 过滤条件:排除水印、占位符、加载图片
|
||||
if pic and not any(keyword in pic for keyword in ['button_logo', 'no_preview', 'loading.gif', 'loading.png', 'website_building']):
|
||||
return pic
|
||||
|
||||
return ''
|
||||
|
||||
# **********************************
|
||||
def getActors(self, data):
|
||||
"""获取演员信息"""
|
||||
actors = []
|
||||
h1_text = data('h1').text()
|
||||
if h1_text:
|
||||
actors.extend(h1_text.strip().split()[1:])
|
||||
actor_links = data('a[href*="/actor/"]')
|
||||
for actor_link in actor_links.items():
|
||||
actor_text = actor_link.text()
|
||||
if actor_text and actor_text not in actors:
|
||||
actors.append(actor_text)
|
||||
return ','.join(actors) if actors else '未知'
|
||||
|
||||
def getYear(self, date_str):
|
||||
"""从日期字符串中提取年份"""
|
||||
m = re.search(r'(\d{4})-\d{2}-\d{2}', date_str or '')
|
||||
return m.group(1) if m else ''
|
||||
|
||||
def getRemarks(self, data):
|
||||
"""获取备注信息(标签)"""
|
||||
tags = [tag.text() for tag in data('.badge').items() if tag.text()]
|
||||
return ' '.join(set(tags)) if tags else ''
|
||||
|
||||
def getPlaylist(self, data, url):
|
||||
"""
|
||||
获取播放列表
|
||||
从source、video标签和脚本中提取m3u8链接
|
||||
"""
|
||||
play_urls, seen = [], set()
|
||||
|
||||
# 从source标签获取
|
||||
for src in data('source').items():
|
||||
u = src.attr('src')
|
||||
if u and u not in seen:
|
||||
play_urls.append(f"源{len(play_urls)+1}${self.e64(u)}")
|
||||
seen.add(u)
|
||||
|
||||
# 从video标签获取
|
||||
for u in data('video').items():
|
||||
u = u.attr('src')
|
||||
if u and u not in seen:
|
||||
play_urls.append(f"线路{len(play_urls)+1}${self.e64(u)}")
|
||||
seen.add(u)
|
||||
|
||||
# 从脚本中提取m3u8链接
|
||||
for m in re.findall(r'https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*', data('script').text()):
|
||||
if m not in seen:
|
||||
play_urls.append(f"线路{len(play_urls)+1}${self.e64(m)}")
|
||||
seen.add(m)
|
||||
|
||||
# 如果没有找到播放链接,使用页面URL
|
||||
if not play_urls:
|
||||
play_urls.append(f"在线播放${self.e64(url)}")
|
||||
|
||||
return '#'.join(play_urls)
|
||||
|
||||
def getpq(self, path=''):
|
||||
"""获取页面内容并返回PyQuery对象"""
|
||||
url = path if path.startswith('http') else f'{self.host}{path}'
|
||||
try:
|
||||
rsp = self.session.get(url, timeout=20)
|
||||
rsp.encoding = 'utf-8'
|
||||
return pq(rsp.text)
|
||||
except Exception as e:
|
||||
print(f"getpq error: {e}")
|
||||
return pq('')
|
||||
|
||||
def e64(self, text):
|
||||
"""Base64编码"""
|
||||
try:
|
||||
return b64encode(text.encode('utf-8')).decode('utf-8')
|
||||
except Exception:
|
||||
return ''
|
||||
|
||||
def d64(self, encoded_text):
|
||||
"""Base64解码"""
|
||||
try:
|
||||
return b64decode(encoded_text.encode('utf-8')).decode('utf-8')
|
||||
except Exception:
|
||||
return ''
|
||||
Reference in New Issue
Block a user