Files
a/PY1/韩国色情电影.py
2026-03-24 18:40:17 +08:00

283 lines
9.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# coding=utf-8
#!/usr/bin/python
import sys
sys.path.append('..')
from base.spider import Spider
import json
import time
import urllib.parse
import re
import requests
from lxml import etree
import base64
class Spider(Spider):
def getName(self):
    """Return the display name of this spider."""
    display_name = "韩国色情电影"
    return display_name
def init(self, extend):
    """Print an initialisation banner to stdout.

    Args:
        extend: Accepted but not used by this implementation.

    Returns:
        None.
    """
    banner = "=============韩国色情电影初始化==========="
    print(banner)
def homeContent(self, filter):
    """Return the fixed list of browsable categories.

    Args:
        filter: Accepted but not used by this implementation.

    Returns:
        A dict with a single key ``'class'`` holding one
        ``{'type_name', 'type_id'}`` dict per category, in declaration order.
    """
    # (label shown to the user, id later passed to categoryContent as tid)
    categories = (
        ("最新视频", "latest"),
        ("最长的视频", "longest"),
        ("随机视频", "random"),
    )
    return {
        'class': [
            {'type_name': label, 'type_id': type_id}
            for label, type_id in categories
        ]
    }
def homeVideoContent(self):
    """Return the videos listed on the site's front page.

    Returns:
        ``{'list': videos}`` where ``videos`` is whatever ``self.getVideos``
        returns for the site root.
    """
    front_page = 'https://koreanpornmovie.com/'
    return {'list': self.getVideos(front_page, 1)}
def categoryContent(self, tid, pg, filter, extend):
    """Return one page of videos for a category.

    Args:
        tid: Category id. ``'longest'`` and ``'random'`` add a ``?filter=``
            query; any other value lists the site root unfiltered.
        pg: Page number as an int or numeric string. Anything that cannot be
            converted with ``int()`` falls back to page 1.
        filter: Accepted but not used by this implementation.
        extend: Accepted but not used by this implementation.

    Returns:
        A dict with ``'list'`` (result of ``self.getVideos``), ``'page'`` and
        fixed placeholder values for ``'pagecount'``, ``'limit'``, ``'total'``.
    """
    try:
        page_num = int(pg)
    except (ValueError, TypeError):
        page_num = 1

    url = 'https://koreanpornmovie.com/'
    # The pagination segment is part of the path, so it has to be added
    # before the query string. Appending it afterwards would yield
    # "?filter=longestpage/2/", i.e. a corrupted filter value.
    if page_num > 1:
        url += 'page/{0}/'.format(page_num)
    if tid in ('longest', 'random'):
        url += '?filter={0}'.format(tid)

    return {
        'list': self.getVideos(url, page_num),
        'page': page_num,
        # The real totals are not known; large placeholders are reported.
        'pagecount': 9999,
        'limit': 90,
        'total': 999999,
    }
def detailContent(self, array):
    """Build the detail record for a single video.

    Args:
        array: Sequence whose first element is the video id (the URL path
            segment appended to the site root).

    Returns:
        ``{'list': [vod]}`` where ``vod`` combines the metadata returned by
        ``self.getDetail`` with the stream URL returned by
        ``self.getPlayUrl``. ``vod_play_url`` is an empty string when no
        stream URL was found.
    """
    video_id = array[0]
    page_url = 'https://koreanpornmovie.com/{0}'.format(video_id)
    page_html = self.fetch(page_url).text

    # Metadata first, then the stream URL, both from the same HTML.
    info = self.getDetail(page_html, page_url)
    stream = self.getPlayUrl(page_html, page_url)

    sources = ['韩国色情电影']
    streams = []
    if stream:
        streams.append(stream)

    vod = {
        'vod_id': video_id,
        'vod_name': info['title'],
        'vod_pic': info['pic'],
        'type_name': info['type'],
        'vod_year': info['year'],
        'vod_area': "韩国",
        'vod_remarks': info['remarks'],
        'vod_actor': info['actor'],
        'vod_director': info['director'],
        'vod_content': info['content'],
        'vod_play_from': '$$$'.join(sources),
        'vod_play_url': '$$$'.join(streams),
    }
    return {'list': [vod]}
def searchContent(self, key, quick, page='1'):
    """Search the site for ``key`` and return the matching videos.

    Args:
        key: Search phrase; percent-encoded before being placed in the URL.
        quick: Accepted but not used by this implementation.
        page: Result page as an int or numeric string. Anything that cannot
            be converted with ``int()`` falls back to page 1.

    Returns:
        ``{'list': videos}`` with whatever ``self.getVideos`` returns.
    """
    try:
        page_num = int(page)
    except (ValueError, TypeError):
        page_num = 1

    url = 'https://koreanpornmovie.com/'
    # Previously the page argument was ignored, so every page of a search
    # repeated the first page. The path segment goes before the query,
    # matching the layout used for category listings.
    if page_num > 1:
        url += 'page/{0}/'.format(page_num)
    url += '?s={0}'.format(urllib.parse.quote(key))

    return {'list': self.getVideos(url, page_num)}
def playerContent(self, flag, id, vipFlags):
    """Describe how to play a stream URL.

    Args:
        flag: Accepted but not used by this implementation.
        id: The stream URL; returned unchanged under ``'url'``.
        vipFlags: Accepted but not used by this implementation.

    Returns:
        A dict with ``'parse'`` set to 0, an empty ``'playUrl'``, the given
        ``'url'`` and the request headers (User-Agent and Referer) to send.
    """
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36",
        "Referer": "https://koreanpornmovie.com/"
    }
    return {
        "parse": 0,
        "playUrl": '',
        "url": id,
        "header": request_headers,
    }
def getVideos(self, url, pg):
    """Fetch a listing page and convert its video cards to vod dicts.

    Args:
        url: Listing page to download via ``self.fetch``.
        pg: Accepted but not used by this implementation.

    Returns:
        A list of dicts with ``vod_id``, ``vod_name``, ``vod_pic`` and
        ``vod_remarks``. Cards that fail to parse are skipped with a printed
        message; if the page itself cannot be fetched or parsed, the entries
        collected so far (possibly none) are returned.
    """
    collected = []
    try:
        page_source = self.fetch(url).text
        tree = etree.HTML(page_source)
        # Each video card is an <article> whose class contains "thumb-block".
        cards = tree.xpath('//article[contains(@class, "thumb-block")]')
        for card in cards:
            try:
                href = card.xpath('.//a/@href')[0]
                segments = href.split('/')
                # A trailing slash leaves an empty last segment, so the id
                # is then the one before it.
                if href.endswith('/'):
                    video_id = segments[-2]
                else:
                    video_id = segments[-1]
                thumbnail = card.xpath('.//img[@class="video-main-thumb"]/@src')[0]
                name = card.xpath('.//header[@class="entry-header"]/span/text()')[0].strip()
                # Duration is optional; the other three fields are required.
                length_nodes = card.xpath('.//span[@class="duration"]/text()')
                if length_nodes:
                    badge = length_nodes[0].strip()
                else:
                    badge = ''
                entry = {
                    "vod_id": video_id,
                    "vod_name": name,
                    "vod_pic": thumbnail,
                    "vod_remarks": badge
                }
                collected.append(entry)
            except Exception as e:
                print(f"解析视频项时出错: {e}")
                continue
    except Exception as e:
        print(f"获取视频列表时出错: {e}")
    return collected
def getDetail(self, html, url):
    """Extract video metadata from a detail page.

    Args:
        html: Detail page markup.
        url: Accepted but not used by this implementation.

    Returns:
        A dict with keys ``title``, ``pic``, ``type``, ``year``, ``actor``,
        ``director``, ``content`` and ``remarks``. Fields that are not found
        keep their defaults (empty strings, and a fixed ``type`` label). If
        extraction raises part-way through, the fields filled so far are kept
        and the error is printed.
    """
    tree = etree.HTML(html)
    info = dict(
        title='',
        pic='',
        type='韩国情色',
        year='',
        actor='',
        director='',
        content='',
        remarks='',
    )

    def first_stripped(hits):
        return hits[0].strip()

    # (target field, XPath query, how to turn a non-empty result into a value)
    extractors = (
        ('title', '//h1[@class="entry-title"]/text()', first_stripped),
        ('pic', '//meta[@property="og:image"]/@content', lambda hits: hits[0]),
        ('actor', '//div[@id="video-actors"]//a/text()', lambda hits: ' / '.join(hits)),
        ('content', '//div[@class="video-description"]//p/text()', first_stripped),
        ('remarks', '//span[@class="duration"]/text()', first_stripped),
    )
    try:
        for field, query, pick in extractors:
            hits = tree.xpath(query)
            if hits:
                info[field] = pick(hits)
    except Exception as e:
        print(f"获取详情时出错: {e}")
    return info
def getPlayUrl(self, html, url):
    """Find a direct video URL in a detail page.

    Strategies are tried in order and the first hit wins:

    1. ``<meta itemprop="contentURL" content="...">``.
    2. An ``<iframe>`` whose ``src`` carries a base64 ``q=`` parameter; the
       decoded payload is searched for ``src="....mp4"``.
    3. Any absolute ``http(s)://....mp4`` URL in the page, preferring one
       that contains ``koreanporn.stream``.
    4. A ``file:``, ``src:`` or ``videoSrc:`` JavaScript property ending in
       ``.mp4``.

    Args:
        html: Detail page markup.
        url: URL of that page; used as the base when the match is relative.

    Returns:
        An absolute URL, or ``''`` when nothing was found.
    """
    play_url = ''

    # Strategy 1: meta tag. The result falls through to the normalisation
    # step below instead of returning early, so a relative or
    # protocol-relative contentURL is also made absolute.
    meta_match = re.search(
        r'<meta\s+itemprop="contentURL"\s+content="([^"]+)"', html)
    if meta_match:
        play_url = meta_match.group(1)
        print(f"从meta标签找到播放链接: {play_url}")

    # Strategy 2: base64 payload in the iframe's q= parameter.
    if not play_url:
        iframe_match = re.search(
            r'<iframe[^>]+src="[^"]*\?q=([^"]+)"[^>]*>', html)
        if iframe_match:
            # The capture runs to the closing quote, so cut off any further
            # "&..." parameters, then undo percent-encoding (e.g. %3D for
            # '=') and restore padding that may have been stripped.
            payload = iframe_match.group(1).split('&', 1)[0]
            payload = urllib.parse.unquote(payload)
            payload += '=' * (-len(payload) % 4)
            try:
                decoded = base64.b64decode(payload).decode('utf-8')
            except ValueError as e:
                # binascii.Error and UnicodeDecodeError both derive from
                # ValueError; a bad payload just means "try the next one".
                print(f"解码base64时出错: {e}")
            else:
                mp4_match = re.search(r'src="([^"]+\.mp4)"', decoded)
                if mp4_match:
                    play_url = mp4_match.group(1)
                    print(f"从iframe解码找到播放链接: {play_url}")

    # Strategy 3: any absolute .mp4 URL, preferring the known stream host.
    if not play_url:
        mp4_matches = re.findall(r'https?://[^\s"\']+\.mp4', html)
        if mp4_matches:
            play_url = next(
                (m for m in mp4_matches if 'koreanporn.stream' in m),
                mp4_matches[0],
            )
            print(f"直接搜索找到播放链接: {play_url}")

    # Strategy 4: JavaScript player configuration properties.
    if not play_url:
        js_patterns = (
            r'file\s*:\s*["\']([^"\']+\.mp4)["\']',
            r'src\s*:\s*["\']([^"\']+\.mp4)["\']',
            r'videoSrc\s*:\s*["\']([^"\']+\.mp4)["\']',
        )
        for pattern in js_patterns:
            js_match = re.search(pattern, html)
            if js_match:
                play_url = js_match.group(1)
                print(f"从JS变量找到播放链接: {play_url}")
                break

    # Make the result absolute, whichever strategy produced it.
    if play_url and not play_url.startswith('http'):
        if play_url.startswith('//'):
            play_url = 'https:' + play_url
        else:
            play_url = urllib.parse.urljoin(url, play_url)
    return play_url
def isVideoFormat(self, url):
    """Report whether ``url`` contains a known video file extension.

    The check is a case-insensitive substring match, so the extension may
    appear anywhere in the URL (for example before a query string).
    """
    lowered = url.lower()
    for extension in ('.mp4', '.m3u8', '.avi', '.mov', '.wmv', '.flv', '.mkv'):
        if extension in lowered:
            return True
    return False
def manualVideoCheck(self):
    """Return True unconditionally.

    NOTE(review): presumably signals the host to consult ``isVideoFormat``;
    confirm against the base Spider contract.
    """
    enabled = True
    return enabled
def localProxy(self, param):
    """Return an empty list; this spider does no local proxying.

    Args:
        param: Accepted but not used by this implementation.
    """
    # The unused local dict the original built here has been dropped.
    return []