xxxx

2026-03-24 18:40:17 +08:00
parent a53ca2fa61
commit 82656f8f2a
637 changed files with 3306118 additions and 0 deletions
--- a/PY1/四虎视频.py
+++ b/PY1/四虎视频.py
@@ -0,0 +1,328 @@
+# coding=utf-8
+#!/usr/bin/python
+import sys
+sys.path.append('..')
+from base.spider import Spider
+import json
+import time
+import urllib.parse
+import re
+import requests
+from lxml import etree
+
+class Spider(Spider):
+    def getName(self):
+        return "四虎视频"
+
+    def init(self, extend=""):
+        self.baseUrl = "https://www.sihuhu.xyz"
+        self.headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
+            "Referer": self.baseUrl
+        }
+
+    def homeContent(self, filter):
+        result = {}
+        # 从HTML中提取分类信息
+        cateManual = {
+            "传媒厂商": "20",
+            "麻豆传媒": "21",
+            "91制片": "22",
+            "蜜桃传媒": "23",
+            "天美传媒": "24",
+            "精东影片": "25",
+            "星空传媒": "26",
+            "葫芦影业": "27",
+            "糖心VLOG": "28",
+            "精品推荐": "29",
+            "日本无码": "30",
+            "日本有码": "31",
+            "AV解说": "32",
+            "中文有码": "33",
+            "中文无码": "34",
+            "日韩极品": "35",
+            "日韩无码": "36",
+            "少女萝莉": "37",
+            "水嫩萝莉": "38",
+            "极品主播": "40",
+            "卡通动漫": "43",
+            "SM调教": "44",
+            "探花合集": "50",
+            "91大神": "51",
+            "台湾萝莉": "54",
+            "萝莉传媒": "55",
+            "白虎口爆": "57",
+            "嫩女网爆": "47",
+            "嫩逼乌鸡": "42",
+            "少女伦理": "45",
+            "萝莉互口": "46",
+            "黑料网爆": "48",
+            "野战车震": "52",
+            "萝莉黑瓜": "53",
+            "萝莉巨乳": "58",
+            "明星换脸": "73",
+            "萝莉抠逼": "56",
+            "国产大作": "39",
+            "欧美萝莉": "41",
+            "热门事件": "49",
+            "少女3P": "59",
+            "偷拍萝莉": "60",
+            "强奸少女": "61",
+            "重口猎奇": "62",
+            "制服萝控": "63",
+            "极品少女": "64",
+            "明星爆料": "65",
+            "X短视频": "66",
+            "AV明星": "67",
+            "极品萝莉": "68",
+            "人妻艹妈": "69",
+            "VR视角": "70",
+            "角色扮演": "71",
+            "男同男娘": "72"
+        }
+        classes = []
+        for k in cateManual:
+            classes.append({
+                'type_name': k,
+                'type_id': cateManual[k]
+            })
+        result['class'] = classes
+        return result
+
+    def homeVideoContent(self):
+        result = {}
+        # 尝试获取首页推荐视频
+        try:
+            rsp = self.fetch(self.baseUrl, headers=self.headers)
+            html = etree.HTML(rsp.text)
+            
+            videos = []
+            # 尝试解析首页视频列表
+            video_elements = html.xpath('//ul[@class="thumbnail-group clearfix"]/li')
+            for element in video_elements:
+                try:
+                    name = element.xpath('.//h5/a/text()')[0].strip()
+                    pic = element.xpath('.//img/@data-original')[0]
+                    if not pic.startswith('http'):
+                        pic = self.baseUrl + pic
+                    href = element.xpath('.//a[@class="thumbnail"]/@href')[0]
+                    vid = href.split('/')[-1].replace('.html', '')
+                    remark = element.xpath('.//span[@class="title"]/text()')
+                    remark = remark[0] if remark else ""
+                    
+                    videos.append({
+                        "vod_id": vid,
+                        "vod_name": name,
+                        "vod_pic": pic,
+                        "vod_remarks": remark
+                    })
+                except:
+                    continue
+            
+            result['list'] = videos
+        except:
+            result['list'] = []
+        
+        return result
+
+    def categoryContent(self, tid, pg, filter, extend):
+        result = {}
+        url = f'{self.baseUrl}/vod/type/id/{tid}/page/{pg}.html'
+        rsp = self.fetch(url, headers=self.headers)
+        html = etree.HTML(rsp.text)
+        
+        vodList = []
+        video_elements = html.xpath('//ul[@class="thumbnail-group clearfix"]/li')
+        for element in video_elements:
+            try:
+                name = element.xpath('.//h5/a/text()')[0].strip()
+                pic = element.xpath('.//img/@data-original')[0]
+                if not pic.startswith('http'):
+                    pic = self.baseUrl + pic
+                href = element.xpath('.//a[@class="thumbnail"]/@href')[0]
+                vid = href.split('/')[-1].replace('.html', '')
+                remark = element.xpath('.//span[@class="title"]/text()')
+                remark = remark[0] if remark else ""
+                
+                vodList.append({
+                    "vod_id": vid,
+                    "vod_name": name,
+                    "vod_pic": pic,
+                    "vod_remarks": remark
+                })
+            except:
+                continue
+                
+        result['list'] = vodList
+        result['page'] = pg
+        result['pagecount'] = 9999
+        result['limit'] = 20
+        result['total'] = 999999
+        return result
+
+    def detailContent(self, array):
+        tid = array[0]
+        url = f'{self.baseUrl}/vod/detail/id/{tid}.html'
+        rsp = self.fetch(url, headers=self.headers)
+        html = etree.HTML(rsp.text)
+        
+        # 获取视频详情
+        title = html.xpath('//title/text()')[0].replace(' - 四虎视频', '')
+        pic = html.xpath('//meta[@property="og:image"]/@content')
+        pic = pic[0] if pic else ""
+        desc = html.xpath('//meta[@name="description"]/@content')
+        desc = desc[0] if desc else ""
+        
+        # 获取播放列表 - 修复播放地址获取
+        play_from = []
+        play_url = []
+        
+        # 尝试获取所有播放源
+        play_sources = html.xpath('//div[@class="module-play-list"]/div')
+        for source in play_sources:
+            source_name = source.xpath('.//span/text()')
+            if source_name:
+                source_name = source_name[0].strip()
+                play_from.append(source_name)
+                
+                # 获取该源下的所有剧集
+                episodes = source.xpath('.//a')
+                episode_urls = []
+                for episode in episodes:
+                    ep_name = episode.xpath('./text()')[0].strip()
+                    ep_href = episode.xpath('./@href')[0]
+                    episode_urls.append(f"{ep_name}${self.baseUrl}{ep_href}")
+                
+                play_url.append("#".join(episode_urls))
+        
+        # 如果没有找到播放源，使用默认方式
+        if not play_from:
+            play_from = ["默认"]
+            play_page_url = f"{self.baseUrl}/vod/play/id/{tid}/sid/1/nid/1.html"
+            play_url.append(f"第1集${play_page_url}")
+        
+        vod = {
+            "vod_id": tid,
+            "vod_name": title,
+            "vod_pic": pic,
+            "vod_content": desc,
+            "vod_play_from": "$$$".join(play_from),
+            "vod_play_url": "$$$".join(play_url)
+        }
+        
+        return {'list': [vod]}
+
+    def searchContent(self, key, quick):
+        result = {}
+        url = f'{self.baseUrl}/vod/search/page/1/wd/{urllib.parse.quote(key)}.html'
+        rsp = self.fetch(url, headers=self.headers)
+        html = etree.HTML(rsp.text)
+        
+        vodList = []
+        video_elements = html.xpath('//ul[@class="thumbnail-group clearfix"]/li')
+        for element in video_elements:
+            try:
+                name = element.xpath('.//h5/a/text()')[0].strip()
+                pic = element.xpath('.//img/@data-original')[0]
+                if not pic.startswith('http'):
+                    pic = self.baseUrl + pic
+                href = element.xpath('.//a[@class="thumbnail"]/@href')[0]
+                vid = href.split('/')[-1].replace('.html', '')
+                remark = element.xpath('.//span[@class="title"]/text()')
+                remark = remark[0] if remark else ""
+                
+                vodList.append({
+                    "vod_id": vid,
+                    "vod_name": name,
+                    "vod_pic": pic,
+                    "vod_remarks": remark
+                })
+            except:
+                continue
+                
+        result['list'] = vodList
+        return result
+
+    def playerContent(self, flag, id, vipFlags):
+        result = {}
+        # 解析播放地址
+        url = f'{self.baseUrl}{id}' if id.startswith('/') else id
+        
+        # 检查是否已经是m3u8链接
+        if id.endswith('.m3u8'):
+            result["parse"] = 0
+            result["playUrl"] = ""
+            result["url"] = id
+            result["header"] = json.dumps(self.headers)
+            return result
+            
+        rsp = self.fetch(url, headers=self.headers)
+        
+        # 方法1: 尝试从JavaScript变量中提取播放信息
+        pattern = r'var player_aaaa\s*=\s*({.*?});'
+        match = re.search(pattern, rsp.text, re.DOTALL)
+        
+        if match:
+            try:
+                player_info = json.loads(match.group(1))
+                video_url = player_info.get('url', '')
+                
+                if video_url:
+                    # 处理转义字符
+                    video_url = video_url.replace('\\/', '/')
+                    result["parse"] = 0
+                    result["playUrl"] = ""
+                    result["url"] = video_url
+                    result["header"] = json.dumps(self.headers)
+                    return result
+            except:
+                pass
+        
+        # 方法2: 尝试从JavaScript中找到url字段
+        url_patterns = [
+            r'"url"\s*:\s*"([^"]+)"',
+            r"url\s*:\s*'([^']+)'",
+            r'video_url\s*:\s*"([^"]+)"',
+            r"video_url\s*:\s*'([^']+)'"
+        ]
+        
+        for pattern in url_patterns:
+            match = re.search(pattern, rsp.text)
+            if match:
+                video_url = match.group(1).replace('\\/', '/')
+                if video_url and ('m3u8' in video_url or 'mp4' in video_url):
+                    result["parse"] = 0
+                    result["playUrl"] = ""
+                    result["url"] = video_url
+                    result["header"] = json.dumps(self.headers)
+                    return result
+        
+        # 方法3: 尝试从iframe中提取视频地址
+        iframe_pattern = r'<iframe[^>]+src="([^"]+)"'
+        iframe_match = re.search(iframe_pattern, rsp.text)
+        if iframe_match:
+            iframe_src = iframe_match.group(1)
+            if iframe_src.startswith('//'):
+                iframe_src = 'https:' + iframe_src
+            elif iframe_src.startswith('/'):
+                iframe_src = self.baseUrl + iframe_src
+                
+            # 递归获取iframe内容
+            return self.playerContent(flag, iframe_src, vipFlags)
+        
+        # 如果以上方法都失败，返回原始页面供进一步解析
+        result["parse"] = 1
+        result["playUrl"] = ""
+        result["url"] = url
+        result["header"] = json.dumps(self.headers)
+        
+        return result
+
+    def isVideoFormat(self, url):
+        pass
+
+    def manualVideoCheck(self):
+        pass
+
+    def localProxy(self, param):
+        return []