import cv2
import numpy as np
import pandas as pd
import requests
from io import BytesIO
from PIL import Image
import os
from multiprocessing import Pool
from sklearn.cluster import MiniBatchKMeans
from tqdm import tqdm
import warnings

warnings.filterwarnings('ignore')


# ------------------ Image processing: initial configuration ---------------------
# Initialize the face detection model (Haar cascade bundled with OpenCV)
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')


# ------------------ Image processing ---------------------
def get_image(url):
    """Fetch an image from a URL and preprocess it into an OpenCV BGR array."""
    try:
        response = requests.get(url, timeout=10)
        img = Image.open(BytesIO(response.content))
        return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    except Exception as e:
        print(f"Error loading {url}: {str(e)}")
        return None


def extract_color_palette(img_rgb, color_count=5):
    """Extract the top N dominant colors and their proportions."""
    # Extract dominant colors with k-means clustering
    pixels = img_rgb.reshape(-1, 3)
    kmeans = MiniBatchKMeans(n_clusters=color_count, random_state=0)
    labels = kmeans.fit_predict(pixels)

    # Compute the proportion of each color (minlength guards against empty clusters)
    counts = np.bincount(labels, minlength=color_count)
    total = counts.sum()
    palette = []
    for i in range(color_count):
        ratio = counts[i] / total
        color = kmeans.cluster_centers_[i].astype(int)
        palette.append((color, ratio))

    return sorted(palette, key=lambda x: -x[1])  # sort by proportion, descending
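
# A minimal sketch of the return format, using a hypothetical synthetic image
# (not part of the original pipeline): a half-black / half-red frame should
# split into two clusters of roughly equal weight.
#
#   demo = np.zeros((32, 32, 3), dtype=np.uint8)
#   demo[:, 16:] = (200, 30, 30)
#   extract_color_palette(demo, color_count=2)
#   # -> [(array([r, g, b]), ratio), ...] sorted by ratio, here two ~0.5 entries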


def classify_hsv_color(rgb_color):
    """Classify an RGB color as warm / cool / neutral."""
    try:
        # Make sure the input is a valid RGB color value
        rgb_color = np.clip(rgb_color, 0, 255)
        hsv = cv2.cvtColor(np.uint8([[rgb_color]]), cv2.COLOR_RGB2HSV)[0][0]
        h, s, v = hsv[0], hsv[1] / 255.0, hsv[2] / 255.0  # normalize S and V to [0, 1]

        # Neutral-color test (per the Palmer criterion)
        if s < 0.2 or v < 0.2:
            return 'neutral'

        # Hue classification. OpenCV stores 8-bit hue as 0-179 (degrees / 2),
        # so warm hues of 0-90° and 270-360° correspond to h < 45 or h >= 135.
        if h < 45 or h >= 135:
            return 'warm'
        return 'cool'
    except Exception as e:
        print(f"Color out of range: {str(e)}")
        return 'neutral'  # default to neutral on error
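
# Quick sanity check of the classification, assuming the OpenCV hue convention
# noted above (illustrative values, not part of the original script):
#
#   classify_hsv_color([255, 0, 0])      # -> 'warm'    (red: H=0)
#   classify_hsv_color([0, 0, 255])      # -> 'cool'    (blue: H=120 in OpenCV units, i.e. 240°)
#   classify_hsv_color([128, 128, 128])  # -> 'neutral' (gray: S=0)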


def determine_warm_tone(img_rgb):
    """Return (warm ratio, cool ratio, neutral ratio) for the image's palette."""
    palette = extract_color_palette(img_rgb)

    warm_ratio, cool_ratio, neutral_ratio = 0.0, 0.0, 0.0
    for color, ratio in palette:
        category = classify_hsv_color(color)
        if category == 'warm':
            warm_ratio += ratio
        elif category == 'cool':
            cool_ratio += ratio
        else:
            neutral_ratio += ratio

    return warm_ratio, cool_ratio, neutral_ratio
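
# Note: every pixel is assigned to one of the palette clusters, so the three
# ratios sum to 1.0; classification happens per cluster using its centroid
# color rather than per pixel.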


def detect_human(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Tuned parameter combination
    faces = face_cascade.detectMultiScale(
        gray,
        scaleFactor=1.02,  # smaller scale step for finer-grained detection
        minNeighbors=5,  # require more neighbors to reduce false positives
        minSize=(50, 50),  # minimum face size expected on Bilibili covers
        flags=cv2.CASCADE_FIND_BIGGEST_OBJECT  # prefer the largest face
    )
    return len(faces) > 0
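
# Note: haarcascade_frontalface_default only detects roughly frontal faces, so
# the 'Portrait' flag is best read as "a frontal face was found", not general
# person detection; profile faces or bodies without a visible face return False.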


def process_url(url):
    try:
        img = get_image(url)
        if img is None:
            return None

        # Color analysis
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        warm_ratio, cool_ratio, neutral_ratio = determine_warm_tone(img_rgb)

        return {
            'url': url,
            'Portrait': int(detect_human(img)),
            'WarmRatio': round(warm_ratio, 3),
            'CoolRatio': round(cool_ratio, 3),
            'NeutralRatio': round(neutral_ratio, 3)
        }
    except Exception as e:
        print(f"Error processing {url}: {str(e)}")
        return None


# Batch processing
def batch_process(urls, workers=4):
    # Build the initial result list with one entry per URL
    results = [{'url': url, 'success': False} for url in urls]

    with Pool(workers) as pool:
        processed = list(tqdm(pool.imap(process_url, urls),
                              total=len(urls),
                              desc="Processing"))

    # Update the successfully processed results in their original order
    for i, res in enumerate(processed):
        if res is not None:
            results[i] = res
            results[i]['success'] = True

    return pd.DataFrame(results)
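
# Sketch of the resulting DataFrame (one row per input URL, original order kept):
# successful rows carry url / Portrait / WarmRatio / CoolRatio / NeutralRatio
# with success=True, while failed URLs keep only url and success=False, leaving
# the remaining columns as NaN.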


# Usage example
if __name__ == "__main__":
    # # Read the URL list
    # input_csv = "data_all_first_ver.csv"
    # # Output path
    # os.makedirs('./result', exist_ok=True)
    # output_csv = "result/analysis_results.csv"
    #
    # # Full run
    # df = pd.read_csv(input_csv)
    # urls = df['视频封面URL'].tolist()
    #
    # # Run the analysis
    # result_df = batch_process(urls)
    # result_df.to_csv(output_csv, index=False, encoding='utf-8-sig')
    # print(f"Successfully processed {result_df['success'].sum()}/{len(urls)} images")
    # print("Analysis complete! Results saved to", output_csv)

    # Re-run the URLs that failed
    urls_failed = [
        'http://i1.hdslb.com/bfs/archive/5c42e0fa42ec945106d2e167253889e8a05541c9.jpg',
        'http://i1.hdslb.com/bfs/archive/d2ca3e3f4c543245715937bf643e98b55badcc21.jpg',
        'http://i0.hdslb.com/bfs/archive/2b1cf64d70bf2036793e33b2de3067344a7ff77d.jpg',
        'http://i0.hdslb.com/bfs/archive/123ddc4cdf429968fa416f78f4049a728e8da3ab.jpg',
        'http://i2.hdslb.com/bfs/archive/b07446d2176cec63d42f204504f4cda7a940b05b.jpg',
    ]

    os.makedirs('result', exist_ok=True)  # make sure the output folder exists
    result_failed = batch_process(urls_failed)
    result_failed.to_csv('result/reanalyze.csv', index=False, encoding='utf-8-sig')