176 lines
5.0 KiB
Python
176 lines
5.0 KiB
Python
|
import cv2
|
|||
|
import numpy as np
|
|||
|
import pandas as pd
|
|||
|
import requests
|
|||
|
from io import BytesIO
|
|||
|
from PIL import Image
|
|||
|
import os
|
|||
|
from colorthief import ColorThief
|
|||
|
import pytesseract
|
|||
|
from multiprocessing import Pool
|
|||
|
from cnsenti import Sentiment
|
|||
|
import pynlpir
|
|||
|
from collections import defaultdict
|
|||
|
import warnings
|
|||
|
|
|||
|
# Silence every Python warning for the rest of the run.
warnings.filterwarnings('ignore')

# Location of the Tesseract OCR executable.
# The drive letter must be followed by a backslash: 'D:Program files\...' is a
# drive-relative path that only resolves when the current directory on D: is
# the drive root, whereas 'D:\Program files\...' is absolute.
pytesseract.pytesseract.tesseract_cmd = r'D:\Program files\Tesseract-OCR\tesseract.exe'
|
|||
|
|
|||
|
# ------------------ Image processing: initial configuration ------------------
# Frontal-face Haar cascade, loaded from the directory exposed as
# cv2.data.haarcascades.
_FRONTAL_FACE_XML = 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + _FRONTAL_FACE_XML)
|
|||
|
|
|||
|
# Image-affect model coefficients (original author's note: calibrated on the
# IAPS dataset). Each key names an image feature and each value is the weight
# that feature receives in the corresponding linear score.
VALENCE_WEIGHTS = dict(
    warm_ratio=0.35,
    brightness=0.15,
    symmetry=0.20,
    colorfulness=0.30,
)

AROUSAL_WEIGHTS = dict(
    contrast=0.40,
    edge_density=0.35,
    saturation_std=0.25,
)
|
|||
|
|
|||
|
# Warm-tone hue range (inclusive bounds), in OpenCV's 8-bit hue units.
# For uint8 images OpenCV stores hue as degrees / 2 (0-179), so red through
# yellow (0-60 degrees) is 0-30 here. The previous upper bound of 60 reached
# 120 degrees, i.e. it also counted green as warm.
# NOTE: reds that wrap around the top of the hue circle (close to 179) are
# not covered by this single range.
WARM_HUE_RANGE = (0, 30)  # red through yellow
|
|||
|
|
|||
|
# ------------------处理图像 ---------------------
|
|||
|
def get_image(url):
    """Download an image from a URL and return it as a BGR array for OpenCV.

    Args:
        url: Address of the image to fetch.

    Returns:
        A 3-channel BGR ``numpy`` array, or ``None`` when the download or the
        decoding failed (the error is printed).
    """
    try:
        response = requests.get(url, timeout=10)
        # Fail on HTTP errors instead of trying to decode an error page as an image.
        response.raise_for_status()
        with Image.open(BytesIO(response.content)) as img:
            # Normalize grayscale, palette, CMYK and alpha images to 3-channel
            # RGB so the RGB->BGR conversion always receives the layout it expects.
            rgb = np.array(img.convert('RGB'))
        return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    except Exception as e:
        # Best-effort loader: report the failure and let the caller skip this URL.
        print(f"Error loading {url}: {str(e)}")
        return None
|
|||
|
|
|||
|
|
|||
|
def analyze_image(img):
    """Extract the low-level visual features used by the affect model.

    Args:
        img: BGR image array, or ``None``.

    Returns:
        A dict with the keys ``brightness``, ``contrast``, ``saturation_std``,
        ``colorfulness``, ``warm_ratio``, ``symmetry`` and ``edge_density``;
        an empty dict when ``img`` is ``None``.
    """
    if img is None:
        return {}

    # Split into hue, saturation and value so each can be measured separately.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)
    height, width = img.shape[:2]

    # Basic channel statistics.
    # NOTE(review): these stay on the raw 8-bit channel scale (not normalized
    # to [0, 1]) — confirm that is the scale the model weights expect.
    features = {
        'brightness': np.mean(v),
        'contrast': np.max(v) - np.min(v),
        'saturation_std': np.std(s),
        'colorfulness': np.std(h) + np.std(s) + np.std(v)
    }

    # Fraction of pixels whose hue lies inside the warm range.
    hue_mask = cv2.inRange(h, WARM_HUE_RANGE[0], WARM_HUE_RANGE[1])
    features['warm_ratio'] = np.count_nonzero(hue_mask) / (height * width)

    # Left/right symmetry: correlate the left half with the mirrored right half.
    # Both halves are cut to the same width (the centre column of an odd-width
    # image is skipped) so the template never exceeds the image and the match
    # result is a single, well-defined coefficient.
    mid = width // 2
    if mid == 0:
        # Too narrow to have two halves to compare.
        features['symmetry'] = 0.0
    else:
        left = img[:, :mid]
        right = cv2.flip(img[:, width - mid:], 1)
        features['symmetry'] = cv2.matchTemplate(left, right, cv2.TM_CCOEFF_NORMED)[0][0]

    # Edge density: mean of the Canny edge map of the grayscale image.
    edges = cv2.Canny(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 100, 200)
    features['edge_density'] = np.mean(edges)

    return features
|
|||
|
|
|||
|
|
|||
|
def calculate_affect(features):
    """Turn image features into valence and arousal scores.

    Args:
        features: Mapping that contains every key of ``VALENCE_WEIGHTS`` and
            ``AROUSAL_WEIGHTS``.

    Returns:
        Dict with ``'Poslm'`` (built from the valence weights) and ``'Actlm'``
        (built from the arousal weights), each squashed to lie between -1 and 1.

    Raises:
        KeyError: If a weighted feature is missing from ``features``.
    """
    def weighted_total(weights):
        # Linear combination of the features named in ``weights``.
        return sum(features[name] * weights[name] for name in weights)

    def squash(score):
        # Rescaled logistic function: 0 maps to 0, large scores approach +/-1.
        return 2 / (1 + np.exp(-score)) - 1

    valence_score = weighted_total(VALENCE_WEIGHTS)
    arousal_score = weighted_total(AROUSAL_WEIGHTS)

    return {
        'Poslm': squash(valence_score),
        'Actlm': squash(arousal_score)
    }
|
|||
|
|
|||
|
|
|||
|
def detect_human(img):
    """Report whether the face cascade finds at least one face in the image.

    Args:
        img: BGR image array.

    Returns:
        ``True`` if one or more faces were detected, otherwise ``False``.
    """
    # The cascade runs on a single-channel grayscale image.
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    detections = face_cascade.detectMultiScale(
        grayscale, scaleFactor=1.05, minNeighbors=3
    )
    return len(detections) != 0
|
|||
|
|
|||
|
|
|||
|
def process_url(url):
    """Analyze the image at one URL and return its affect and color indicators.

    Args:
        url: Address of the image to analyze.

    Returns:
        Dict with ``'url'``, the entries returned by ``calculate_affect``,
        ``'Warm'`` (1 if the dominant color's hue lies in ``WARM_HUE_RANGE``,
        else 0) and ``'Portrait'`` (1 if a face was detected, else 0).
        ``None`` if the image could not be loaded or any step failed (the
        error is printed).
    """
    try:
        img = get_image(url)
        if img is None:
            return None

        features = analyze_image(img)
        affect = calculate_affect(features)

        # Dominant-color analysis. ColorThief reads the encoded file, so the
        # image bytes are fetched again; the request is bounded by a timeout so
        # a stalled server cannot hang a pool worker forever.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        color_thief = ColorThief(BytesIO(response.content))
        dominant_color = color_thief.get_color(quality=1)
        # Convert the single RGB color to HSV to read its hue.
        hsv_color = cv2.cvtColor(np.uint8([[dominant_color]]), cv2.COLOR_RGB2HSV)[0][0]
        warm = 1 if WARM_HUE_RANGE[0] <= hsv_color[0] <= WARM_HUE_RANGE[1] else 0

        # Face detection
        has_human = detect_human(img)

        return {
            'url': url,
            **affect,
            'Warm': warm,
            'Portrait': int(has_human)
        }
    except Exception as e:
        # Best-effort per-URL processing: report and let the batch continue.
        print(f"Error processing {url}: {str(e)}")
        return None
|
|||
|
|
|||
|
|
|||
|
# 批量处理
|
|||
|
def batch_process(urls, workers=4):
    """Run ``process_url`` over many URLs in a pool of worker processes.

    Args:
        urls: Iterable of image URLs.
        workers: Number of worker processes in the pool.

    Returns:
        A ``pandas.DataFrame`` with one row per URL that was processed
        successfully (``None`` results are dropped), in input order.
    """
    rows = []
    with Pool(workers) as pool:
        # imap yields results in the same order as the input URLs.
        for outcome in pool.imap(process_url, urls):
            if outcome is not None:
                rows.append(outcome)
    return pd.DataFrame(rows)
|
|||
|
|
|||
|
|
|||
|
# 使用示例
|
|||
|
if __name__ == "__main__":
    # CSV holding the URL list for a full run (only used by the commented
    # block below).
    input_csv = "data_all.csv"
    # Output path: defined once and reused for writing and reporting, so the
    # file that is written is always the one that is announced.
    output_csv = "result/analysis_results.csv"
    os.makedirs(os.path.dirname(output_csv), exist_ok=True)

    ## Full run
    # df = pd.read_csv(input_csv)
    # urls = df['视频封面'].tolist()
    #
    # # Run the analysis
    # result_df = batch_process(urls)
    #
    # # Merge with the original data (the result rows are keyed by 'url')
    # final_df = df.merge(result_df, left_on='视频封面', right_on='url')
    # final_df.drop('url', axis=1).to_csv(output_csv, index=False)

    # Small-batch trial on a few sample URLs
    urls = [
        'http://i0.hdslb.com/bfs/archive/393a8e961b704d43256fe7e6c89fee04df966e17.jpg',
        'http://i0.hdslb.com/bfs/archive/072e16a1237040941f15b1ed67a8d1ebe6f2e041.jpg',
        'http://i2.hdslb.com/bfs/archive/1c56b5bec767c604175983cc5926f5832baa9bb8.jpg',
        'http://i0.hdslb.com/bfs/archive/66384e53a15345a539ccbb2989442f1d960b9235.jpg',
        'http://i2.hdslb.com/bfs/archive/836b762456f0b4d65dd2c40fc4cd120107e46b88.jpg',
    ]
    result_df = batch_process(urls)
    result_df.to_csv(output_csv, index=False)
    print(f"成功处理 {len(result_df)}/{len(urls)} 张图片")

    print("分析完成!结果已保存至", output_csv)
|