# Source listing: 176 lines, 5.0 KiB, Python
import os
import warnings
from collections import defaultdict
from io import BytesIO
from multiprocessing import Pool

import cv2
import numpy as np
import pandas as pd
import pynlpir
import pytesseract
import requests
from cnsenti import Sentiment
from colorthief import ColorThief
from PIL import Image

# Silence every warning category for the whole process.
warnings.filterwarnings('ignore')

# Location of the Tesseract OCR executable used by pytesseract.
# The path is written as an absolute path: 'D:Program files' (no backslash
# after the drive letter) is drive-relative on Windows and resolves against
# the current directory of drive D: instead of its root.
pytesseract.pytesseract.tesseract_cmd = r'D:\Program files\Tesseract-OCR\tesseract.exe'

# ------------------ Image processing: initial configuration ---------------------
# Haar-cascade frontal-face detector, built once at import time from the
# cascade file shipped with OpenCV.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# Coefficients of the image affect model (original author's note: calibrated
# on the IAPS dataset).  Each table maps a feature name to its weight in a
# linear score; the weights in each table sum to 1.0.
VALENCE_WEIGHTS = {
    'warm_ratio': 0.35,
    'brightness': 0.15,
    'symmetry': 0.20,
    'colorfulness': 0.30,
}

AROUSAL_WEIGHTS = {
    'contrast': 0.40,
    'edge_density': 0.35,
    'saturation_std': 0.25,
}

# Warm hue interval (red through yellow), inclusive, in OpenCV 8-bit hue units.
# OpenCV stores 8-bit H as degrees / 2 (0-179), so 0-60 degrees is 0-30 here;
# an upper bound of 60 would mean 120 degrees and count greens as warm.
# NOTE(review): reds just below 360 degrees (H near 179) are not covered by
# this single interval - confirm whether they should count as warm.
WARM_HUE_RANGE = (0, 30)

# ------------------ Image processing ---------------------
def get_image(url):
    """Download an image and return it as an OpenCV BGR array.

    Args:
        url: Address of the image to fetch.

    Returns:
        A 3-channel ``uint8`` array in BGR channel order, or ``None`` when
        the download or the decoding fails (the error is printed).
    """
    try:
        response = requests.get(url, timeout=10)
        # Fail on HTTP errors here; otherwise an error page is handed to the
        # image decoder and surfaces as a misleading decode error.
        response.raise_for_status()
        # Normalise palette, grayscale, CMYK and alpha images to 3-channel
        # RGB; the colour conversion below rejects 1-/2-channel arrays and
        # would mis-read a 4-channel CMYK array as RGBA.
        img = Image.open(BytesIO(response.content)).convert('RGB')
        return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    except Exception as e:
        # Deliberately broad: this is a best-effort loader and one bad URL
        # must not abort a whole batch.
        print(f"Error loading {url}: {str(e)}")
        return None


def analyze_image(img):
    """Extract the low-level visual features used by the affect model.

    All features are scaled to a bounded range.  On raw 0-255 scales the
    weighted sums fed to the sigmoid in ``calculate_affect`` reach the tens
    or hundreds, where the sigmoid is flat and every image scores ~1.0.

    Args:
        img: 3-channel BGR ``uint8`` image, or ``None``.

    Returns:
        ``{}`` when ``img`` is ``None``; otherwise a dict of floats:

        * ``brightness``     - mean V channel, in [0, 1]
        * ``contrast``       - (max V - min V), in [0, 1]
        * ``saturation_std`` - std of S, in [0, 1]
        * ``colorfulness``   - mean of the scaled H, S and V stds, in [0, 1]
        * ``warm_ratio``     - share of pixels whose hue lies in
          ``WARM_HUE_RANGE``, in [0, 1]
        * ``symmetry``       - normalised correlation between the left half
          and the mirrored right half, in [-1, 1]
        * ``edge_density``   - share of pixels marked as Canny edges, in [0, 1]
    """
    if img is None:
        return {}

    height, width = img.shape[:2]

    # OpenCV 8-bit HSV: H spans 0-179, S and V span 0-255.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)

    # The largest possible standard deviation of values confined to [0, m]
    # is m / 2, which gives the divisors 89.5 (hue) and 127.5 (S, V).
    h_std = float(np.std(h)) / 89.5
    s_std = float(np.std(s)) / 127.5
    v_std = float(np.std(v)) / 127.5

    features = {
        'brightness': float(np.mean(v)) / 255.0,
        # Cast to int first so the subtraction never runs in uint8.
        'contrast': (int(v.max()) - int(v.min())) / 255.0,
        'saturation_std': s_std,
        'colorfulness': (h_std + s_std + v_std) / 3.0,
    }

    # Share of pixels with a warm hue.
    # NOTE(review): achromatic pixels appear to get H = 0 from OpenCV and
    # would therefore count as warm - confirm whether they should be masked.
    hue_mask = cv2.inRange(h, WARM_HUE_RANGE[0], WARM_HUE_RANGE[1])
    features['warm_ratio'] = np.count_nonzero(hue_mask) / (height * width)

    # Left/right symmetry.  Both halves are cut to the same width (the middle
    # column of an odd-width image is skipped) so the template match yields
    # exactly one score for the aligned halves.
    mid = width // 2
    if mid == 0:
        # Narrower than two pixels: there are no halves to compare.
        features['symmetry'] = 0.0
    else:
        left = img[:, :mid]
        right = cv2.flip(img[:, width - mid:], 1)
        score = cv2.matchTemplate(left, right, cv2.TM_CCOEFF_NORMED)[0][0]
        features['symmetry'] = float(score)

    # Edge density: Canny marks edge pixels with a non-zero value.
    edges = cv2.Canny(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 100, 200)
    features['edge_density'] = np.count_nonzero(edges) / edges.size

    return features


def calculate_affect(features):
    """Compute the valence and arousal scores of an image.

    Args:
        features: Mapping that holds a numeric value for every key of
            ``VALENCE_WEIGHTS`` and ``AROUSAL_WEIGHTS``.

    Returns:
        ``{'Poslm': valence, 'Actlm': arousal}``, each squashed into [-1, 1].

    Raises:
        KeyError: If ``features`` lacks one of the weighted feature names
            (for example the empty dict ``analyze_image`` returns for
            ``None``).
    """
    poslm = sum(features[name] * weight for name, weight in VALENCE_WEIGHTS.items())
    actlm = sum(features[name] * weight for name, weight in AROUSAL_WEIGHTS.items())

    # Sigmoid rescaled to (-1, 1): 2 / (1 + exp(-x)) - 1 == tanh(x / 2).
    # The tanh form is used because it cannot overflow for large negative x.
    return {
        'Poslm': np.tanh(poslm / 2),
        'Actlm': np.tanh(actlm / 2),
    }


def detect_human(img):
    """Report whether the image contains at least one detected face.

    Args:
        img: BGR image array.

    Returns:
        ``True`` when the module-level Haar cascade ``face_cascade`` finds
        one or more faces, ``False`` otherwise.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.05, minNeighbors=3)
    return len(faces) > 0


def process_url(url):
    """Run the complete analysis for a single image URL.

    Args:
        url: Address of the image to analyse.

    Returns:
        A dict with the keys ``'url'``, the affect scores returned by
        ``calculate_affect``, ``'Warm'`` (1 when the dominant colour's hue
        lies in ``WARM_HUE_RANGE``, else 0) and ``'Portrait'`` (1 when a
        face is detected, else 0); ``None`` when any step fails (the error
        is printed).
    """
    try:
        img = get_image(url)
        if img is None:
            return None

        features = analyze_image(img)
        affect = calculate_affect(features)

        # Dominant colour.  ColorThief needs an encoded image file, so the
        # pixels that are already decoded are re-encoded losslessly in memory
        # rather than downloaded a second time (the second request also had
        # no timeout and could hang a worker).
        # NOTE(review): any alpha channel is gone at this point, so
        # transparent pixels are no longer skipped by ColorThief - confirm
        # this is acceptable for the images being processed.
        encoded_ok, png = cv2.imencode('.png', img)
        if not encoded_ok:
            raise ValueError("could not re-encode image for dominant-colour analysis")
        color_thief = ColorThief(BytesIO(png.tobytes()))
        dominant_color = color_thief.get_color(quality=1)
        hsv_color = cv2.cvtColor(np.uint8([[dominant_color]]), cv2.COLOR_RGB2HSV)[0][0]
        warm = 1 if WARM_HUE_RANGE[0] <= hsv_color[0] <= WARM_HUE_RANGE[1] else 0

        # Portrait detection.
        has_human = detect_human(img)

        return {
            'url': url,
            **affect,
            'Warm': warm,
            'Portrait': int(has_human),
        }
    except Exception as e:
        # Deliberately broad: a failing image is reported and skipped so the
        # rest of the batch keeps running.
        print(f"Error processing {url}: {str(e)}")
        return None


# Batch processing
def batch_process(urls, workers=4):
    """Analyse many image URLs in a pool of worker processes.

    Args:
        urls: Iterable of image URLs.
        workers: Number of worker processes in the pool.

    Returns:
        A ``pandas.DataFrame`` with one row per successfully processed URL,
        in input order.  URLs for which ``process_url`` returned ``None``
        are left out.
    """
    with Pool(workers) as pool:
        results = [res for res in pool.imap(process_url, urls) if res is not None]

    if not results:
        # Keep the schema stable when nothing succeeded, so callers that
        # drop or merge on 'url' do not fail on a column-less frame.
        return pd.DataFrame(columns=['url', 'Poslm', 'Actlm', 'Warm', 'Portrait'])
    return pd.DataFrame(results)


# Usage example
if __name__ == "__main__":
    # CSV holding the URL list (only read by the full run below).
    input_csv = "data_all.csv"
    # Output location.
    os.makedirs('./result', exist_ok=True)
    output_csv = "result/analysis_results.csv"

    ## Full run
    # df = pd.read_csv(input_csv)
    # urls = df['视频封面'].tolist()
    #
    # # Run the analysis
    # result_df = batch_process(urls)
    #
    # # Merge with the original data (left_on needs a matching right_on)
    # final_df = df.merge(result_df, left_on='视频封面', right_on='url')
    # final_df.drop('url', axis=1).to_csv(output_csv, index=False)

    # Small-batch experiment with sample URLs
    urls = [
        'http://i0.hdslb.com/bfs/archive/393a8e961b704d43256fe7e6c89fee04df966e17.jpg',
        'http://i0.hdslb.com/bfs/archive/072e16a1237040941f15b1ed67a8d1ebe6f2e041.jpg',
        'http://i2.hdslb.com/bfs/archive/1c56b5bec767c604175983cc5926f5832baa9bb8.jpg',
        'http://i0.hdslb.com/bfs/archive/66384e53a15345a539ccbb2989442f1d960b9235.jpg',
        'http://i2.hdslb.com/bfs/archive/836b762456f0b4d65dd2c40fc4cd120107e46b88.jpg',
    ]
    result_df = batch_process(urls)
    # Write to the same path that is reported below instead of repeating the literal.
    result_df.to_csv(output_csv, index=False)
    print(f"成功处理 {len(result_df)}/{len(urls)} 张图片")

    print("分析完成!结果已保存至", output_csv)