# 价格敏感度分析 def parallel_sentiment(comment): try: return SnowNLP(comment).sentiments except Exception as e: # 解析失败，打印异常信息并返回中性情感得分为0.5 print(f"解析评论时出现错误: {e}") return 0.5 def analyze_price_sentiment(df): # 筛选价格相关评论 price_df = df[df['is_price_related'] == 1].copy() print(f"共找到{len(price_df)}条价格相关评论") # 并行情感分析 with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool: sentiments = pool.map(parallel_sentiment, pr

代码分析与优化

1. 导入缺失的库：代码中使用了 `SnowNLP`、`multiprocessing`、`jieba` 和 `Counter`，需要确保这些库已经导入。
2. 异常处理：在 `parallel_sentiment` 函数中，异常处理只是简单地打印错误信息并返回 0.5；在多进程环境下，打印信息可能会混乱，建议使用日志记录。
3. 代码优化：
   - 可以将一些常量（如停用词、情感标签阈值等）提取出来，提高代码的可维护性。
   - 对于关键词提取部分，可以将分词和过滤停用词的逻辑封装成一个函数，提高代码的可读性。

优化后的代码

```python
import multiprocessing
from snownlp import SnowNLP
import jieba
from collections import Counter

# 定义常量
SENTIMENT_POSITIVE_THRESHOLD = 0.6
SENTIMENT_NEGATIVE_THRESHOLD = 0.4
STOPWORDS = set(['但是', '虽然', '如果', '因为', '所以', '这个', '那个', '一种', '就是', '可以'])
TOP_COMPLAINTS_COUNT = 10
TOP_FILTERED_COMPLAINTS_COUNT = 5

# 定义分词和过滤停用词的函数
def filter_words(comment):
    words = jieba.lcut(str(comment))
    return [word for word in words if len(word) > 1 and word not in STOPWORDS and not word.isdigit()]

# 并行情感分析函数
def parallel_sentiment(comment):
    try:
        return SnowNLP(comment).sentiments
    except Exception as e:
        # 解析失败，使用日志记录异常信息并返回中性情感得分为0.5
        import logging
        logging.error(f"解析评论时出现错误: {e}")
        return 0.5

def analyze_price_sentiment(df, price_keywords):
    # 筛选价格相关评论
    price_df = df[df['is_price_related'] == 1].copy()
    print(f"共找到{len(price_df)}条价格相关评论")

    # 并行情感分析
    with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
        sentiments = pool.map(parallel_sentiment, price_df['comment'].tolist())
    price_df['sentiment'] = sentiments

    # 情感标签分类
    price_df['sentiment_label'] = price_df['sentiment'].apply(
        lambda x: 'positive' if x > SENTIMENT_POSITIVE_THRESHOLD else 'negative' if x < SENTIMENT_NEGATIVE_THRESHOLD else 'neutral'
    )

    # 计算价格敏感指标
    sentiment_counts = price_df['sentiment_label'].value_counts(normalize=True)
    negative_ratio = sentiment_counts.get('negative', 0)
    price_sensitivity = negative_ratio * 100  # 负面评价比例作为敏感度指标

    # 关键词提取（仅负面评论）
    n_words = []
    for comment in price_df[price_df['sentiment_label'] == 'negative']['comment']:
        n_words.extend(filter_words(comment))
    top_complaints = Counter(n_words).most_common(TOP_COMPLAINTS_COUNT)  # 取前10个高频词

    # 过滤掉与价格无关的词（因为已经知道是价格相关评论，但可能分词后还有无关词）
    filtered_complaints = [(word, cnt) for word, cnt in top_complaints if word not in price_keywords]
    # 取前5个
    top5_complaints = filtered_complaints[:TOP_FILTERED_COMPLAINTS_COUNT]

    return price_sensitivity, top5_complaints, price_df

# 假设 df 和 price_keywords 已经定义
# df = ...
# price_keywords = ...

# 执行分析
price_sensitivity, top_complaints, price_df = analyze_price_sentiment(df, price_keywords)
print(f"价格敏感度: {price_sensitivity:.1f}%")
print("主要抱怨点:", top_complaints)
```

额外需要注意的逻辑点

1. 确保 `df` 数据框中包含 `is_price_related` 和 `comment` 列，否则会出现 `KeyError`。
2. 在使用多进程时，要注意数据的安全性，避免多个进程同时修改同一个数据。
3. 对于 `price_keywords` 列表，其中的词会从高频词结果中被剔除（`word not in price_keywords`），因此要确保其包含了所有需要过滤掉的词，否则可能会影响关键词提取的结果。

[2025-06-25 15:56:13 | AI写代码神器 | 1048点数解答]

服务商

更多选项

快捷项

直通车