Files
claude-skills-reference/product-team/product-manager-toolkit/scripts/customer_interview_analyzer.py
Reza Rezvani 5add886197 fix: repair 25 Python scripts failing --help across all domains
- Fix Python 3.10+ syntax (float | None → Optional[float]) in 2 scripts
- Add argparse CLI handling to 9 marketing scripts using raw sys.argv
- Fix 10 scripts crashing at module level (wrap in __main__, add argparse)
- Make yaml/prefect/mcp imports conditional with stdlib fallbacks (4 scripts)
- Fix f-string backslash syntax in project_bootstrapper.py
- Fix -h flag conflict in pr_analyzer.py
- Fix tech-debt.md description (score → prioritize)

All 237 scripts now pass python3 --help verification.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 05:51:27 +01:00

453 lines
17 KiB
Python

#!/usr/bin/env python3
"""
Customer Interview Analyzer
Extracts insights, patterns, and opportunities from user interviews
"""
import re
from typing import Dict, List, Tuple, Set
from collections import Counter, defaultdict
import json
class InterviewAnalyzer:
    """Analyze customer interviews for insights and patterns.

    The analysis is heuristic and keyword driven: sentences are scanned for
    indicator terms (pain, delight, feature request), a handful of regular
    expressions pick out "jobs to be done" phrasings, metrics and possible
    competitor names, and simple counts produce a sentiment score and themes.

    Indicator terms are matched at the *start of a word*. Stems such as
    ``'frustrat'`` therefore still catch "frustrating", while ``'clear'`` no
    longer fires inside "unclear" and ``'ui'`` no longer fires inside "build".
    """

    def __init__(self):
        """Set up the keyword lists and regex patterns used by the heuristics."""
        # Pain point indicators (several are stems, e.g. 'frustrat', 'confus')
        self.pain_indicators = [
            'frustrat', 'annoy', 'difficult', 'hard', 'confus', 'slow',
            'problem', 'issue', 'struggle', 'challeng', 'pain', 'waste',
            'manual', 'repetitive', 'tedious', 'boring', 'time-consuming',
            'complicated', 'complex', 'unclear', 'wish', 'need', 'want'
        ]
        # Positive indicators
        self.delight_indicators = [
            'love', 'great', 'awesome', 'amazing', 'perfect', 'easy',
            'simple', 'quick', 'fast', 'helpful', 'useful', 'valuable',
            'save', 'efficient', 'convenient', 'intuitive', 'clear'
        ]
        # Feature request indicators
        self.request_indicators = [
            'would be nice', 'wish', 'hope', 'want', 'need', 'should',
            'could', 'would love', 'if only', 'it would help', 'suggest',
            'recommend', 'idea', 'what if', 'have you considered'
        ]
        # Jobs to be done patterns (applied to lower-cased text)
        self.jtbd_patterns = [
            r'when i\s+(.+?),\s+i want to\s+(.+?)\s+so that\s+(.+)',
            r'i need to\s+(.+?)\s+because\s+(.+)',
            r'my goal is to\s+(.+)',
            r'i\'m trying to\s+(.+)',
            r'i use \w+ to\s+(.+)',
            r'helps me\s+(.+)',
        ]

    @staticmethod
    def _has_term(term: str, text: str) -> bool:
        """Return True when ``term`` occurs in ``text`` starting at a word boundary.

        ``text`` is expected to be lower-cased already; terms are lower-case.
        """
        return re.search(r'\b' + re.escape(term), text) is not None

    def _has_any(self, terms: List[str], text: str) -> bool:
        """Return True when at least one of ``terms`` starts a word in ``text``."""
        return any(self._has_term(term, text) for term in terms)

    def _first_term(self, terms: List[str], text: str) -> str:
        """Return the first entry of ``terms`` (list order) found in ``text``.

        Returns an empty string when none of the terms is present.
        """
        for term in terms:
            if self._has_term(term, text):
                return term
        return ''

    def analyze_interview(self, text: str) -> Dict:
        """Analyze a single interview transcript.

        Args:
            text: Raw transcript text.

        Returns:
            A dict with the keys ``pain_points``, ``delights``,
            ``feature_requests``, ``jobs_to_be_done``, ``sentiment_score``,
            ``key_themes``, ``quotes``, ``metrics_mentioned`` and
            ``competitors_mentioned``.
        """
        text_lower = text.lower()
        sentences = self._split_sentences(text)
        return {
            'pain_points': self._extract_pain_points(sentences),
            'delights': self._extract_delights(sentences),
            'feature_requests': self._extract_requests(sentences),
            'jobs_to_be_done': self._extract_jtbd(text_lower),
            'sentiment_score': self._calculate_sentiment(text_lower),
            'key_themes': self._extract_themes(text_lower),
            'quotes': self._extract_key_quotes(sentences),
            'metrics_mentioned': self._extract_metrics(text),
            'competitors_mentioned': self._extract_competitors(text)
        }

    def _split_sentences(self, text: str) -> List[str]:
        """Split text into sentences, keeping each sentence's end punctuation.

        The split happens only on whitespace that follows '.', '!' or '?', so
        decimals such as "3.5" stay intact and a trailing '?' remains visible
        to the quote scoring in ``_extract_key_quotes``.
        """
        pieces = re.split(r'(?<=[.!?])\s+', text)
        return [piece.strip() for piece in pieces if piece.strip()]

    def _extract_pain_points(self, sentences: List[str]) -> List[Dict]:
        """Extract pain points from sentences.

        Returns at most the first 10 matching sentences (transcript order),
        each as ``{'quote', 'indicator', 'severity'}``.
        """
        pain_points = []
        for sentence in sentences:
            sentence_lower = sentence.lower()
            indicator = self._first_term(self.pain_indicators, sentence_lower)
            if indicator:
                pain_points.append({
                    'quote': sentence,
                    'indicator': indicator,
                    'severity': self._assess_severity(sentence_lower)
                })
        return pain_points[:10]

    def _extract_delights(self, sentences: List[str]) -> List[Dict]:
        """Extract positive feedback.

        Returns at most the first 10 matching sentences, each as
        ``{'quote', 'indicator', 'strength'}``.
        """
        delights = []
        for sentence in sentences:
            sentence_lower = sentence.lower()
            indicator = self._first_term(self.delight_indicators, sentence_lower)
            if indicator:
                delights.append({
                    'quote': sentence,
                    'indicator': indicator,
                    'strength': self._assess_strength(sentence_lower)
                })
        return delights[:10]

    def _extract_requests(self, sentences: List[str]) -> List[Dict]:
        """Extract feature requests and suggestions.

        Returns at most the first 10 matching sentences, each as
        ``{'quote', 'type', 'priority'}``.
        """
        requests = []
        for sentence in sentences:
            sentence_lower = sentence.lower()
            if self._has_any(self.request_indicators, sentence_lower):
                requests.append({
                    'quote': sentence,
                    'type': self._classify_request(sentence_lower),
                    'priority': self._assess_request_priority(sentence_lower)
                })
        return requests[:10]

    def _extract_jtbd(self, text: str) -> List[Dict]:
        """Extract Jobs to Be Done patterns.

        Args:
            text: Lower-cased transcript text.

        Returns:
            Up to 5 dicts ``{'job', 'pattern'}``. When a pattern has several
            capture groups, the captured parts are joined with ' | ' so they
            stay readable instead of running into each other.
        """
        jobs = []
        for pattern in self.jtbd_patterns:
            for match in re.findall(pattern, text, re.IGNORECASE):
                # findall yields a tuple for multi-group patterns, else a str
                parts = match if isinstance(match, tuple) else (match,)
                jobs.append({
                    'job': ' | '.join(part.strip() for part in parts),
                    'pattern': pattern
                })
        return jobs[:5]

    def _calculate_sentiment(self, text: str) -> Dict:
        """Calculate overall sentiment of the interview.

        Counts how many distinct delight and pain indicators appear in
        ``text`` (lower-cased) and scores ``(pos - neg) / (pos + neg)``.
        Scores above 0.3 are 'positive', below -0.3 'negative', else 'neutral'.
        """
        positive_count = sum(
            1 for ind in self.delight_indicators if self._has_term(ind, text)
        )
        negative_count = sum(
            1 for ind in self.pain_indicators if self._has_term(ind, text)
        )
        total = positive_count + negative_count
        if total == 0:
            sentiment_score = 0.0
        else:
            sentiment_score = (positive_count - negative_count) / total
        if sentiment_score > 0.3:
            sentiment_label = 'positive'
        elif sentiment_score < -0.3:
            sentiment_label = 'negative'
        else:
            sentiment_label = 'neutral'
        return {
            'score': round(sentiment_score, 2),
            'label': sentiment_label,
            'positive_signals': positive_count,
            'negative_signals': negative_count
        }

    def _extract_themes(self, text: str) -> List[str]:
        """Extract key themes using word frequency.

        Considers lower-case words of four or more letters that are not stop
        words, and returns those among the 10 most frequent that occur at
        least 3 times.
        """
        stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at',
                      'to', 'for', 'of', 'with', 'by', 'from', 'as', 'is',
                      'was', 'are', 'were', 'been', 'be', 'have', 'has',
                      'had', 'do', 'does', 'did', 'will', 'would', 'could',
                      'should', 'may', 'might', 'must', 'can', 'shall',
                      'it', 'i', 'you', 'we', 'they', 'them', 'their'}
        words = re.findall(r'\b[a-z]{4,}\b', text)
        word_freq = Counter(w for w in words if w not in stop_words)
        return [word for word, count in word_freq.most_common(10) if count >= 3]

    def _extract_key_quotes(self, sentences: List[str]) -> List[str]:
        """Extract the most insightful quotes.

        Sentences of 20-200 characters are scored: +2 for a pain indicator,
        +2 for a request indicator, +1 each for 'because', 'but' and a
        question mark. The 5 highest-scoring sentences are returned.
        """
        scored_sentences = []
        for sentence in sentences:
            if len(sentence) < 20 or len(sentence) > 200:
                continue
            score = 0
            sentence_lower = sentence.lower()
            if self._has_any(self.pain_indicators, sentence_lower):
                score += 2
            if self._has_any(self.request_indicators, sentence_lower):
                score += 2
            # Whole-word checks so that e.g. "button" does not count as 'but'
            if re.search(r'\bbecause\b', sentence_lower):
                score += 1
            if re.search(r'\bbut\b', sentence_lower):
                score += 1
            if '?' in sentence:
                score += 1
            if score > 0:
                scored_sentences.append((score, sentence))
        # Highest score first; ties fall back to the tuple's sentence text
        scored_sentences.sort(reverse=True)
        return [s[1] for s in scored_sentences[:5]]

    def _extract_metrics(self, text: str) -> List[str]:
        """Extract any metrics or numbers mentioned.

        Returns up to 10 unique strings in a stable order: percentages first,
        then durations, money amounts and finally "<number> <word>" pairs.
        """
        metrics = []
        # Percentages, e.g. "20%" or "12.5%"
        metrics.extend(re.findall(r'\d+(?:\.\d+)?%', text))
        # Durations, e.g. "3 hours", "1.5 days"
        metrics.extend(re.findall(
            r'\d+(?:\.\d+)?\s*(?:hours?|minutes?|days?|weeks?|months?)\b',
            text, re.IGNORECASE))
        # Money, e.g. "$500", "$1,200.50" (no trailing punctuation captured)
        metrics.extend(re.findall(r'\$\d+(?:,\d{3})*(?:\.\d+)?', text))
        # Plain numbers with the following word as context; the lookbehind
        # skips digits that are the tail of a decimal or money amount.
        for num, context in re.findall(r'(?<![\d.,$])(\d+)\s+([A-Za-z]\w*)', text):
            if context.lower() not in ['the', 'a', 'an', 'and', 'or', 'of']:
                metrics.append(f"{num} {context}")
        # dict.fromkeys de-duplicates while keeping first-seen order
        return list(dict.fromkeys(metrics))[:10]

    def _extract_competitors(self, text: str) -> List[str]:
        """Extract competitor mentions.

        Looks for a word following or preceding comparison phrases ("switched
        from X", "X is better", "compared to X", ...). Returns up to 5 unique
        candidates in first-seen order, minus common filler words and words of
        two characters or fewer.
        """
        # Leading \b keeps phrases from matching inside other words
        # (e.g. 'use' inside "because", 'like' inside "dislike").
        competitor_patterns = [
            r'\b(?:use|used|using|tried|trying|switch from|switched from|instead of)\s+(\w+)',
            r'\b(\w+)\s+(?:is better|works better|is easier)\b',
            r'\bcompared to\s+(\w+)',
            r'\blike\s+(\w+)',
            r'\bsimilar to\s+(\w+)',
        ]
        candidates = []
        for pattern in competitor_patterns:
            candidates.extend(re.findall(pattern, text, re.IGNORECASE))
        # Filter out common words that are never product names
        common_words = {'this', 'that', 'it', 'them', 'other', 'another',
                        'something', 'the', 'these', 'those', 'our', 'your',
                        'their', 'any', 'some'}
        competitors = [
            c for c in dict.fromkeys(candidates)
            if c.lower() not in common_words and len(c) > 2
        ]
        return competitors[:5]

    def _assess_severity(self, text: str) -> str:
        """Assess severity of pain point: 'high', 'low' or 'medium' (default)."""
        if self._has_any(['very', 'extremely', 'really', 'totally', 'completely'], text):
            return 'high'
        if self._has_any(['somewhat', 'bit', 'little', 'slightly'], text):
            return 'low'
        return 'medium'

    def _assess_strength(self, text: str) -> str:
        """Assess strength of positive feedback: 'strong' or 'moderate'."""
        if self._has_any(['absolutely', 'definitely', 'really', 'very'], text):
            return 'strong'
        return 'moderate'

    def _classify_request(self, text: str) -> str:
        """Classify the type of request.

        Categories are checked in order: ui_improvement, new_feature,
        bug_fix, performance; anything else is 'general'.
        """
        if self._has_any(['ui', 'design', 'look', 'color', 'layout'], text):
            return 'ui_improvement'
        if self._has_any(['feature', 'add', 'new', 'build'], text):
            return 'new_feature'
        if self._has_any(['fix', 'bug', 'broken', 'work'], text):
            return 'bug_fix'
        if self._has_any(['faster', 'slow', 'performance', 'speed'], text):
            return 'performance'
        return 'general'

    def _assess_request_priority(self, text: str) -> str:
        """Assess priority of request: 'critical', 'high', 'low' or 'medium'."""
        if self._has_any(['critical', 'urgent', 'asap', 'immediately', 'blocking'], text):
            return 'critical'
        if self._has_any(['need', 'important', 'should', 'must'], text):
            return 'high'
        if self._has_any(['nice', 'would', 'could', 'maybe'], text):
            return 'low'
        return 'medium'
def aggregate_interviews(interviews: List[Dict]) -> Dict:
    """Aggregate insights from multiple interviews.

    Args:
        interviews: Analysis dicts, one per interview. Missing sections are
            treated as empty; a missing sentiment label counts as 'neutral'.

    Returns:
        A dict with:
          - ``total_interviews``: number of analyses given.
          - ``common_pain_points``: pain quotes grouped by indicator.
          - ``common_requests``: request quotes grouped by request type.
          - ``jobs_to_be_done``: all JTBD entries concatenated.
          - ``overall_sentiment``: count of interviews per sentiment label.
          - ``top_themes``: the 10 themes seen in the most interviews.
          - ``metrics_summary``: unique metrics in first-seen order.
          - ``competitors_mentioned``: mention count per competitor.
    """
    pain_quotes = defaultdict(list)
    request_quotes = defaultdict(list)
    jobs = []
    sentiment_counts = {'positive': 0, 'negative': 0, 'neutral': 0}
    theme_counts = Counter()
    # A dict is used as an insertion-ordered set so the summary order is
    # reproducible between runs (a plain set is not).
    metrics_seen = {}
    competitor_counts = Counter()

    for interview in interviews:
        # Aggregate pain points
        for pain in interview.get('pain_points', []):
            pain_quotes[pain.get('indicator', 'unknown')].append(pain['quote'])
        # Aggregate requests
        for request in interview.get('feature_requests', []):
            request_quotes[request.get('type', 'general')].append(request['quote'])
        # Aggregate JTBD
        jobs.extend(interview.get('jobs_to_be_done', []))
        # Aggregate sentiment; unexpected labels get their own bucket
        # instead of raising KeyError
        label = interview.get('sentiment_score', {}).get('label', 'neutral')
        sentiment_counts[label] = sentiment_counts.get(label, 0) + 1
        # Aggregate themes, metrics and competitors
        theme_counts.update(interview.get('key_themes', []))
        metrics_seen.update(dict.fromkeys(interview.get('metrics_mentioned', [])))
        competitor_counts.update(interview.get('competitors_mentioned', []))

    return {
        'total_interviews': len(interviews),
        'common_pain_points': dict(pain_quotes),
        'common_requests': dict(request_quotes),
        'jobs_to_be_done': jobs,
        'overall_sentiment': sentiment_counts,
        'top_themes': dict(theme_counts.most_common(10)),
        'metrics_summary': list(metrics_seen),
        'competitors_mentioned': dict(competitor_counts)
    }
def _truncate_quote(quote: str, limit: int = 100) -> str:
    """Cut ``quote`` to ``limit`` characters, adding '...' only if text was dropped."""
    if len(quote) <= limit:
        return quote
    return quote[:limit] + "..."


def format_single_interview(analysis: Dict) -> str:
    """Format single interview analysis as a human-readable text report.

    Args:
        analysis: A dict with the keys ``sentiment_score``, ``pain_points``,
            ``feature_requests``, ``jobs_to_be_done``, ``key_themes``,
            ``quotes``, ``metrics_mentioned`` and ``competitors_mentioned``.

    Returns:
        The report as one newline-joined string. Sections whose list is
        empty are omitted; at most 5 pain points, 5 feature requests and
        3 key quotes are shown. Quotes longer than 100 characters are cut
        and marked with a trailing "...".
    """
    output = ["=" * 60]
    output.append("CUSTOMER INTERVIEW ANALYSIS")
    output.append("=" * 60)
    # Sentiment
    sentiment = analysis['sentiment_score']
    output.append(f"\n📊 Overall Sentiment: {sentiment['label'].upper()}")
    output.append(f" Score: {sentiment['score']}")
    output.append(f" Positive signals: {sentiment['positive_signals']}")
    output.append(f" Negative signals: {sentiment['negative_signals']}")
    # Pain Points
    if analysis['pain_points']:
        output.append("\n🔥 Pain Points Identified:")
        for i, pain in enumerate(analysis['pain_points'][:5], 1):
            output.append(
                f"\n{i}. [{pain['severity'].upper()}] {_truncate_quote(pain['quote'])}"
            )
    # Feature Requests
    if analysis['feature_requests']:
        output.append("\n💡 Feature Requests:")
        for i, req in enumerate(analysis['feature_requests'][:5], 1):
            output.append(f"\n{i}. [{req['type']}] Priority: {req['priority']}")
            output.append(f" \"{_truncate_quote(req['quote'])}\"")
    # Jobs to Be Done
    if analysis['jobs_to_be_done']:
        output.append("\n🎯 Jobs to Be Done:")
        for i, job in enumerate(analysis['jobs_to_be_done'], 1):
            output.append(f"{i}. {job['job']}")
    # Key Themes
    if analysis['key_themes']:
        output.append("\n🏷️ Key Themes:")
        output.append(", ".join(analysis['key_themes']))
    # Key Quotes
    if analysis['quotes']:
        output.append("\n💬 Key Quotes:")
        for i, quote in enumerate(analysis['quotes'][:3], 1):
            output.append(f'{i}. "{quote}"')
    # Metrics
    if analysis['metrics_mentioned']:
        output.append("\n📈 Metrics Mentioned:")
        output.append(", ".join(analysis['metrics_mentioned']))
    # Competitors
    if analysis['competitors_mentioned']:
        output.append("\n🏢 Competitors Mentioned:")
        output.append(", ".join(analysis['competitors_mentioned']))
    return "\n".join(output)
def main():
    """Command-line entry point: analyze one transcript file and print the result.

    Usage: ``customer_interview_analyzer.py [--json] <interview_file.txt>``

    Without a file argument a short usage text is printed and the process
    exits with status 1. With ``--json`` the raw analysis dict is printed as
    JSON; otherwise a formatted text report is printed.
    """
    import sys
    import argparse
    parser = argparse.ArgumentParser(
        description="Customer Interview Analyzer - Extracts insights, patterns, and opportunities from user interviews"
    )
    parser.add_argument(
        "file", nargs="?", default=None,
        help="Interview transcript text file to analyze"
    )
    parser.add_argument(
        "--json", action="store_true",
        help="Output results as JSON"
    )
    args = parser.parse_args()
    if not args.file:
        print("Usage: python customer_interview_analyzer.py <interview_file.txt>")
        print("\nThis tool analyzes customer interview transcripts to extract:")
        print(" - Pain points and frustrations")
        print(" - Feature requests and suggestions")
        print(" - Jobs to be done")
        print(" - Sentiment analysis")
        print(" - Key themes and quotes")
        sys.exit(1)
    try:
        # Explicit UTF-8 so results do not depend on the platform's default
        # encoding (transcripts commonly contain curly quotes, emoji, etc.)
        with open(args.file, 'r', encoding='utf-8') as f:
            interview_text = f.read()
    except (OSError, UnicodeDecodeError) as exc:
        # sys.exit with a string prints it to stderr and exits with status 1
        sys.exit(f"Error: could not read '{args.file}': {exc}")
    analyzer = InterviewAnalyzer()
    analysis = analyzer.analyze_interview(interview_text)
    if args.json:
        print(json.dumps(analysis, indent=2))
    else:
        print(format_single_interview(analysis))


if __name__ == "__main__":
    main()