Advanced SQL-powered movie discovery and recommendation system
-- Find users with similar preferences and recommend their liked movies
WITH user_similarities AS (
SELECT
r1.user_id AS user1,
r2.user_id AS user2,
ROUND(
SUM(r1.rating * r2.rating) /
(SQRT(SUM(r1.rating * r1.rating)) * SQRT(SUM(r2.rating * r2.rating))), 4
) AS similarity_score
FROM ratings r1
JOIN ratings r2 ON r1.movie_id = r2.movie_id
WHERE r1.user_id != r2.user_id
GROUP BY r1.user_id, r2.user_id
HAVING similarity_score > 0.5
),
similar_users_movies AS (
SELECT
us.user1 AS target_user,
r.movie_id,
AVG(r.rating * us.similarity_score) AS predicted_rating
FROM user_similarities us
JOIN ratings r ON us.user2 = r.user_id
LEFT JOIN ratings ur ON ur.user_id = us.user1 AND ur.movie_id = r.movie_id
WHERE ur.movie_id IS NULL -- User hasn't seen this movie
GROUP BY us.user1, r.movie_id
)
SELECT
m.title,
m.genre,
m.release_year,
ROUND(sum.predicted_rating, 2) AS predicted_rating,
m.avg_rating,
m.popularity_score
FROM similar_users_movies sum
JOIN movies m ON sum.movie_id = m.movie_id
WHERE sum.target_user = ?
ORDER BY sum.predicted_rating DESC
LIMIT 10;
-- Recommend movies based on user's genre and actor preferences
WITH user_preferences AS (
SELECT
r.user_id,
m.genre,
ma.actor_name,
AVG(r.rating) AS avg_preference_rating,
COUNT(*) AS interaction_count
FROM ratings r
JOIN movies m ON r.movie_id = m.movie_id
JOIN movie_actors ma ON m.movie_id = ma.movie_id
WHERE r.rating >= 4 -- Only consider liked movies
GROUP BY r.user_id, m.genre, ma.actor_name
),
content_scores AS (
SELECT
m.movie_id,
m.title,
m.genre,
SUM(CASE
WHEN up_genre.avg_preference_rating IS NOT NULL THEN up_genre.avg_preference_rating * 0.6
ELSE 0
END) +
SUM(CASE
WHEN up_actor.avg_preference_rating IS NOT NULL THEN up_actor.avg_preference_rating * 0.4
ELSE 0
END) AS content_score
FROM movies m
LEFT JOIN user_preferences up_genre ON m.genre = up_genre.genre AND up_genre.user_id = ?
LEFT JOIN movie_actors ma ON m.movie_id = ma.movie_id
LEFT JOIN user_preferences up_actor ON ma.actor_name = up_actor.actor_name AND up_actor.user_id = ?
LEFT JOIN ratings r ON m.movie_id = r.movie_id AND r.user_id = ?
WHERE r.movie_id IS NULL -- User hasn't seen this movie
GROUP BY m.movie_id, m.title, m.genre
HAVING content_score > 0
)
SELECT
cs.title,
cs.genre,
m.release_year,
ROUND(cs.content_score, 2) AS recommendation_score,
m.avg_rating,
m.popularity_score
FROM content_scores cs
JOIN movies m ON cs.movie_id = m.movie_id
ORDER BY cs.content_score DESC, m.avg_rating DESC
LIMIT 10;
-- Combine collaborative and content-based approaches
WITH collaborative_scores AS (
-- Collaborative filtering logic (abbreviated)
SELECT movie_id, predicted_rating AS collab_score
FROM collaborative_recommendations
WHERE user_id = ?
),
content_scores AS (
-- Content-based logic (abbreviated)
SELECT movie_id, recommendation_score AS content_score
FROM content_recommendations
WHERE user_id = ?
),
hybrid_recommendations AS (
SELECT
m.movie_id,
m.title,
m.genre,
m.release_year,
COALESCE(cs.collab_score, 0) AS collab_score,
COALESCE(ct.content_score, 0) AS content_score,
-- Weighted hybrid score: 60% collaborative, 40% content-based
(COALESCE(cs.collab_score, 0) * 0.6 +
COALESCE(ct.content_score, 0) * 0.4) AS hybrid_score,
m.avg_rating,
m.popularity_score,
CASE
WHEN cs.collab_score IS NOT NULL AND ct.content_score IS NOT NULL THEN 'Hybrid'
WHEN cs.collab_score IS NOT NULL THEN 'Collaborative'
WHEN ct.content_score IS NOT NULL THEN 'Content-Based'
ELSE 'Popular'
END AS recommendation_type
FROM movies m
LEFT JOIN collaborative_scores cs ON m.movie_id = cs.movie_id
LEFT JOIN content_scores ct ON m.movie_id = ct.movie_id
LEFT JOIN ratings r ON m.movie_id = r.movie_id AND r.user_id = ?
WHERE r.movie_id IS NULL -- User hasn't seen this movie
AND (cs.collab_score IS NOT NULL OR ct.content_score IS NOT NULL OR m.popularity_score > 7.5)
)
SELECT
title,
genre,
release_year,
ROUND(hybrid_score, 2) AS final_score,
avg_rating,
recommendation_type
FROM hybrid_recommendations
ORDER BY hybrid_score DESC, avg_rating DESC
LIMIT 15;
-- Genre popularity and rating analysis
SELECT
genre,
COUNT(*) AS movie_count,
ROUND(AVG(avg_rating), 2) AS avg_genre_rating,
ROUND(AVG(popularity_score), 2) AS avg_popularity,
COUNT(DISTINCT CASE WHEN release_year >= 2020 THEN movie_id END) AS recent_movies
FROM movies
GROUP BY genre
ORDER BY avg_genre_rating DESC, movie_count DESC;
-- User engagement and preference patterns
SELECT
u.user_id,
u.username,
COUNT(r.rating) AS total_ratings,
ROUND(AVG(r.rating), 2) AS avg_user_rating,
MODE() WITHIN GROUP (ORDER BY m.genre) AS favorite_genre,
COUNT(DISTINCT m.genre) AS genre_diversity,
MAX(r.rating_date) AS last_activity
FROM users u
LEFT JOIN ratings r ON u.user_id = r.user_id
LEFT JOIN movies m ON r.movie_id = m.movie_id
GROUP BY u.user_id, u.username
ORDER BY total_ratings DESC;
-- Recommendation success rate analysis
WITH recommendation_performance AS (
SELECT
rec.user_id,
rec.movie_id,
rec.recommendation_score,
rec.recommendation_type,
CASE
WHEN r.rating >= 4 THEN 1
WHEN r.rating IS NOT NULL THEN 0
ELSE NULL
END AS success_flag
FROM recommendations rec
LEFT JOIN ratings r ON rec.user_id = r.user_id AND rec.movie_id = r.movie_id
WHERE rec.created_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
)
SELECT
recommendation_type,
COUNT(*) AS total_recommendations,
COUNT(success_flag) AS rated_recommendations,
SUM(success_flag) AS successful_recommendations,
ROUND(SUM(success_flag) * 100.0 / COUNT(success_flag), 2) AS success_rate_percent
FROM recommendation_performance
WHERE success_flag IS NOT NULL
GROUP BY recommendation_type
ORDER BY success_rate_percent DESC;