🎬 Movie Recommendation Engine

Advanced SQL-powered movie discovery and recommendation system

👤 User Profile & Preferences

Movies Watched: 0
Average Rating: 0.0
Favorite Genre: -

🎯 Recommendation Filters

🎯 Personalized Recommendations


📊 Recommendation Analytics

👥 User Preferences Distribution

⭐ Rating Distribution by Genre

🎬 Movie Popularity Trends

🔗 Recommendation Success Rate

💻 SQL Queries Powering the Engine

🤝 Collaborative Filtering Algorithm

-- Find users with similar preferences and recommend their liked movies
WITH user_similarities AS (
    SELECT 
        r1.user_id AS user1,
        r2.user_id AS user2,
        ROUND(
            SUM(r1.rating * r2.rating) / 
            (SQRT(SUM(r1.rating * r1.rating)) * SQRT(SUM(r2.rating * r2.rating))), 4
        ) AS similarity_score
    FROM ratings r1
    JOIN ratings r2 ON r1.movie_id = r2.movie_id
    WHERE r1.user_id != r2.user_id
    GROUP BY r1.user_id, r2.user_id
    HAVING similarity_score > 0.5
),
similar_users_movies AS (
    SELECT 
        us.user1 AS target_user,
        r.movie_id,
        AVG(r.rating * us.similarity_score) AS predicted_rating
    FROM user_similarities us
    JOIN ratings r ON us.user2 = r.user_id
    LEFT JOIN ratings ur ON ur.user_id = us.user1 AND ur.movie_id = r.movie_id
    WHERE ur.movie_id IS NULL  -- User hasn't seen this movie
    GROUP BY us.user1, r.movie_id
)
SELECT 
    m.title,
    m.genre,
    m.release_year,
    ROUND(sum.predicted_rating, 2) AS predicted_rating,
    m.avg_rating,
    m.popularity_score
FROM similar_users_movies sum
JOIN movies m ON sum.movie_id = m.movie_id
WHERE sum.target_user = ?
ORDER BY sum.predicted_rating DESC
LIMIT 10;

🎭 Content-Based Recommendations

-- Recommend movies based on user's genre and actor preferences
WITH user_preferences AS (
    SELECT 
        r.user_id,
        m.genre,
        ma.actor_name,
        AVG(r.rating) AS avg_preference_rating,
        COUNT(*) AS interaction_count
    FROM ratings r
    JOIN movies m ON r.movie_id = m.movie_id
    JOIN movie_actors ma ON m.movie_id = ma.movie_id
    WHERE r.rating >= 4  -- Only consider liked movies
    GROUP BY r.user_id, m.genre, ma.actor_name
),
content_scores AS (
    SELECT 
        m.movie_id,
        m.title,
        m.genre,
        SUM(CASE 
            WHEN up_genre.avg_preference_rating IS NOT NULL THEN up_genre.avg_preference_rating * 0.6
            ELSE 0 
        END) +
        SUM(CASE 
            WHEN up_actor.avg_preference_rating IS NOT NULL THEN up_actor.avg_preference_rating * 0.4
            ELSE 0 
        END) AS content_score
    FROM movies m
    LEFT JOIN user_preferences up_genre ON m.genre = up_genre.genre AND up_genre.user_id = ?
    LEFT JOIN movie_actors ma ON m.movie_id = ma.movie_id
    LEFT JOIN user_preferences up_actor ON ma.actor_name = up_actor.actor_name AND up_actor.user_id = ?
    LEFT JOIN ratings r ON m.movie_id = r.movie_id AND r.user_id = ?
    WHERE r.movie_id IS NULL  -- User hasn't seen this movie
    GROUP BY m.movie_id, m.title, m.genre
    HAVING content_score > 0
)
SELECT 
    cs.title,
    cs.genre,
    m.release_year,
    ROUND(cs.content_score, 2) AS recommendation_score,
    m.avg_rating,
    m.popularity_score
FROM content_scores cs
JOIN movies m ON cs.movie_id = m.movie_id
ORDER BY cs.content_score DESC, m.avg_rating DESC
LIMIT 10;

🔄 Hybrid Recommendation Algorithm

-- Combine collaborative and content-based approaches
WITH collaborative_scores AS (
    -- Collaborative filtering logic (abbreviated)
    SELECT movie_id, predicted_rating AS collab_score
    FROM collaborative_recommendations 
    WHERE user_id = ?
),
content_scores AS (
    -- Content-based logic (abbreviated)
    SELECT movie_id, recommendation_score AS content_score
    FROM content_recommendations 
    WHERE user_id = ?
),
hybrid_recommendations AS (
    SELECT 
        m.movie_id,
        m.title,
        m.genre,
        m.release_year,
        COALESCE(cs.collab_score, 0) AS collab_score,
        COALESCE(ct.content_score, 0) AS content_score,
        -- Weighted hybrid score: 60% collaborative, 40% content-based
        (COALESCE(cs.collab_score, 0) * 0.6 + 
         COALESCE(ct.content_score, 0) * 0.4) AS hybrid_score,
        m.avg_rating,
        m.popularity_score,
        CASE 
            WHEN cs.collab_score IS NOT NULL AND ct.content_score IS NOT NULL THEN 'Hybrid'
            WHEN cs.collab_score IS NOT NULL THEN 'Collaborative'
            WHEN ct.content_score IS NOT NULL THEN 'Content-Based'
            ELSE 'Popular'
        END AS recommendation_type
    FROM movies m
    LEFT JOIN collaborative_scores cs ON m.movie_id = cs.movie_id
    LEFT JOIN content_scores ct ON m.movie_id = ct.movie_id
    LEFT JOIN ratings r ON m.movie_id = r.movie_id AND r.user_id = ?
    WHERE r.movie_id IS NULL  -- User hasn't seen this movie
    AND (cs.collab_score IS NOT NULL OR ct.content_score IS NOT NULL OR m.popularity_score > 7.5)
)
SELECT 
    title,
    genre,
    release_year,
    ROUND(hybrid_score, 2) AS final_score,
    avg_rating,
    recommendation_type
FROM hybrid_recommendations
ORDER BY hybrid_score DESC, avg_rating DESC
LIMIT 15;

📈 Analytics and Insights Queries

-- Genre popularity and rating analysis
SELECT 
    genre,
    COUNT(*) AS movie_count,
    ROUND(AVG(avg_rating), 2) AS avg_genre_rating,
    ROUND(AVG(popularity_score), 2) AS avg_popularity,
    COUNT(DISTINCT CASE WHEN release_year >= 2020 THEN movie_id END) AS recent_movies
FROM movies
GROUP BY genre
ORDER BY avg_genre_rating DESC, movie_count DESC;

-- User engagement and preference patterns
SELECT 
    u.user_id,
    u.username,
    COUNT(r.rating) AS total_ratings,
    ROUND(AVG(r.rating), 2) AS avg_user_rating,
    MODE() WITHIN GROUP (ORDER BY m.genre) AS favorite_genre,
    COUNT(DISTINCT m.genre) AS genre_diversity,
    MAX(r.rating_date) AS last_activity
FROM users u
LEFT JOIN ratings r ON u.user_id = r.user_id
LEFT JOIN movies m ON r.movie_id = m.movie_id
GROUP BY u.user_id, u.username
ORDER BY total_ratings DESC;

-- Recommendation success rate analysis
WITH recommendation_performance AS (
    SELECT 
        rec.user_id,
        rec.movie_id,
        rec.recommendation_score,
        rec.recommendation_type,
        CASE 
            WHEN r.rating >= 4 THEN 1 
            WHEN r.rating IS NOT NULL THEN 0
            ELSE NULL 
        END AS success_flag
    FROM recommendations rec
    LEFT JOIN ratings r ON rec.user_id = r.user_id AND rec.movie_id = r.movie_id
    WHERE rec.created_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
)
SELECT 
    recommendation_type,
    COUNT(*) AS total_recommendations,
    COUNT(success_flag) AS rated_recommendations,
    SUM(success_flag) AS successful_recommendations,
    ROUND(SUM(success_flag) * 100.0 / COUNT(success_flag), 2) AS success_rate_percent
FROM recommendation_performance
WHERE success_flag IS NOT NULL
GROUP BY recommendation_type
ORDER BY success_rate_percent DESC;

🗄️ Database Schema

📋 Tables Structure

👥 users

  • user_id (PK)
  • username
  • email
  • registration_date
  • preferences

🎬 movies

  • movie_id (PK)
  • title
  • genre
  • release_year
  • director
  • avg_rating
  • popularity_score

⭐ ratings

  • rating_id (PK)
  • user_id (FK)
  • movie_id (FK)
  • rating
  • review
  • rating_date

🎭 movie_actors

  • movie_id (FK)
  • actor_name
  • role
  • character_name