-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathadvanced_sql_analytics_examples.sql
More file actions
169 lines (156 loc) · 5 KB
/
Copy pathadvanced_sql_analytics_examples.sql
File metadata and controls
169 lines (156 loc) · 5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
-- advanced_sql_analytics_examples.sql
-- SQL portfolio file for GitHub language detection.
-- Demonstrates advanced SQL analytics skills:
-- schema design, INSERT statements, joins, CTEs, aggregations,
-- CASE logic, DATE columns, subqueries, window functions, and data validation checks.
-- Reset the schema so the script can be re-run from a clean state.
-- sales is dropped first because it declares foreign keys to both
-- customers and products; the referenced tables are dropped afterwards.
DROP TABLE IF EXISTS sales;
DROP TABLE IF EXISTS products;
DROP TABLE IF EXISTS customers;
-- customers: one row per customer, identified by customer_id.
CREATE TABLE customers (
    customer_id      INTEGER,
    customer_name    VARCHAR(100),
    state            VARCHAR(2),      -- at most two characters; seed rows use codes such as 'MD'
    customer_segment VARCHAR(50),
    signup_date      DATE,
    PRIMARY KEY (customer_id)
);
-- products: one row per sellable item, identified by product_id.
CREATE TABLE products (
    product_id   INTEGER,
    product_name VARCHAR(100),
    category     VARCHAR(50),
    unit_price   DECIMAL(10, 2),      -- list price per unit, before any discount
    PRIMARY KEY (product_id)
);
-- sales: one row per sale, linking a customer to a product.
CREATE TABLE sales (
    sale_id       INTEGER,
    customer_id   INTEGER,
    product_id    INTEGER,
    sale_date     DATE,
    quantity      INTEGER,
    discount_rate DECIMAL(5, 2),      -- fraction taken off the price; queries apply it as (1 - discount_rate)
    PRIMARY KEY (sale_id),
    FOREIGN KEY (customer_id) REFERENCES customers(customer_id),
    FOREIGN KEY (product_id) REFERENCES products(product_id)
);
-- Seed data: customers.
-- The column list is spelled out so the statement does not depend on the
-- physical column order of the table and keeps working if columns are added.
INSERT INTO customers (
    customer_id,
    customer_name,
    state,
    customer_segment,
    signup_date
)
VALUES
    (1, 'Avery Johnson', 'MD', 'Healthcare', '2023-01-15'),
    (2, 'Jordan Smith', 'VA', 'Education', '2023-02-10'),
    (3, 'Taylor Brown', 'DC', 'Government', '2023-03-22'),
    (4, 'Morgan Davis', 'MD', 'Retail', '2023-04-18');
-- Seed data: products.
-- The column list is spelled out so the statement does not depend on the
-- physical column order of the table and keeps working if columns are added.
INSERT INTO products (
    product_id,
    product_name,
    category,
    unit_price
)
VALUES
    (101, 'Analytics Dashboard', 'Software', 499.00),
    (102, 'Data Integration Tool', 'Software', 799.00),
    (103, 'Training Workshop', 'Services', 1200.00),
    (104, 'Reporting Template', 'Services', 250.00);
-- Seed data: sales.
-- The column list is spelled out so the statement does not depend on the
-- physical column order of the table and keeps working if columns are added.
-- Every customer_id / product_id below refers to a row seeded into
-- customers / products, as the foreign keys on sales require.
INSERT INTO sales (
    sale_id,
    customer_id,
    product_id,
    sale_date,
    quantity,
    discount_rate
)
VALUES
    (1001, 1, 101, '2024-01-05', 2, 0.10),
    (1002, 1, 103, '2024-02-14', 1, 0.00),
    (1003, 2, 102, '2024-03-03', 1, 0.05),
    (1004, 3, 101, '2024-03-18', 3, 0.15),
    (1005, 4, 104, '2024-04-20', 5, 0.00),
    (1006, 2, 103, '2024-05-11', 1, 0.10),
    (1007, 3, 102, '2024-06-09', 2, 0.05),
    (1008, 4, 101, '2024-06-25', 1, 0.00);
-- 1. Revenue by customer using joins and calculated fields
-- Net revenue of a sale is quantity * unit_price * (1 - discount_rate).
-- Sales are totalled per customer_id first; customer attributes are attached
-- afterwards. Customers without any priced sale do not appear (inner joins).
WITH revenue_per_customer AS (
    SELECT
        s.customer_id,
        SUM(s.quantity * p.unit_price * (1 - s.discount_rate)) AS net_revenue
    FROM sales AS s
    INNER JOIN products AS p
        ON p.product_id = s.product_id
    GROUP BY s.customer_id
)
SELECT
    c.customer_id,
    c.customer_name,
    c.state,
    c.customer_segment,
    r.net_revenue
FROM revenue_per_customer AS r
INNER JOIN customers AS c
    ON c.customer_id = r.customer_id
ORDER BY r.net_revenue DESC;
-- 2. Category-level performance summary
-- The per-sale net amount is derived once in sale_lines and then reused by
-- every aggregate. Only categories whose total net revenue exceeds 1000 are kept.
WITH sale_lines AS (
    SELECT
        p.category,
        s.sale_id,
        s.quantity,
        s.quantity * p.unit_price * (1 - s.discount_rate) AS net_amount
    FROM sales AS s
    INNER JOIN products AS p
        ON s.product_id = p.product_id
)
SELECT
    category,
    COUNT(sale_id) AS number_of_sales,
    SUM(quantity) AS units_sold,
    ROUND(SUM(net_amount), 2) AS net_revenue,
    ROUND(AVG(net_amount), 2) AS average_sale_value
FROM sale_lines
GROUP BY category
HAVING SUM(net_amount) > 1000      -- filter on the unrounded total
ORDER BY net_revenue DESC;
-- 3. Customer segmentation with CASE logic
-- Step 1 (sale_lines): net amount of each sale that matches a product.
-- Step 2 (customer_totals): total of those amounts per customer.
-- Step 3: label each customer by total revenue band.
WITH sale_lines AS (
    SELECT
        s.customer_id,
        s.quantity * p.unit_price * (1 - s.discount_rate) AS line_revenue
    FROM sales AS s
    INNER JOIN products AS p
        ON p.product_id = s.product_id
),
customer_totals AS (
    SELECT
        c.customer_id,
        c.customer_name,
        SUM(l.line_revenue) AS total_revenue
    FROM customers AS c
    INNER JOIN sale_lines AS l
        ON l.customer_id = c.customer_id
    GROUP BY
        c.customer_id,
        c.customer_name
)
SELECT
    customer_id,
    customer_name,
    total_revenue,
    -- Branches are evaluated top-down; a NULL total falls through to ELSE.
    CASE
        WHEN total_revenue >= 2000 THEN 'High Value'
        WHEN total_revenue >= 1000 THEN 'Medium Value'
        ELSE 'Low Value'
    END AS revenue_segment
FROM customer_totals
ORDER BY total_revenue DESC;
-- 4. Window function for customer ranking by state
-- state_revenue totals net revenue per customer. customer_id is part of the
-- grouping key so that two different customers who share a name and a state
-- are ranked separately instead of being merged into one total.
WITH state_revenue AS (
    SELECT
        c.customer_id,
        c.state,
        c.customer_name,
        SUM(s.quantity * p.unit_price * (1 - s.discount_rate)) AS total_revenue
    FROM customers AS c
    INNER JOIN sales AS s
        ON c.customer_id = s.customer_id
    INNER JOIN products AS p
        ON s.product_id = p.product_id
    GROUP BY
        c.customer_id,
        c.state,
        c.customer_name
)
SELECT
    state,
    customer_name,
    total_revenue,
    -- RANK() gives tied customers the same rank and leaves a gap after them.
    RANK() OVER (
        PARTITION BY state
        ORDER BY total_revenue DESC
    ) AS state_rank
FROM state_revenue
-- customer_name and customer_id break ties so the row order is deterministic.
ORDER BY
    state,
    state_rank,
    customer_name,
    customer_id;
-- 5. Subquery to find above-average sales
-- priced_sales holds every sale that matches a product together with its net
-- amount. The average is taken over that whole set (independent of the
-- customers join), and only sales strictly above it are returned.
WITH priced_sales AS (
    SELECT
        s.sale_id,
        s.customer_id,
        p.product_name,
        s.quantity,
        p.unit_price,
        s.quantity * p.unit_price * (1 - s.discount_rate) AS net_sale_amount
    FROM sales AS s
    INNER JOIN products AS p
        ON s.product_id = p.product_id
)
SELECT
    ps.sale_id,
    c.customer_name,
    ps.product_name,
    ps.quantity,
    ps.unit_price,
    ps.net_sale_amount
FROM priced_sales AS ps
INNER JOIN customers AS c
    ON ps.customer_id = c.customer_id
WHERE ps.net_sale_amount > (
    SELECT AVG(net_sale_amount)
    FROM priced_sales
)
ORDER BY ps.net_sale_amount DESC;
-- 6. Data validation checks
-- Returns a single row of counters over the sales table.
-- COUNT(CASE ... THEN 1 END) counts only the rows where the condition holds
-- and yields 0 (not NULL) when the table is empty, matching COUNT(*).
-- NULL quantity / discount_rate are counted as invalid: a NULL in either
-- column makes quantity * unit_price * (1 - discount_rate) NULL, so the sale
-- would silently drop out of the revenue sums in the queries above.
SELECT
    COUNT(*) AS total_sales_records,
    COUNT(CASE WHEN customer_id IS NULL THEN 1 END) AS missing_customer_id,
    COUNT(CASE WHEN product_id IS NULL THEN 1 END) AS missing_product_id,
    COUNT(CASE WHEN quantity IS NULL OR quantity <= 0 THEN 1 END) AS invalid_quantity,
    COUNT(
        CASE
            WHEN discount_rate IS NULL
                OR discount_rate < 0
                OR discount_rate > 1 THEN 1
        END
    ) AS invalid_discount_rate
FROM sales;