Run Code
|
API
|
Code Wall
|
Misc
|
Feedback
|
Login
|
Theme
|
Privacy
|
Patreon
Ecommerce Website Analysis by SQL
-- E-commerce Website SQL Analysis Conducted by Ziying Yan ------------------------------------------------------------------------------------------ -- Database Script CREATE TABLE Ecommerce_customers ("customerid" int, "country" varchar(11)) ; INSERT INTO Ecommerce_customers ("customerid", "country") VALUES (1, 'Germany'), (2, 'Mexico'), (3, 'Mexico'), (4, 'UK'), (5, 'Sweden'), (6, 'Germany'), (7, 'France'), (8, 'Spain'), (9, 'France'), (10, 'Canada'), (11, 'UK'), (12, 'Argentina'), (13, 'Mexico'), (14, 'Switzerland'), (15, 'Brazil'), (16, 'UK'), (17, 'Germany'), (18, 'France'), (19, 'UK'), (20, 'Austria') ; CREATE TABLE Ecommerce_categories ("categoryid" int, "categoryname" varchar(14)) ; INSERT INTO Ecommerce_categories ("categoryid", "categoryname") VALUES (1, 'Beverages'), (2, 'Condiments'), (3, 'Confections'), (4, 'Dairy Products'), (5, 'Grains/Cereals'), (6, 'Meat/Poultry'), (7, 'Produce'), (8, 'Seafood') ; CREATE TABLE Ecommerce_orderdetails ("orderid" int, "productid" int, "quantity" int) ; INSERT INTO Ecommerce_orderdetails ("orderid", "productid", "quantity") VALUES (10248, 11, 12), (10248, 42, 10), (10248, 72, 5), (10249, 14, 9), (10249, 51, 40), (10250, 41, 10), (10250, 51, 35), (10250, 65, 15), (10251, 22, 6), (10251, 57, 15), (10251, 65, 20), (10252, 20, 40), (10252, 33, 25), (10252, 60, 40), (10253, 31, 20), (10253, 39, 42), (10253, 49, 40), (10254, 24, 15), (10254, 55, 21), (10254, 74, 21), (10255, 2, 20), (10255, 16, 35), (10255, 36, 25), (10255, 59, 30), (10256, 53, 15), (10256, 77, 12), (10257, 27, 25), (10257, 39, 6), (10257, 77, 15), (10258, 2, 50), (10258, 5, 65), (10258, 32, 6), (10259, 21, 10), (10259, 37, 1), (10260, 41, 16), (10260, 57, 50), (10260, 62, 15), (10260, 70, 21), (10261, 21, 20), (10261, 35, 20), (10262, 5, 12), (10262, 7, 15), (10262, 56, 2), (10263, 16, 60), (10263, 24, 28), (10263, 30, 60), (10263, 74, 36), (10264, 2, 35), (10264, 41, 25), (10265, 17, 30), (10265, 70, 20), (10266, 12, 12), (10267, 40, 50), (10267, 59, 70), (10267, 76, 15), (10268, 29, 10), (10268, 72, 4), (10269, 33, 60), (10269, 72, 20), (10270, 36, 30), (10270, 43, 25), (10271, 33, 24), (10272, 20, 6), (10272, 31, 40), (10272, 72, 24), (10273, 10, 24), (10273, 31, 15), (10273, 33, 20), (10273, 40, 60), (10273, 76, 33), (10274, 71, 20), (10274, 72, 7), (10275, 24, 12), (10275, 59, 6), (10276, 10, 15), (10276, 13, 10), (10277, 28, 20), (10277, 62, 12), (10278, 44, 16), (10278, 59, 15), (10278, 63, 8), (10278, 73, 25), (10279, 17, 15), (10280, 24, 12), (10280, 55, 20), (10280, 75, 30), (10281, 19, 1), (10281, 24, 6), (10281, 35, 4), (10282, 30, 6), (10282, 57, 2), (10283, 15, 20), (10283, 19, 18), (10283, 60, 35), (10283, 72, 3), (10284, 27, 15), (10284, 44, 21), (10284, 60, 20), (10284, 67, 5) ; CREATE TABLE Ecommerce_orders ("orderid" int, "customerid" int) ; INSERT INTO Ecommerce_orders ("orderid", "customerid") VALUES (10248, 90), (10249, 81), (10250, 34), (10251, 84), (10252, 76), (10253, 34), (10254, 14), (10255, 68), (10256, 88), (10257, 35), (10258, 20), (10259, 13), (10260, 55), (10261, 61), (10262, 65), (10263, 20), (10264, 24), (10265, 7), (10266, 87) ; CREATE TABLE Ecommerce_products ("productid" int, "categoryid" int, "price" int) ; INSERT INTO Ecommerce_products ("productid", "categoryid", "price") VALUES (1, 1, 18), (2, 1, 19), (3, 2, 10), (4, 2, 22), (5, 2, 21.35), (6, 2, 25), (7, 7, 30), (8, 2, 40), (9, 6, 97), (10, 8, 31), (11, 4, 21), (12, 4, 38), (13, 8, 6), (14, 7, 23.25), (15, 2, 15.5), (16, 3, 17.45), (17, 6, 39), (18, 8, 62.5), (19, 3, 9.2), (20, 3, 81), (21, 3, 10), (22, 5, 21), (23, 5, 9), (24, 1, 4.5), (25, 3, 14), (26, 3, 31.23), (27, 3, 43.9), (28, 7, 45.6), (29, 6, 123.79), (30, 8, 25.89), (31, 4, 12.5), (32, 4, 32), (33, 4, 2.5), (34, 1, 14), (35, 1, 18), (36, 8, 19), (37, 8, 26), (38, 1, 263.5), (39, 1, 18), (40, 8, 18.4), (41, 8, 9.65), (42, 5, 14), (43, 1, 46), (44, 2, 19.45), (45, 8, 9.5), (46, 8, 12), (47, 3, 9.5), (48, 3, 12.75), (49, 3, 20) ; ------------------------------------------------------------------------------------------ -- Insight Finding Script --#0. View Tables select * from Ecommerce_customers limit 10; select * from Ecommerce_categories limit 10; select * from Ecommerce_orderdetails limit 10; select * from Ecommerce_orders limit 10; select * from Ecommerce_products limit 10; --#1. union & union all difference: duplicate values select customerid from Ecommerce_customers union select customerid from Ecommerce_orders; select count(distinct customerid) from Ecommerce_customers; select count(distinct customerid) from Ecommerce_orders; select count(distinct customerid) from ( select customerid from Ecommerce_customers union select customerid from Ecommerce_orders) as temp; ----most of customerid are not same between Ecommerce_customers and Ecommerce_orders tables --#2. With Clause: create common expression table with cte1 as ( select customerid,country from Ecommerce_customers ) , cte2 as ( select customerid,orderid from Ecommerce_orders ) select cte1.customerid,orderid,country from cte1 join cte2 on cte1.customerid = cte2.customerid; --#3. Window function: row_number, rank, dense_rank --#3.1 row_number() -- Top 2 product by sales per category -- how to define sales? sales = price * quantity -- how to define top 2, side by side? want only 2 products for each category -> row_number() -- output -> product id, category id, sales, and rank for top 2 -- input -> product id, category id, price, quantity -- tables -> Ecommerce_orderdetails, Ecommerce_products, join, foregin key: product id select categoryid, productid, sales, row_num from ( select categoryid, Ecommerce_orderdetails.productid as productid, price*quantity as sales, row_number() over (partition by categoryid order by price*quantity desc) as row_num from Ecommerce_orderdetails join Ecommerce_products on Ecommerce_orderdetails.productid = Ecommerce_products.productid) as temp where row_num <= 2; --#3.2 rank() -- Top 2 product by sales per category -- how to define sales? sales = price * quantity -- how to define top 2, side by side? there are more than 2 products side by side, want the top 1 -> rank() -- output -> product id, category id, sales, and rank for top 2 -- input -> product id, category id, price, quantity -- tables -> Ecommerce_orderdetails, Ecommerce_products, join, foregin key: product id select categoryid, productid, sales, rank_num from ( select categoryid, Ecommerce_orderdetails.productid as productid, price*quantity as sales, rank() over (partition by categoryid order by price*quantity desc) as rank_num from Ecommerce_orderdetails join Ecommerce_products on Ecommerce_orderdetails.productid = Ecommerce_products.productid) as temp where rank_num <= 2; --#3.3 dense_rank() -- Top 2 product by sales per category -- how to define sales? sales = price * quantity -- how to define top 2, side by side? want all the top 2 products for each category -> dense_rank() -- output -> product id, category id, sales, and rank for top 2 -- input -> product id, category id, price, quantity -- tables -> Ecommerce_orderdetails, Ecommerce_products, join, foregin key: product id select categoryid, productid, sales, d_rank_num from ( select categoryid, Ecommerce_orderdetails.productid as productid, price*quantity as sales, dense_rank() over (partition by categoryid order by price*quantity desc) as d_rank_num from Ecommerce_orderdetails join Ecommerce_products on Ecommerce_orderdetails.productid = Ecommerce_products.productid) as temp where d_rank_num <= 2; --#4. Window funtion: ntile() -- set 2 buckets for each product order by quantity desc select orderid, productid, quantity, ntile (2) over (partition by productid order by quantity desc) as buckets from Ecommerce_orderdetails; -- set 2 buckets for the whole table/ orderid order by quantity desc select orderid, productid, quantity, ntile (2) over (order by quantity desc) as buckets from Ecommerce_orderdetails; --#5. Window + Aggregation funtion: sum() over(), avg() over, ... -- calculate the running total sales quantity for all the orders, rank product quantity in each order -- denfine rank -> dense_rank select orderid, productid, quantity, sum(quantity) over (order by orderid) as running_order_total, dense_rank() over (partition by orderid order by quantity desc) as d_rank_num from Ecommerce_orderdetails; -- calculate the average sales quantity for each order select orderid, productid, quantity, avg(quantity) over (partition by orderid order by quantity) as running_order_avg from Ecommerce_orderdetails; --#6. set restriction to running total: using cte select orderid, productid, quantity, running_order_total, running_order_total from(select orderid, productid, quantity, sum(quantity) over (order by orderid) as running_order_total from Ecommerce_orderdetails) as cte where running_order_total >= 1000; -- #7. Case when: -- basic: labeling select country, case when country in ('UK') then 'main market' when country in ('Mexico') then 'emerging market' else 'other markets' end as country_flag from Ecommerce_customers; -- Case + aggregation: calculate several metrics at one time join -- to find each order, count total products and how many beverages sales -- ouput: orderid, total products, sales for beverages -- when sales is null, output is null -- clarify: sales = price * quantity -- input: orderid, productsid, categoryid, price, quantity -> Ecommerce_orderdetails, Ecommerce_products -> foreign key: productid with cte as ( select tb1.productid as productid, orderid, categoryid, price*quantity as sales from Ecommerce_orderdetails as tb1 join Ecommerce_products as tb2 on tb1.productid = tb2.productid) select orderid, count(productid) as product_count, sum(case when categoryid = 1 then sales end) as beverages_sales from cte group by 1 order by 3 desc; --#Q8. Total sales(quantity) by country: -- output: country, total quantity -- when quantity is null, output becomes 0 -> case when -- input: country, quantity -> Ecommerce_customers, Ecommerce_orders, Ecommerce_orderdetails -> foreign key: customerid, orderid -- left join, groupby country select country, sum(case when quantity is not null then quantity else 0 end) as sales from Ecommerce_customers as tb1 left join Ecommerce_orders as tb2 on tb1.customerid = tb2.customerid left join Ecommerce_orderdetails as tb3 on tb2.orderid = tb3.orderid group by 1 order by 2 desc; --#9. Revenue by country -- output: country, revenue -- define revenue: quantity * price -- input: country, quantity, price -- Ecommerce_customers, Ecommerce_orderdetails, Ecommerce_orders, Ecommerce_products -- left join: Ecommerce_customers - customerid - Ecommerce_orders - orderid - Ecommerce_orderdetails - productid - Ecommerce_products select country, sum(quantity * price) as revenue from Ecommerce_customers as tb1 left join Ecommerce_orders as tb2 on tb1.customerid = tb2.customerid left join Ecommerce_orderdetails as tb3 on tb2.orderid = tb3.orderid left join Ecommerce_products as tb4 on tb3.productid = tb4.productid group by 1 order by 2 desc; -- the better way to replace null is to use case when: sum(case when a*b is not null then a*b else 0 end) select country, sum(case when quantity * price is not null then quantity * price else 0 end) as revenue from Ecommerce_customers as tb1 left join Ecommerce_orders as tb2 on tb1.customerid = tb2.customerid left join Ecommerce_orderdetails as tb3 on tb2.orderid = tb3.orderid left join Ecommerce_products as tb4 on tb3.productid = tb4.productid group by 1 order by 2 desc; -- #10. Revenue by category in Austria -- output: category, revenue -> quantity * price -- input: category, country(=Austria), quantity, price -- Ecommerce_customers, Ecommerce_orderdetails, Ecommerce_orders, Ecommerce_products -- with clause: select revenue in Austria -- group by category -- in the with clause: Ecommerce_products - productid - Ecommerce_orderdetails - orderid - Ecommerce_orders - customerid - Ecommerce_customers with cte as ( select categoryid, case when quantity * price is not null then quantity * price else 0 end as revenue, country from Ecommerce_products as tb1 left join Ecommerce_orderdetails as tb2 on tb1.productid = tb2.productid left join Ecommerce_orders as tb3 on tb2.orderid = tb3.orderid left join Ecommerce_customers as tb4 on tb3.customerid = tb4.customerid) select categoryid, sum(revenue) as revenue from cte where country = 'Austria' group by 1 order by 2 desc; -- Method2: -- country, category, revnue -- c,p,ordetails --c&o:customerid --ord&orderdetails:orderid --ordertail&products:productid select country, categoryid, sum(price * quantity) as revenue from Ecommerce_customers as tb1 join Ecommerce_orders as tb2 on tb1.customerid = tb2.customerid join Ecommerce_orderdetails as tb3 on tb2.orderid = tb3.orderid join Ecommerce_products as tb4 on tb3.productid = tb4.productid where country = 'Austria' group by 1,2 order by 1,2; --#11. for each customer, what's the avg spend? -- output: customerid, avg spend -- clarify: total spend/ total orders -> sum(quantity * price) / count(orders) -- input: customerid, quanity, price, orderid -- Ecommerce_customers, Ecommerce_orderdetails, Ecommerce_orders, Ecommerce_products select tb1.customerid as customerid, sum(quantity * price)/ count(distinct tb2.orderid) as avg_spend from Ecommerce_customers as tb1 join Ecommerce_orders as tb2 on tb1.customerid = tb2.customerid join Ecommerce_orderdetails as tb3 on tb2.orderid = tb3.orderid join Ecommerce_products as tb4 on tb3.productid = tb4.productid group by 1 order by 2 desc; -- safe_divide better version select tb1.customerid, case when count(distinct tb2.orderid) > 0 then sum(price * quantity)/count(distinct tb2.orderid) else null end as avg_spend from Ecommerce_customers as tb1 join Ecommerce_orders as tb2 on tb1.customerid = tb2.customerid join Ecommerce_orderdetails as tb3 on tb2.orderid = tb3.orderid join Ecommerce_products as tb4 on tb3.productid = tb4.productid group by 1 order by 2 desc; -- #12. top 5 most popular product in each category -- output: category, product -> top 5 clarify: only 5 records - row_number -- input: category, product, sales (quantity) -> group by productid - row_number -- tables: Ecommerce_orderdetails, Ecommerce_products select * from ( select categoryid, productid, sales, row_number() over (partition by categoryid order by sales desc) as row_num from( select categoryid, tb1.productid as productid, sum(case when quantity is not null then quantity else 0 end) as sales from Ecommerce_products tb1 left join Ecommerce_orderdetails tb2 on tb1.productid = tb2.productid group by 1,2 order by 3 ) as temp1 ) as temp2 where row_num <= 5; --#12.2 method2: select * from ( select categoryid, tb1.productid, sum(case when quantity is not null then quantity else 0 end) as sales, row_number() over (partition by categoryid order by sum(case when quantity is not null then quantity else 0 end) desc) as row_num from Ecommerce_products tb1 left join Ecommerce_orderdetails tb2 on tb1.productid = tb2.productid group by 1,2 order by 1,4) as temp where row_num <= 5; --#Q13: reporting: each customer quantity on beverage, condiments and others -- ouput: customerid, categoryname(beverages, condiments, others), quantity -- input: customerid, categoryname, quantity -> case when(categoryname) -- tables: Ecommerce_orders - orderid - Ecommerce_orderdetails - productid - Ecommerce_products - categoryid - Ecommerce_categories select customerid, case when categoryname in ('Beverages','Condiments') then categoryname else 'Others' end as categoryname, sum(quantity) as quantity from Ecommerce_orders tb1 left join Ecommerce_orderdetails tb2 on tb1.orderid = tb2.orderid left join Ecommerce_products tb3 on tb2.productid = tb3.productid left join Ecommerce_categories tb4 on tb3.categoryid = tb4.categoryid group by 1,2 order by 2,3 desc; -- not very clear -- another method: let every category quantity become a column for each customer - sum(case when) select customerid, sum(case when categoryname = 'Beverages' then quantity else 0 end) as Beverages_quantity, sum(case when categoryname = 'Condiments' then quantity else 0 end) as Condiments_quantity, sum(case when categoryname not in ('Beverages','Condiments') then quantity else 0 end) as Others_quantity from Ecommerce_orders tb1 left join Ecommerce_orderdetails tb2 on tb1.orderid = tb2.orderid left join Ecommerce_products tb3 on tb2.productid = tb3.productid left join Ecommerce_categories tb4 on tb3.categoryid = tb4.categoryid group by 1 order by 1 desc; --#Q14: median quantity value of each category and product -- output: categoryid, productid, quantity(median) -- how to calculate median? -- number of values is odds -> middle value -- number of values is even -> avg (number of values / 2, number of values /2 +1) -- method1: -- use row_number, one is desc, one is asc, when abs(row_num_d-row_num_a) <= 1 -> avg -- input: categoryid, productid, quantity -- tables: Ecommerce_orderdetails - productid - Ecommerce_products select categoryid, productid, round(avg(quantity)) as median from ( select tb1.categoryid as categoryid, tb2.productid as productid, quantity, row_number() over (partition by tb1.categoryid, tb2.productid order by quantity asc) as row_num_a, row_number() over (partition by tb1.categoryid, tb2.productid order by quantity desc) as row_num_d from Ecommerce_categories tb1 join Ecommerce_products tb2 on tb1.categoryid = tb2.categoryid join Ecommerce_orderdetails tb3 on tb2.productid = tb3.productid) as temp where abs(row_num_a - row_num_d) <= 1 group by 1,2 order by 1,2; -- method2: -- use row_number, and count -> row_number in avg(case when row_number between (count/2, count/2+1)) -- input: categoryid, productid, quantity -- tables: Ecommerce_orderdetails - productid - Ecommerce_products with cte1 as ( select tb1.categoryid, tb2.productid as productid, count(quantity) as count_num from Ecommerce_categories tb1 join Ecommerce_products tb2 on tb1.categoryid = tb2.categoryid join Ecommerce_orderdetails tb3 on tb2.productid = tb3.productid group by 1,2), cte2 as ( select tb1.categoryid, tb2.productid as productid, quantity, row_number() over(partition by tb1.categoryid, tb2.productid order by quantity asc) as row_num from Ecommerce_categories tb1 join Ecommerce_products tb2 on tb1.categoryid = tb2.categoryid join Ecommerce_orderdetails tb3 on tb2.productid = tb3.productid) select cte1.categoryid, cte1.productid, round(avg(quantity)) as median from cte1 join cte2 on cte1.categoryid = cte2.categoryid and cte1.productid = cte2.productid where row_num between count_num/2 and (count_num/2+1) group by 1,2 order by 1,2;
run
|
edit
|
history
|
help
0
PostreSQL: Full Outer Join
Select all countries that can be found by recursively adding all neighbouring countries (PostgreSQL)
SQL social network practice by Han Wang 20200720
1127. User Purchase Platform
SQL social network practice by Han Wang 20200720 - 2
funkcia_transakcie
loggin_trigger-audit
Ass2 q2
renaming JSON field
1