Run Code
|
API
|
Code Wall
|
Misc
|
Feedback
|
Login
|
Theme
|
Privacy
|
Patreon
sql_fb
--- FB Spam friend request--- CREATE TABLE users ([user_id] int, [country] varchar(2), [facebook_age] int) ; INSERT INTO users ([user_id], [country], [facebook_age]) VALUES (1001, 'US', 120), (1002, 'CA', 400), (1003, 'MX', 6), (5555, 'US', 8) ; CREATE TABLE friend_requests ([date] datetime, [sender_id] int, [receiver_id] int, [result] int) ; INSERT INTO friend_requests ([date], [sender_id], [receiver_id], [result]) VALUES ('2020-01-01 00:00:00', 1001, 9991, 0), ('2020-01-01 00:00:00', 1001, 1032, 1), ('2020-01-01 00:00:00', 1002, 5555, 2), ('2020-01-01 00:00:00', 1002, 1234, 1), ('2020-01-02 00:00:00', 1003, 1002, 1) ; -- Q1: For each month in 2020, what is the friend request acceptance rate? /* SELECT month(date), SUM(CASE WHEN result=1 THEN 1.00 ELSE 0 END)/COUNT(*) AS accept_rate FROM friend_requests GROUP BY month(date) */ ---- /* WITH month_table AS ( SELECT '2020-01-01' AS month UNION SELECT '2020-02-01' AS month UNION SELECT '2020-03-01' AS month UNION SELECT '2020-04-01' AS month UNION SELECT '2020-05-01' AS month UNION SELECT '2020-06-01' AS month UNION SELECT '2020-07-01' AS month UNION SELECT '2020-08-01' AS month UNION SELECT '2020-09-01' AS month UNION SELECT '2020-10-01' AS month UNION SELECT '2020-11-01' AS month UNION SELECT '2020-12-01' AS month), monthly_request AS ( SELECT a.month, SUM(CASE WHEN b.result=1 THEN 1.00 ELSE 0 END) AS accepts, COUNT(b.result) AS requests FROM month_table AS a LEFT JOIN friend_requests AS b ON month(a.month)=month(b.date) GROUP BY a.month) SELECT month, CASE WHEN requests = 0 then 0.00 ELSE accepts/requests END AS accept_rate FROM monthly_request; -- Q2: What percent of users in the United States accepted a friend request last week? WITH R7users AS ( SELECT * FROM users Where facebook_age>=7 AND country='US') SELECT CAST(COUNT(DISTINCT receiver_id)/COUNT(DISTINCT a.user_id) AS DECIMAL(5,2)) AS US_acceptedRateUser FROM R7users AS a LEFT JOIN (SELECT DISTINCT receiver_id FROM friend_requests WHERE date >= '2020-01-01' AND result=1) AS b ON a.user_id = b.receiver_id */ --- Dwell time --- CREATE TABLE user_sessions ([date] datetime, [user_id] int, [start_time] int, [end_time] int, [end_reason] varchar(9)) ; INSERT INTO user_sessions ([date], [user_id], [start_time], [end_time], [end_reason]) VALUES ('2020-09-01 00:00:00', 1, 1964783746, 1964783924, 'close_app'), ('2020-09-01 00:00:00', 1, 1964783528, 1964783809, 'crash'), ('2020-09-02 00:00:00', 2, 1964783123, 1964783345, 'close_app'), ('2020-09-02 00:00:00', 3, 1964783252, 1964783658, 'crash') ; CREATE TABLE dim_all_users ([user_id] int, [country] varchar(10), [is_active] varchar(5)) ; INSERT INTO dim_all_users ([user_id], [country], [is_active]) VALUES (1, 'US', 'true'), (2, 'US', 'false'), (3, 'CANADA', 'true') ; /* WITH date_tbl AS ( SELECT date FROM user_sessions GROUP BY date) , user_date AS ( SELECT user_id, country, is_active, date FROM dim_all_users AS a CROSS JOIN date_tbl AS b) SELECT a.user_id, a.date, SUM(CASE WHEN b.start_time IS NULL THEN 0.00 ELSE b.end_time-start_time END)/1000 AS dwell_time FROM user_date AS a LEFT JOIN user_sessions AS b ON a.user_id = b.user_id AND a.date = b.date GROUP BY a.user_id, a.date; */ -- 2. What is the percentage of active users who crashed when using Facebook from the US -- area on a specific date? /* WITH user_sessions1 AS ( SELECT a.* FROM user_sessions AS a INNER JOIN dim_all_users AS b ON a.user_id = b.user_id WHERE date = '2020-09-01' AND country = 'US') SELECT CAST(COUNT(DISTINCT b.user_id)/COUNT(DISTINCT a.user_id) AS decimal(4,2)) AS crash_rate_us FROM user_sessions1 AS a LEFT JOIN (SELECT * FROM user_sessions1 WHERE end_reason='crash') AS b on a.user_id = b.user_id */ -- 3. Calculate the average dwell time in seconds across all sessions (i.e. return one number)? Dwell time is the length of time between opening and closing the menu. -- SELECT (SUM(end_time)-SUM(start_time))/COUNT(*) AS avg_dwell_time -- FROM user_sessions ------------ Portal ----------- CREATE TABLE video_calls ([caller] int, [recepient] int, [date] datetime, [call_id] int, [duration] int) ; INSERT INTO video_calls ([caller], [recepient], [date], [call_id], [duration]) VALUES (1111, 1112, '2020-01-01 00:00:00', 1, 10), (1111, 1003, '2020-01-01 00:00:00', 2, 15), (1112, 1145, '2020-01-01 00:00:00', 3, 5) ; CREATE TABLE fb_dau ([user_id] int, [dau_fg] int, [date] datetime, [country] varchar(2)) ; INSERT INTO fb_dau ([user_id], [dau_fg], [date], [country]) VALUES (1111, 1, '2020-01-01 00:00:00', 'US'), (1112, 0, '2020-01-01 00:00:00', 'CA'), (1003, 0, '2020-01-01 00:00:00', 'US') ; -- Q1: On 2020-01-01 how many people initiated multiple calls? SELECT COUNT(*) FROM( SELECT caller FROM video_calls WHERE date = '2020-01-01' GROUP BY caller HAVING COUNT(DISTINCT call_id)>1) AS t; -- Q2: % of DAU used the video calls function by each country on 2020-01-01? WITH dau AS ( SELECT country, user_id FROM fb_dau WHERE date='2020-01-01'), video_user AS ( SELECT caller AS video_user FROM video_calls GROUP BY caller UNION SELECT recepient AS video_user FROM video_calls GROUP BY recepient) SELECT country, CAST(COUNT(DISTINCT video_user)/COUNT(DISTINCT user_id) AS decimal(4,2)) AS percent_video_users FROM dau AS a LEFT JOIN video_user AS b ON a.user_id = b.video_user GROUP BY country; -- CONVERT(date, xxx) AS date ----------- posts --------- CREATE TABLE user_actions ([date] datetime, [actor_id] int, [post_id] int, [relationship] varchar(7), [interaction] varchar(7)) ; INSERT INTO user_actions ([date], [actor_id], [post_id], [relationship], [interaction]) VALUES ('2021-03-20 00:00:00', 1001, 2057, 'friend', 'like'), ('2021-03-21 00:00:00', 2347, 1245, 'folower', 'dislike'), ('2021-03-21 00:00:00', 2348, 2057, 'friend', 'love'), ('2021-03-21 00:00:00', 3333, 1245, 'friend', 'like') ; CREATE TABLE user_posts ([date] datetime, [poster_id] int, [post_id] int) ; INSERT INTO user_posts ([date], [poster_id], [post_id]) VALUES ('2021-03-20 00:00:00', 1111, 2057), ('2021-03-20 00:00:00', 2222, 1245), ('2021-03-21 00:00:00', 1111, 1250) ; WITH t AS ( SELECT post_id FROM user_posts WHERE poster_id=1111 AND date BETWEEN GETDATE()-7 AND GETDATE()) SELECT SUM(CASE WHEN interaction = 'like' THEN 1.00 ELSE 0 END)/COUNT(DISTINCT t.post_id) AS avg_like FROM t LEFT JOIN user_actions AS t1 ON t.post_id = t1.post_id; ------------------------------------------------------- CREATE TABLE user_network_requests ([user_id] int, [timestamp] int, [data_center] varchar(1), [success] int) ; INSERT INTO user_network_requests ([user_id], [timestamp], [data_center], [success]) VALUES (10032, 15009, 'A', 1), (10032, 15097, 'C', 0), (10033, 15000, 'A', 1), (10033, 16000, 'A', 0), (10033, 16700, 'C', 1), (15009, 16800, 'C', 1) ; CREATE TABLE user_country ([user_id] int, [country] varchar(2)) ; INSERT INTO user_country ([user_id], [country]) VALUES (10032, 'US'), (15009, 'US') ; -- 1. request failure rate for each data center /* SELECT data_center, 1-SUM(success)*1.00/COUNT(*) AS failure_rate FROM user_network_requests GROUP BY data_center -- 2. request failure rate per country SELECT country, 1-SUM(success)*1.00/COUNT(*) AS failure_rate */ -- 3. for each country, how many uers never had a failed request /* WITH t AS ( SELECT country, a.user_id, SUM(success) AS success_cnt, COUNT(*) AS requests FROM user_network_requests AS a INNER JOIN user_country AS b ON a.user_id = b.user_id GROUP BY country, a.user_id) SELECT country, COUNT(DISTINCT user_id) AS users FROM t WHERE success_cnt = requests GROUP BY country */ ----------------------- ads ---------------------------- CREATE TABLE ads ([ad_id] int, [user_id] int, [status] varchar(5), [timestamp] datetime) ; INSERT INTO ads ([ad_id], [user_id], [status], [timestamp]) VALUES (1111, 4324, 'click', '2020-01-01 00:00:00'), (1112, 4324, 'click', '2020-01-01 00:00:00'), (1112, 32646, 'click', '2020-01-03 00:00:00'), (1111, 4542, 'hide', '2020-01-02 00:00:00'), (1112, 4542, 'click', '2020-01-01 00:00:00'), (1112, 2349, 'hide', '2020-01-02 00:00:00') ; /* -- 1. how to measure ad performance? SELECT ad_id, SUM(CASE WHEN status NOT IN ('hide') THEN 1.00 ELSE 0 END)/COUNT(*) AS engagement_rate FROM ads GROUP BY ad_id; -- 2. 如果有个user click其中一个ad, 写SQL推荐下一个show给这个user的ad. WITH t AS ( SELECT * FROM ads WHERE status = 'click' ), t1 AS ( SELECT * FROM ads WHERE status !='hide') SELECT a.user_id, t1.ad_id FROM ( SELECT t.user_id, t.ad_id, t1.user_id AS rec_user FROM t INNER JOIN t1 ON t.ad_id = t1.ad_id WHERE t.user_id != t1.user_id) AS a INNER JOIN t1 ON a.rec_user = t1.user_id WHERE a.ad_id != t1.ad_id */ ------------- spam ------------------ DROP TABLE user_actions; CREATE TABLE user_actions ([date] datetime, [user_id] int, [post_id] int, [action] varchar(6)) ; INSERT INTO user_actions ([date], [user_id], [post_id], [action]) VALUES ('2020-01-01 00:00:00', 1234, 1111, 'view'), ('2020-01-01 00:00:00', 2345, 1111, 'report'), ('2020-01-02 00:00:00', 2314, 2111, 'view'), ('2020-01-02 00:00:00', 4235, 2111, 'view'), ('2020-01-02 00:00:00', 543, 1324, 'view'), ('2020-01-02 00:00:00', 2346, 1324, 'view'), ('2020-01-02 00:00:00', 2314, 1324, 'report') ; CREATE TABLE review ([date] datetime, [reviewer_id] int, [post_id] int) ; INSERT INTO review ([date], [reviewer_id], [post_id]) VALUES ('2020-01-03 00:00:00', 567990, 1111), ('2020-01-03 00:00:00', 567990, 2111) ; -- Q1: What percent of the content views was reported for spam each day? SELECT a.date, SUM(CASE WHEN a.action = 'view' AND b.post_id IS NOT NULL THEN 1.00 ELSE 0 END)/SUM(CASE WHEN a.action='view' THEN 1 ELSE 0 END) AS percent_spam FROM user_actions AS a LEFT JOIN review AS b ON a.post_id = b.post_id GROUP BY a.date -- Q2: What percentage of the posts viewedby the users was actually spam each day? ---------------------- comment on post ---------------------------------- CREATE TABLE comments ([user_id] int, [date] datetime, [comments] int) ; INSERT INTO comments ([user_id], [date], [comments]) VALUES (1111, '2021-03-01 00:00:00', 3), (1111, '2021-03-02 00:00:00', 2), (1112, '2021-03-01 00:00:00', 5), (1112, '2021-03-03 00:00:00', 3), (1112, '2021-03-05 00:00:00', 7), (1113, '2021-03-05 00:00:00', 2) ; CREATE TABLE ActiveUsers ([user_id] int, [country] varchar(2)) ; INSERT INTO ActiveUsers ([user_id], [country]) VALUES (1111, 'US'), (1112, 'CA'), (1113, 'MX'), (1115, 'US') ; -- total number of comments each user in past 30 days, -- percent of active users who comment, SELECT user_id, SUM(comments) AS total_comments FROM comments WHERE date >= GETDATE()-30 --DATEDIFF(day, date, CONVERT(date, GETDATE()))<=30 GROUP BY user_id; SELECT COUNT(b.user_id)*1.00/COUNT(a.user_id) AS percent_comment_active FROM ActiveUsers AS a LEFT JOIN (SELECT user_id FROM comments WHERE date>=GETDATE()-30 GROUP BY user_id) AS b ON a.user_id = b.user_id ----------- search ---------------- -- sql:search/search_result -- 1)#of users have made more than 1/10 searches in last 7 days -- 2) % of uders clicked the search result of event CREATE TABLE searches ([search_id] int, [user_id] int, [date] datetime) ; INSERT INTO searches ([search_id], [user_id], [date]) VALUES (1, 213, '2021-01-01 00:00:00'), (2, 34567, '2021-01-01 00:00:00'), (3, 21312, '2021-01-01 00:00:00'), (4, 213, '2021-01-02 00:00:00'), (5, 345, '2021-01-02 00:00:00') ; /* SELECT COUNT(*) AS cnt FROM( SELECT user_id FROM searches GROUP BY user_id HAVING COUNT(*)>(SELECT 0.1*COUNT(*) FROM searches WHERE date >=GETDATE()-7)) AS t */ ------------------ friend request ----------------------- CREATE TABLE friendRequests ([user_id_send] int, [user_id_receive] int, [date] datetime, [action] varchar(6)) ; INSERT INTO friendRequests ([user_id_send], [user_id_receive], [date], [action]) VALUES (123, 234, '2021-01-01 00:00:00', 'send'), (234, 345, '2021-01-01 00:00:00', 'send'), (123, 345, '2021-01-01 00:00:00', 'send'), (234, 123, '2021-01-02 00:00:00', 'accept'), (345, 234, '2021-01-03 00:00:00', 'accept') ; WITH send_table AS( SELECT * FROM friendRequests WHERE action = 'send'), accept_table AS ( SELECT * FROM friendRequests WHERE action = 'accept') SELECT COUNT(b.user_id_send)*1.00/COUNT(a.user_id_send) AS request_accept_rate FROM send_table AS a LEFT JOIN accept_table AS b ON a.user_id_send = b.user_id_receive AND a.user_id_receive = b.user_id_send ------------------ interactions --------- CREATE TABLE Interactions ([user_a] int, [user_b] int, [date] datetime) ; INSERT INTO Interactions ([user_a], [user_b], [date]) VALUES (123, 234, '2020-01-01 00:00:00'), (233, 345, '2020-01-01 00:00:00'), (234, 3245, '2020-01-01 00:00:00'), (123, 546, '2020-01-01 00:00:00') ; -- given a table of interaction between users (user_a | user_b | day), find number of users who had more than 5 interactions yesterday (assume there is only one unique interaction between a pair of users per day). SELECT user_id, SUM(user_cnt) AS interactions FROM( SELECT user_a AS user_id, COUNT(DISTINCT user_b) AS user_cnt FROM Interactions GROUP BY user_a UNION ALL SELECT user_b AS user_id, COUNT(DISTINCT user_a) AS user_cnt FROM Interactions GROUP BY user_b) AS t GROUP BY user_id; ------------------- SELECT 1+NULL ----------------------------------- -- We have a table called ad_accounts(account_id, date, status). Status can be active/closed/fraud. -- A) what percent of active accounts are fraud? -- B) How many accounts became fraud today for the first time? -- C) What would be the financial impact of letting fraud accounts become active (how would you approach this question)? CREATE TABLE ad_accounts ([account_id] int, [status] varchar(6), [date] datetime) ; INSERT INTO ad_accounts ([account_id], [status], [date]) VALUES (1111, 'fraud', '2021-01-01 00:00:00'), (1111, 'active', '2021-01-02 00:00:00'), (1101, 'active', '2021-01-03 00:00:00'), (1102, 'fraud', '2021-01-02 00:00:00'), (1102, 'active', '2021-01-03 00:00:00'), (1111, 'fraud', '2021-01-03 00:00:00'), (1112, 'fraud', '2021-01-03 00:00:00') ; WITH t AS ( SELECT account_id, status, ROW_NUMBER() OVER(PARTITION BY account_id ORDER BY date DESC) AS account_rank FROM ad_accounts), latest AS ( SELECT account_id FROM t WHERE account_rank=1 AND status = 'active') SELECT COUNT(b.account_id)*1.00/COUNT(a.account_id) AS fraud_perc FROM latest AS a LEFT JOIN (SELECT * FROM ad_accounts WHERE status='fraud') AS b ON a.account_id = b.account_id; SELECT account_id FROM ad_accounts WHERE status='fraud' AND date = '2021-01-03' AND account_id IN (SELECT account_id FROM ad_accounts WHERE status='fraud' GROUP BY account_id HAVING COUNT(*)=1)
run
|
edit
|
history
|
help
0
megha
NOT NULL field from SELECT INTO
forming date as nvarchar
ss sqlpractice2014
DGDFGDFG
Teacher
customers table
Rno 37 boat
20181CSE0041
TABLE