Run Code
|
API
|
Code Wall
|
Misc
|
Feedback
|
Login
|
Theme
|
Privacy
|
Patreon
SQL - Social Network Analysis by Patrick Lin
-- High School Social Media Analysis with SQL by Patrick Lin --------------- Database Script ---------------- drop table if exists p_highschooler; drop table if exists p_friend; drop table if exists p_likes; create table p_highschooler("id" int, "name" varchar(30), "grade" int); create table p_friend("id1" int, "id2" int); create table p_likes("id1" int, "id2" int); insert into p_highschooler("id","name","grade") values (1510, 'Jordan', 9), (1689, 'Gabriel', 9), (1381, 'Tiffany', 9), (1709, 'Cassandra', 9), (1101, 'Haley', 10), (1782, 'Andrew', 10), (1468, 'Kris', 10), (1641, 'Brittany', 10), (1247, 'Alexis', 11), (1316, 'Austin', 11), (1911, 'Gabriel', 11), (1501, 'Jessica', 11), (1304, 'Jordan', 12), (1025, 'John', 12), (1934, 'Kyle', 12), (1661, 'Logan', 12); insert into p_friend("id1","id2") values (1510, 1381), (1101, 1501), (1510, 1689), (1689, 1709), (1381, 1247), (1709, 1247), (1689, 1782), (1782, 1468), (1782, 1316), (1782, 1304), (1468, 1101), (1468, 1641), (1101, 1641), (1247, 1911), (1247, 1501), (1911, 1501), (1501, 1934), (1316, 1934), (1934, 1304), (1304, 1661), (1661, 1025), (1689, 1709), (1709, 1689), (1782, 1709), (1911, 1247), (1247, 1468), (1641, 1468), (1316, 1304), (1501, 1934), (1934, 1501), (1025, 1101); insert into p_likes("id1","id2") values (1689, 1709), (1709, 1689), (1782, 1709), (1911, 1247), (1247, 1468), (1641, 1468), (1316, 1304), (1501, 1934), (1934, 1501), (1025, 1101), (1101, 1381), (1025, 1641); ------------------------------------------------ --------------- Analysis Script ---------------- -- Q1: Find the names of all students who are friends with someone names Gabriel /* rationale: output table schema: id, name tables needed: p_highschooler, p_friend methods: 1.in p_highschooler, get id(s) of name Gabriel; 2.in p_friend, get all distinct id whose friended id in id(s) from step 1; 3.join p_highschooler */ with t1 as ( select id1 as id from p_friend where id2 in (select distinct id from p_highschooler where name = 'Gabriel') union distinct select id2 as id from p_friend where id1 in (select distinct id from p_highschooler where name = 'Gabriel') ) select t1.id, t2.name from t1 join p_highschooler as t2 on t1.id = t2.id order by 2; -- Q2: For every student who likes someone 2 or more grades younger than themselves, -- return that student's name and grade, and the name and grade of the student they like /* rationale: output table schema: like_id, like_name, like_grade, liked_id, liked_name, liked_grade tables needed: p_highschooler, p_likes methods: 1.join p_highschooler, p_likes; 2.query rows where id1 corresponding grade - id2 grade >= 2 */ select distinct h1.id as id1, h1.name as like_name, h1.grade as like_grade, h2.id as id2, h2.name as liked_name, h2.grade as liked_grade from p_likes as l join p_highschooler as h1 on l.id1 = h1.id join p_highschooler as h2 on l.id2 = h2.id where h1.grade - h2.grade >= 2; -- Q3: For every pair of students who both like each other, return the name and grade of both students. -- Include each pair only once, with the two names in alphabetical order /* rationale: output table schema: id1, name1, grade1, id2, name2, grade2 tables needed: p_highschooler, p_likes logic: -- like, unlike friend, is not mutual, but one-way -- 2 main tasks here: 1.how to find the pair; 2.how to ensure pair only appear once methods: 1.self-join to find all pairs (however, each pair will appear twice) 2.with conditional statement id1 > id2 to enforce order so that each pair will appear only once in output */ with t1 as (select l1.id1, l1.id2 from p_likes l1 join p_likes l2 on l1.id1 = l2.id2 and l1.id2 = l2.id1 where l1.id1 > l1.id2) select t1.id1, t2.name name1, t2.grade grade1, t1.id2, t3.name name3, t3.grade grade3 from t1 join p_highschooler t2 on t1.id1 = t2.id join p_highschooler t3 on t1.id2 = t3.id order by 2, 5; -- Q4: Find all students who do not appear in the likes table -- (as a student who likes or is liked) and return their names and grades /* rationale: find distinct ids from p_highschooler that are not in p_likes as id1 nor id2 easiest way to work is union distinct id1 and id2 from p_likes and get ids from p_highschooler that are not in the union dataset */ with t1 as (select id1 as id from p_likes union distinct select id2 as id from p_likes) select distinct name, grade from p_highschooler where id not in (select id from t1); -- Q5: For every situation where student A likes student B, but we have no information about whom B likes -- return A and B's names and grades /* rationale: qualified are the situation that B appears as an id2 in the likes table, but never appear as an id1 */ with t1 as (select id1, id2 from p_likes where id2 not in (select distinct id1 from p_likes)) select t2.name name1, t2.grade grade1, t3.name name2, t3.grade grade2 from t1 join p_highschooler t2 on t1.id1 = t2.id join p_highschooler t3 on t1.id2 = t3.id; -- Q6: Find names and grades of students who only have friends in the same grade -- Return the result sorted by grade /* rationale: step 1. join p_highschooler and p_friend to get dimensions of id1 and id2 step 2. use case statement to label rows with same grade as '0' and rows without as '1' step 3. group by id1 and get the sum of different grades step 4. group by id2 and get the sum of different grades step 5. union all id1 and id2 as id step 6. group by id and query only those with sum = 0 (meaning the id with only friends in same grade)*/ with t1 as (select f.id1, h1.name name1, h1.grade grade1, f.id2, h2.name name2, h2.grade grade2, case when h1.grade = h2.grade then 0 else 1 end as diff from p_friend f join p_highschooler h1 on f.id1 = h1.id join p_highschooler h2 on f.id2 = h2.id), t2 as (select id1 as id, name1 as name, grade1 as grade, sum(diff) as total from t1 group by 1, 2, 3 union all select id2 as id, name2 as name, grade2 as grade, sum(diff) as total from t1 group by 1, 2, 3) select id, name, grade from t2 group by 1, 2, 3 having sum(total) = 0 order by 3; -- Q7: Find the name and grade of all students who are liked by more than 1 other student /* rationale: the trick here is to filter out the scienario where student likes himself/herself */ -- solution 1: group select l.id2, h.name, h.grade from p_likes l join p_highschooler h on l.id2 = h.id where l.id2 != l.id1 group by 1,2,3 having count(l.id1) > 1 order by 3; -- solution 2: window function row_number() with t1 as (select id2, row_number() over(partition by id2 order by id1) row_num from p_likes where id1 != id2) select distinct h.id, h.name, h.grade from p_highschooler h join t1 on t1.id2 = h.id where t1.row_num > 1 order by 3;
run
|
edit
|
history
|
help
0
Pivot the Hard Way..
POSTGRESQL_CLOSEST_VALUE
Correlated subquery in SELECT clause, and rewritten for Netezza
1225. Report Contiguous Dates
Assignment 1(SET A)
revenue average per current month, last 3, 6 and 12 months
Cilinica_X
SQL Lab
PostreSQL: Full Outer Join
cte