The football World Cup is just under a week away so I thought we deserved a World Cup graph for the occasion. It’s still a work in progress but here’s what I’ve got so far.
CREATE INDEX ON :Match(id);CREATE INDEX ON :WorldCup(name);CREATE INDEX ON :Stadium(stadium);CREATE INDEX ON :Phase(phase);CREATE INDEX ON :Country(name);CREATE INDEX ON :Time(time);CREATE INDEX ON :MatchNumber(value);MATCH (n) OPTIONAL MATCH (n)-[r]-() DELETE n,r;USING PERIODIC COMMIT 1000
LOAD CSV WITH HEADERS FROM "https://dl.dropboxusercontent.com/u/7619809/matches.csv" AS csvLine
WITH csvLine, toInt(csvLine.match_number) AS matchNumber
WITH csvLine,
CASE WHEN csvLine.phase = ""
THEN
CASE WHEN matchNumber <= 48 THEN "Group matches"
WHEN matchNumber > 48 AND matchNumber <= 56 THEN "Round of 16"
WHEN matchNumber > 56 AND matchNumber <= 60 THEN "Quarter-finals"
WHEN matchNumber > 60 AND matchNumber <= 62 THEN "Semi-finals"
WHEN matchNumber = 63 THEN "Third place"
ELSE "Final"
END
ELSE
csvLine.phase
END AS phase, matchNumber
MERGE (match:Match {id: csvLine.id})
SET match.h_score = csvLine.h_score,
match.a_score = csvLine.a_score,
match.attendance = csvLine.attendance,
match.date = csvLine.date,
match.description = csvLine.home + " vs. " + csvLine.away
MERGE (host:Country {name: csvLine.host})
MERGE (home:Country {name: csvLine.home})
MERGE (match)-[:HOME_TEAM]->(home)
MERGE (away:Country {name: csvLine.away})
MERGE (match)-[:AWAY_TEAM]->(away)
MERGE (year:Year {year: toInt(csvLine.year)})
MERGE (worldCup:WorldCup {name: csvLine.world_cup})
MERGE (match)<-[:CONTAINS_MATCH]-(worldCup)
MERGE (host)<-[:HOSTED_BY]-(worldCup)
MERGE (year)<-[:IN_YEAR]-(worldCup)
MERGE (stadium:Stadium {name: csvLine.stadium})
MERGE (match)-[:PLAYED_IN_STADIUM]->(stadium)
MERGE (p:Phase {name: phase})
MERGE (match)-[:IN_PHASE]->(p)
MERGE (mn:MatchNumber {value: matchNumber})
MERGE (match)-[:HAS_MATCH_NUMBER]->(mn)
MERGE (time:Time {time: csvLine.time})
MERGE (match)-[:PLAYED_AT_TIME]->(time)
RETURN match;We’ve got teams, matches, stadiums and hosts at the moment so let’s write some queries against the graph.
MATCH (stadium:Stadium)<-[:PLAYED_IN_STADIUM]-()<-[:CONTAINS_MATCH]-(wc)-[:HOSTED_BY]-(host),
(wc)-[:IN_YEAR]-(year)
WITH stadium, host, COUNT(*) as count, COLLECT(DISTINCT year.year) AS years
UNWIND years as year
WITH stadium, host, count, year
ORDER BY stadium.name, host.name, year
RETURN stadium.name, host.name, COLLECT(year) AS years, count
ORDER BY count DESC
LIMIT 5MATCH (host:Country)<-[:HOSTED_BY]-()-[:IN_YEAR]->(year)
WITH host, COUNT(*) AS times, COLLECT(year.year) AS years
UNWIND years AS year
WITH host, times, year
ORDER BY times DESC, year
RETURN host.name, times, COLLECT(year) AS years
ORDER BY times DESC