CREATE INDEX ON :Match(id);
CREATE INDEX ON :WorldCup(name);
CREATE INDEX ON :Stadium(name);
CREATE INDEX ON :Phase(phase);
CREATE INDEX ON :Country(name);
CREATE INDEX ON :Time(time);
CREATE INDEX ON :MatchNumber(value);
USING PERIODIC COMMIT 1000
LOAD CSV WITH HEADERS FROM "https://dl.dropboxusercontent.com/u/7619809/matches.csv" AS csvLine
WITH csvLine, toInt(csvLine.match_number) AS matchNumber
WITH csvLine,
CASE WHEN csvLine.phase = ""
THEN
CASE WHEN matchNumber <= 48 THEN "Group matches"
WHEN matchNumber > 48 AND matchNumber <= 56 THEN "Round of 16"
WHEN matchNumber > 56 AND matchNumber <= 60 THEN "Quarter-finals"
WHEN matchNumber > 60 AND matchNumber <= 62 THEN "Semi-finals"
WHEN matchNumber = 63 THEN "Third place"
ELSE "Final"
END
ELSE
csvLine.phase
END AS phase, matchNumber
MERGE (match:Match {id: csvLine.id})
SET match.h_score = csvLine.h_score,
match.a_score = csvLine.a_score,
match.attendance = csvLine.attendance,
match.date = csvLine.date,
match.description = csvLine.home + " vs. " + csvLine.away
MERGE (host:Country {name: csvLine.host})
MERGE (home:Country {name: csvLine.home})
MERGE (match)-[:HOME_TEAM]->(home)
MERGE (away:Country {name: csvLine.away})
MERGE (match)-[:AWAY_TEAM]->(away)
MERGE (year:Year {year: toInt(csvLine.year)})
MERGE (worldCup:WorldCup {name: csvLine.world_cup})
MERGE (match)<-[:CONTAINS_MATCH]-(worldCup)
MERGE (host)<-[:HOSTED_BY]-(worldCup)
MERGE (year)<-[:IN_YEAR]-(worldCup)
MERGE (stadium:Stadium {name: csvLine.stadium})
MERGE (match)-[:PLAYED_IN_STADIUM]->(stadium)
MERGE (p:Phase {name: phase})
MERGE (match)-[:IN_PHASE]->(p)
MERGE (mn:MatchNumber {value: matchNumber})
MERGE (match)-[:HAS_MATCH_NUMBER]->(mn)
MERGE (time:Time {time: csvLine.time})
MERGE (match)-[:PLAYED_AT_TIME]->(time)
RETURN count(*) as matches;The graph is too large to visualize nicely, that’s why here only a few matches from 2010 as example.
MATCH (stadium:Stadium)<-[:PLAYED_IN_STADIUM]-(match)<-[:CONTAINS_MATCH]-(worldCup)-[:HOSTED_BY]-(host),
(worldCup)-[:IN_YEAR]-(year:Year {year:2010})
RETURN *MATCH (stadium:Stadium)<-[:PLAYED_IN_STADIUM]-()<-[:CONTAINS_MATCH]-(worldCup:WorldCup)-[:HOSTED_BY]-(host:Country),
(worldCup)-[:IN_YEAR]-(year:Year)
RETURN worldCup.name,year.year,host.name,collect(distinct stadium.name)
ORDER BY year.year ASCMATCH (stadium:Stadium)<-[:PLAYED_IN_STADIUM]-()<-[:CONTAINS_MATCH]-(wc)-[:HOSTED_BY]-(host),
(wc)-[:IN_YEAR]-(year)
WITH stadium, host, COUNT(*) as count, COLLECT(DISTINCT year.year) AS years
UNWIND years as year
WITH stadium, host, count, year
ORDER BY stadium.name, host.name, year
RETURN stadium.name, host.name, COLLECT(year) AS years, count
ORDER BY count DESC
LIMIT 5MATCH (host:Country)<-[:HOSTED_BY]-()-[:IN_YEAR]->(year)
WITH host, COUNT(*) AS times, COLLECT(year.year) AS years
UNWIND years AS year
WITH host, times, year
ORDER BY times DESC, year
RETURN host.name, times, COLLECT(year) AS years
ORDER BY times DESC