# Guesses which IMDb columns correspond to which Rotten Tomatoes columns by
# comparing cell values, then hands the candidate pairs to find_final_one.
require './find_final_one'
require 'csv'

imdb_csv = CSV.read("imdb.csv", :headers => true)
rotten_tomatoes_csv = CSV.read("rotten_tomatoes.csv", :headers => true)

# Column pairs passed through untouched to find_final_one.
# NOTE(review): presumably the expected (ground-truth) mapping -- confirm
# against find_final_one.
g = [
  ["imdb.Name", "rt.Name"],
  ["imdb.YearRange", "rt.Year"],
  ["imdb.ReleaseDate", "rt.ReleaseDate"],
  ["imdb.Director", "rt.Director"],
  ["imdb.Creator", "rt.Creator"],
  ["imdb.Cast", "rt.Cast"],
  ["imdb.Duration", "rt.Duration"],
  ["imdb.RatingValue", "rt.RatingValue"],
  ["imdb.Genre", "rt.Genre"],
  ["imdb.Description", "rt.Description"]
]

# Arrays of rows; row 0 is used as the header row by get_list.
imdb = imdb_csv.to_a
rt = rotten_tomatoes_csv.to_a

# Number of leading rows (header included) that take part in the comparison.
#
# NOTE(review): this is row count divided by column count, so only the first
# rows/cols rows are ever compared. That looks unintentional for an array of
# rows, but it is kept as-is because changing it alters both the results and
# the (quadratic) run time -- confirm the intent before touching it.
#
# Returns 0 for a table without a usable header row instead of raising.
def sampled_row_count(table)
  header = table[0]
  return 0 if header.nil? || header.empty?

  table.count / header.count
end

# Position-by-position mismatch count over the shared prefix of two values,
# plus the difference in their lengths. A nil value counts as length 0.
def positional_distance(str1, str2)
  length1 = str1.nil? ? 0 : str1.length
  length2 = str2.nil? ? 0 : str2.length
  shared = [length1, length2].min

  mismatches = (0...shared).count { |m| str1[m] != str2[m] }
  mismatches + (length1 - length2).abs
end

# Score for one pair of cells, in 0..2:
#   +1 when the distance is at most half the length of str1 (integer division)
#   +1 when both values have the same class (e.g. both String or both nil)
def pair_score(str1, str2)
  length1 = str1.nil? ? 0 : str1.length
  closeness = (positional_distance(str1, str2) <= length1 / 2) ? 1 : 0
  same_class = (str1.class == str2.class) ? 1 : 0
  closeness + same_class
end

# Sum of pair_score over every (left, right) combination of the two columns.
def column_similarity(left_values, right_values)
  left_values.inject(0) do |total, left|
    total + right_values.inject(0) { |sum, right| sum + pair_score(left, right) }
  end
end

# Builds candidate column pairs between the two tables and returns whatever
# find_final_one(list, g) returns.
#
# imdb, rt - arrays of rows (arrays of cell values); row 0 holds column names.
# g        - passed through unchanged to find_final_one.
#
# For each imdb column, the rt columns are scanned in order; a pair is recorded
# every time an rt column beats the best score seen so far for that imdb column
# and the score is at least half the sampled imdb row count. One imdb column
# can therefore contribute several pairs.
#
# Tables with no header row (or an empty one) contribute no candidates instead
# of raising.
def get_list(imdb, rt, g)
  imdb_header = imdb[0] || []
  rt_header = rt[0] || []

  i_row = sampled_row_count(imdb)
  r_row = sampled_row_count(rt)
  threshold = i_row / 2

  # Data rows 1..(n-1) of each table; row 0 is the header.
  imdb_rows = imdb[1...i_row] || []
  rt_rows = rt[1...r_row] || []

  # Extract the rt columns once rather than once per imdb column.
  rt_columns = rt_header.each_index.map { |l| rt_rows.map { |row| row[l] } }

  list = []
  imdb_header.each_index do |j|
    imdb_values = imdb_rows.map { |row| row[j] }
    best = 0

    rt_columns.each_with_index do |rt_values, l|
      total = column_similarity(imdb_values, rt_values)
      next unless total > best && total >= threshold

      list << ["imdb.#{imdb_header[j]}", "rt.#{rt_header[l]}"]
      best = total
    end
  end

  find_final_one(list, g)
end

p get_list(imdb, rt, g)