Skip to content

Instantly share code, notes, and snippets.

@rinn7e
Created May 5, 2017 05:05
Show Gist options
  • Select an option

  • Save rinn7e/a3e6c94da6541b317db06f76ac8dacd3 to your computer and use it in GitHub Desktop.

Select an option

Save rinn7e/a3e6c94da6541b317db06f76ac8dacd3 to your computer and use it in GitHub Desktop.
module Calc exposing (..)
import Html exposing (..)
-- Page that prints every worked example: tf-idf weights grouped by keyword,
-- vector lengths, and cosine / Euclidean distance in two and four dimensions.
main =
    let
        -- One paragraph holding the string form of a computed value.
        shown value =
            p [] [ text (toString value) ]

        -- Visual separator between groups of tf-idf results.
        divider =
            p [] [ text "--" ]

        heading title =
            h1 [] [ text title ]

        -- The first argument is passed to tf_idf as its third parameter;
        -- the second parameter is always 806791, and each listed frequency
        -- yields one paragraph.
        tfIdfGroup docsWithTerm frequencies =
            List.map (\f -> shown (tf_idf f 806791 docsWithTerm)) frequencies

        -- Reference vectors that every candidate is compared against.
        query2 =
            ( 2, 2 )

        query4 =
            ( 1, 1, 1, 1 )

        candidates2 =
            [ ( 0.5, 1.5 ), ( 6, 6 ), ( 12, 9 ) ]

        candidates4 =
            [ ( 12, 0, 3, 24 ), ( 10, 5, 20, 10 ), ( 0, 12, 9, 8 ) ]
    in
        div []
            (List.concat
                [ tfIdfGroup 18165 [ 27, 4, 34 ]
                , [ divider ]
                , tfIdfGroup 6723 [ 3, 33, 0 ]
                , [ divider ]
                , tfIdfGroup 19241 [ 0, 33, 29 ]
                , [ divider ]
                , tfIdfGroup 25235 [ 14, 0, 17 ]
                , [ heading "--LENGTH--"
                  , shown (len 31.5 17.9 0 24)
                  , shown (len 16.4 41.7 32.6 0)
                  , shown (len 33.3 0 31.6 25.4)
                  ]
                , [ heading "--Cosine--" ]
                , List.map (\v -> shown (cosine query2 v)) candidates2
                , [ heading "--Distance--" ]
                , List.map (\v -> shown (distance query2 v)) candidates2
                , [ heading "--Cosine 4--" ]
                , List.map (\v -> shown (cosine4 query4 v)) candidates4
                , [ heading "--Distance 4--" ]
                , List.map (\v -> shown (distance4 query4 v)) candidates4
                ]
            )
-- tf-idf weight of a keyword in one document.
--
--   f  : frequency of the word in that document
--   nA : number of all documents
--   nI : number of documents that contain the keyword
--
-- The weight is (1 + log2 f) * log2 (nA / nI).
-- A word that does not occur (f <= 0) gets weight 0: without this guard
-- `logBase 2 0` is -Infinity and the product becomes -Infinity or NaN.
tf_idf f nA nI =
    if f <= 0 then
        0
    else
        (1 + logBase 2 f) * logBase 2 (nA / nI)
-- Euclidean length of the 4-component vector (a1, a2, a3, a4):
-- the square root of the sum of the squared components.
len a1 a2 a3 a4 =
    [ a1, a2, a3, a4 ]
        |> List.map (\component -> component ^ 2)
        |> List.sum
        |> sqrt
-- Euclidean distance between the 2-D points (x1, x2) and (a1, a2).
distance ( x1, x2 ) ( a1, a2 ) =
    let
        -- Per-axis differences between the two points.
        dFirst =
            x1 - a1

        dSecond =
            x2 - a2
    in
        sqrt (dFirst ^ 2 + dSecond ^ 2)
-- Cosine of the angle between the 2-D vectors (x1, x2) and (a1, a2):
-- their dot product divided by the product of their Euclidean norms.
-- If either vector is all zeros the denominator is 0 and the result is
-- not a meaningful number.
cosine ( x1, x2 ) ( a1, a2 ) =
    let
        dotProduct =
            x1 * a1 + x2 * a2

        normX =
            sqrt (x1 ^ 2 + x2 ^ 2)

        normA =
            sqrt (a1 ^ 2 + a2 ^ 2)
    in
        dotProduct / (normX * normA)
-- Euclidean distance between two 4-D points given as 4-tuples.
distance4 ( x1, x2, x3, x4 ) ( a1, a2, a3, a4 ) =
    -- Square each per-axis difference, add them up in axis order,
    -- then take the square root.
    List.map2 (\x a -> (x - a) ^ 2) [ x1, x2, x3, x4 ] [ a1, a2, a3, a4 ]
        |> List.sum
        |> sqrt
-- Cosine of the angle between two 4-D vectors given as 4-tuples:
-- dot product divided by the product of the two Euclidean norms.
-- If either vector is all zeros the denominator is 0 and the result is
-- not a meaningful number.
cosine4 ( x1, x2, x3, x4 ) ( a1, a2, a3, a4 ) =
    let
        dotProduct =
            x1 * a1 + x2 * a2 + x3 * a3 + x4 * a4

        normX =
            sqrt (x1 ^ 2 + x2 ^ 2 + x3 ^ 2 + x4 ^ 2)

        normA =
            sqrt (a1 ^ 2 + a2 ^ 2 + a3 ^ 2 + a4 ^ 2)
    in
        dotProduct / (normX * normA)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment