From a8608e2b796fd4f7885baddea391add5e308389a Mon Sep 17 00:00:00 2001 From: Brian Quinion Date: Tue, 19 Mar 2013 19:44:33 +0000 Subject: [PATCH] calculate the importance value --- utils/import_wikipedia.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/import_wikipedia.sh b/utils/import_wikipedia.sh index d40578dd..0a15e2d0 100755 --- a/utils/import_wikipedia.sh +++ b/utils/import_wikipedia.sh @@ -39,6 +39,8 @@ do echo "insert into wikipedia_article select '${i}', title, count, othercount, count+othercount from ${i}pagelinkcount;" | $psqlcmd done +echo "update wikipedia_article set importance = log(totalcount)/log((select max(totalcount) from wikipedia_article))" | $psqlcmd + # precalculated lat,lon from dbpedia wget http://downloads.dbpedia.org/current/en/geo_coordinates_en.nq.bz2 bzip2 -dc geo_coordinates_en.nq.bz2 | grep http://www.georss.org/georss/point | sed 's|]*> * "\(-\?[-0-9.E]\+\) \(-\?[-0-9.E]\+\)"@en .|update pagelinks set lat=\1, lon=\2 where language = '"'"'\3'"'"' and title = decode_url_part('"'"'\4'"'"');|g' | $psqlcmd -- 2.39.5