From dab59d59dea58da75ba3c6684ccfe6dfaee78e14 Mon Sep 17 00:00:00 2001
From: Brian Quinion
Date: Mon, 9 Sep 2013 14:10:09 +0100
Subject: [PATCH] replace '_' and ' ' when matching wikipedia article names

Values loaded into import_link_hit are decoded and have '_' replaced by
' ' before being aggregated into link_hit; rows whose decoded value is
null are skipped. The select list and the group by clause use the same
expression, as PostgreSQL requires for a grouped, non-aggregated column.

link_hit and entity_link_hit are truncated before they are repopulated,
and the per-entity sum of hits is stored in a new entity_hit table with
a unique index on entity_id.

---
 wikidata/import.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/wikidata/import.sh b/wikidata/import.sh
index c701b032..97621f17 100755
--- a/wikidata/import.sh
+++ b/wikidata/import.sh
@@ -20,5 +20,9 @@ $PSQL -c "alter table entity add column description_en text"
 $PSQL -c "update entity set description_en = description from entity_description where entity.entity_id = entity_description.entity_id and language = 'en'"
 
 cat totals.txt | $PSQL -c "COPY import_link_hit from STDIN WITH CSV DELIMITER ' '"
-$PSQL -c "insert into link_hit select target||'wiki', catch_decode_url_part(value), sum(hits) from import_link_hit group by target||'wiki', catch_decode_url_part(value)"
+$PSQL -c "truncate link_hit"
+$PSQL -c "insert into link_hit select target||'wiki', replace(catch_decode_url_part(value), '_', ' '), sum(hits) from import_link_hit where replace(catch_decode_url_part(value), '_', ' ') is not null group by target||'wiki', replace(catch_decode_url_part(value), '_', ' ')"
+$PSQL -c "truncate entity_link_hit"
 $PSQL -c "insert into entity_link_hit select entity_id, target, value, coalesce(hits,0) from entity_link left outer join link_hit using (target, value)"
+$PSQL -c "create table entity_hit as select entity_id,sum(hits) as hits from entity_link_hit group by entity_id"
+$PSQL -c "create unique index idx_entity_hit on entity_hit using btree (entity_id)"
-- 
2.39.5