Usage
It s a solution for my specific case, but maybe with some modifications can help another people.
Usage
SELECT fix_wrong_encoding( LATIN1 );
Function
-- Convert words with wrong encoding
CREATE OR REPLACE FUNCTION fix_wrong_encoding(encoding_name VARCHAR)
RETURNS VOID
AS $$
DECLARE
r RECORD;
counter INTEGER;
token_id INTEGER;
BEGIN
counter = 0;
FOR r IN SELECT t.id, t.text FROM token t
LOOP
BEGIN
RAISE NOTICE Converting % , r.text;
r.text := convert_from(convert_to(r.text,encoding_name), UTF8 );
RAISE NOTICE Converted to % , r.text;
RAISE NOTICE Checking existence. ;
SELECT id INTO token_id FROM token WHERE text = r.text;
IF (token_id IS NOT NULL) THEN
BEGIN
RAISE NOTICE Token already exists. Updating ids in textblockhastoken ;
IF(token_id = r.id) THEN
RAISE NOTICE Token is the same. ;
CONTINUE;
END IF;
UPDATE textblockhastoken SET tokenid = token_id
WHERE tokenid = r.id;
RAISE NOTICE Removing current token. ;
DELETE FROM token WHERE id = r.id;
END;
ELSE
BEGIN
RAISE NOTICE Token don t exists. Updating text in token ;
UPDATE token SET text = r.text WHERE id = r.id;
END;
END IF;
EXCEPTION WHEN untranslatable_character THEN
--do nothing
WHEN character_not_in_repertoire THEN
--do nothing
END;
counter = counter + 1;
RAISE NOTICE % token converted , counter;
END LOOP;
END
$$
LANGUAGE plpgsql;