Wikipédia:Le Bistro/Passage a l'UTF-8/Script de conversion

Identification

modifier

Voici un petit script d'identification. Il suffit de le lancer dans le répertoire de base des images ; il affiche les noms de tous les fichiers à convertir. -- Looxix 24 mar 2004 à 20:47 (CET)

#!/usr/bin/python

import os, sys
import codecs



# Codec functions shared by the script: ``decode`` interprets bytes as
# ISO-8859-1 and ``encode`` produces UTF-8.  Each returns a
# (converted_value, length_consumed) pair.
decode = codecs.lookup('iso-8859-1').decode
encode = codecs.lookup('utf-8').encode

def cvt_file(path):
    """Print the base name of *path* if its name would change on conversion.

    The path is decoded with the module-level ``decode`` and re-encoded
    with the module-level ``encode``.  When the re-encoded path differs
    from the original, the file's base name is printed on stdout.  Nothing
    is renamed.

    A path whose conversion raises ``UnicodeError`` is reported on stderr
    and skipped.  Returns None in every case.
    """
    try:
        # Both codec functions return (result, length); only the result
        # is needed.
        uni = decode(path)[0]
        new = encode(uni)[0]
    except UnicodeError:
        # Only conversion failures are handled here.  Anything else
        # (Ctrl-C, output errors, programming mistakes) must propagate
        # instead of being silently swallowed.
        sys.stderr.write('can\'t convert "%s"\n' % (path,))
        return

    # An unchanged path needs no conversion and is not listed.
    if path != new:
        print('%s' % (os.path.basename(path),))



def do_dir(dirname):
    """Recursively visit *dirname*, handing every regular file to cvt_file.

    Symbolic links are reported on stderr and left alone, sub-directories
    are descended into, and entries that are neither a link, a directory
    nor a regular file are reported on stderr as being of unknown type.
    """
    for entry in os.listdir(dirname):
        path = os.path.join(dirname, entry)
        if os.path.islink(path):
            # Tested first so that a link is never treated as the
            # directory or file it points to.
            sys.stderr.write('"%s" is a symlink\n' % (path,))
        elif os.path.isdir(path):
            do_dir(path)
        elif os.path.isfile(path):
            cvt_file(path)
        else:
            sys.stderr.write('"%s" is of unknown type\n' % (path,))

# Entry point: recursively scan the current working directory.
do_dir('.')

Conversion

modifier

Voici un petit script pour la conversion des images (en serpent, bien sûr). Il faut le lancer dans le répertoire de base des images ; il affiche les commandes pour renommer les fichiers d'images, sans effectuer lui-même ce renommage. Je ne sais pas ce qu'il advient des anciennes versions de ces images. -- Looxix 24 mar 2004 à 22:48 (CET)

#!/usr/bin/python

import os, sys
import codecs
import hashlib



# Codec functions shared by the script: ``decode`` interprets bytes as
# ISO-8859-1 and ``encode`` produces UTF-8.  Each returns a
# (converted_value, length_consumed) pair.
decode = codecs.lookup('iso-8859-1').decode
encode = codecs.lookup('utf-8').encode

# Collected (old_path, new_path) pairs, filled in by cvt_file().
cvt_lst = list()

def calc_hash(str):
    """Return the first two hexadecimal digits of the MD5 digest of *str*.

    The parameter name shadows the ``str`` builtin; it is kept because it
    is part of the function's signature.
    """
    hex_digest = hashlib.md5(str).hexdigest()
    prefix = hex_digest[:2]
    return prefix

def get_path(name, hash):
    """Return the relative storage path of *name* for the given *hash*.

    The result is ``./<h0>/<h0h1>/<name>``, where ``<h0>`` is the first
    character of *hash* and ``<h0h1>`` its first two characters.
    """
    first_level = hash[0]
    second_level = hash[:2]
    return os.path.join('.', first_level, second_level, name)

def cvt_file(path):
    """Queue the rename needed to convert one file name.

    The base name of *path* is decoded with the module-level ``decode``
    and re-encoded with the module-level ``encode``.  The old and new
    storage paths are derived from each name's hash through ``calc_hash``
    and ``get_path``.  If the name changes, ``(old_path, new_path)`` is
    appended to the module-level ``cvt_lst``.

    Names left unchanged by the conversion are skipped: their old and new
    paths are identical, and moving a file onto itself only makes ``mv``
    fail.

    Any exception is reported on stderr together with *path* and then
    re-raised.
    """
    try:
        old_name = os.path.basename(path)
        old_hash = calc_hash(old_name)
        # The codec functions return (result, length); keep the result.
        uni_name = decode(old_name)[0]
        new_name = encode(uni_name)[0]

        # Paranoid check: the file is expected to sit at the location
        # derived from its own name.  A mismatch is only reported; the
        # derived path is still the one used below.
        old_path = get_path(old_name, old_hash)
        if old_path != path:
            sys.stderr.write('"%s" != "%s"\n' % (old_path, path))

        # Same name means same hash and same path: nothing to rename.
        if new_name == old_name:
            return

        new_hash = calc_hash(new_name)
        new_path = get_path(new_name, new_hash)
        cvt_lst.append((old_path, new_path))

    except Exception:
        sys.stderr.write('can\'t convert "%s"\n' % (path,))
        raise


def do_dir(dirname):
    """Recursively visit *dirname*, handing every regular file to cvt_file.

    Symbolic links are reported on stderr and left alone, sub-directories
    are descended into, and entries that are neither a link, a directory
    nor a regular file are reported on stderr as being of unknown type.
    """
    for entry in os.listdir(dirname):
        path = os.path.join(dirname, entry)
        if os.path.islink(path):
            # Tested first so that a link is never treated as the
            # directory or file it points to.
            sys.stderr.write('"%s" is a symlink\n' % (path,))
        elif os.path.isdir(path):
            do_dir(path)
        elif os.path.isfile(path):
            cvt_file(path)
        else:
            sys.stderr.write('"%s" is of unknown type\n' % (path,))


def _sh_quote(arg):
    """Return *arg* single-quoted so that a POSIX shell reads it literally."""
    # Nothing is special inside single quotes.  An embedded single quote
    # is written by closing the quotes, adding an escaped quote, and
    # reopening them.
    return "'" + arg.replace("'", "'\\''") + "'"


# Scan the current directory to fill cvt_lst, then print (without
# executing) one "mv" command per collected (old_path, new_path) pair.
# File names are single-quoted: inside double quotes a name containing
# '"', '$', a backtick or a backslash would break or alter the command.
do_dir('.')
for old, new in cvt_lst:
    print('mv %s %s' % (_sh_quote(old), _sh_quote(new)))