Wikipédia:Le Bistro/Passage a l'UTF-8/Script de conversion
Identification
Voici un petit script d'identification. Il suffit de le lancer dans le répertoire de base des images ; il affiche les noms de tous les fichiers à convertir. -- Looxix 24 mar 2004 à 20:47 (CET)
#!/usr/bin/python
import os, sys
import codecs
# Codec function pair used for the name conversion: `decode` turns
# ISO-8859-1 bytes into text and `encode` turns text into UTF-8 bytes.
# Each callable returns an (output, length_consumed) tuple.
encode = codecs.getencoder('utf-8')
decode = codecs.getdecoder('iso-8859-1')
def cvt_file(path):
    """Print the base name of *path* if its name needs converting to UTF-8.

    The path is decoded as ISO-8859-1 and re-encoded as UTF-8.  When the
    result differs from the original path, the file's base name is written
    to standard output.  Nothing is renamed.
    """
    # Only the conversion itself is guarded, so that an output error or a
    # Ctrl-C is no longer swallowed together with conversion failures.
    try:
        uni, _ = decode(path)
        new, _ = encode(uni)
    except UnicodeError:
        # Best effort: a name that cannot be converted is skipped silently.
        return
    if path != new:
        sys.stdout.write('%s\n' % (os.path.basename(path)))
def do_dir(dirname):
    """Walk *dirname* recursively and hand every regular file to cvt_file.

    Symbolic links are reported on standard error and not followed.
    Entries that are neither directories nor regular files are reported
    on standard error as well.
    """
    for entry in os.listdir(dirname):
        full = os.path.join(dirname, entry)
        if os.path.islink(full):
            # Checked first so that links to directories are never descended.
            sys.stderr.write('"%s" is a symlink' % (full) + '\n')
        elif os.path.isdir(full):
            do_dir(full)
        elif os.path.isfile(full):
            cvt_file(full)
        else:
            sys.stderr.write('"%s" is of unknown type' % (full) + '\n')
# Entry point: scan the tree rooted at the current working directory.
do_dir(os.curdir)
Conversion
Voici un petit script pour la conversion des images (en serpent, bien sûr). Il faut le lancer dans le répertoire de base des images ; il affiche les commandes pour renommer les fichiers d'images, sans effectuer lui-même ce renommage. Je ne sais pas ce qu'il advient des anciennes versions de ces images. -- Looxix 24 mar 2004 à 22:48 (CET)
#!/usr/bin/python
import os, sys
import codecs
import hashlib
# Codec function pair used for the name conversion: `decode` turns
# ISO-8859-1 bytes into text and `encode` turns text into UTF-8 bytes.
# Each callable returns an (output, length_consumed) tuple.
encode = codecs.getencoder('utf-8')
decode = codecs.getdecoder('iso-8859-1')

# Accumulates (old_path, new_path) pairs collected while walking the tree.
cvt_lst = list()
def calc_hash(str):
    """Return the first two hexadecimal digits of the MD5 digest of *str*."""
    md5 = hashlib.md5()
    md5.update(str)
    hex_digest = md5.hexdigest()
    return hex_digest[0:2]
def get_path(name, hash):
    """Build the relative path ./<h0>/<h0h1>/<name> for a file name.

    *hash* supplies the directory levels: its first character names the
    first level and its first two characters name the second level.
    """
    first_level = hash[0]
    second_level = hash[:2]
    return os.path.join('.', first_level, second_level, name)
def cvt_file(path):
    """Queue the rename needed to convert one file's name to UTF-8.

    The base name of *path* is decoded as ISO-8859-1 and re-encoded as
    UTF-8.  The old and new locations are rebuilt with calc_hash and
    get_path and, when they differ, appended to the module-level cvt_lst
    as an (old_path, new_path) pair.

    Any exception is reported on standard error and then re-raised.
    """
    try:
        old_name = os.path.basename(path)
        old_hash = calc_hash(old_name)
        uni_name, _ = decode(old_name)
        new_name, _ = encode(uni_name)
        new_hash = calc_hash(new_name)
        # Paranoid check: the file is expected to sit where its hash says.
        # A mismatch is only reported; processing continues.
        old_path = get_path(old_name, old_hash)
        if old_path != path:
            sys.stderr.write('"%s" != "%s"' % (old_path, path) + '\n')
        new_path = get_path(new_name, new_hash)
        # A name left unchanged by the conversion needs no rename; queuing
        # it would only produce a no-op "mv x x" command.
        if new_path != old_path:
            cvt_lst.append((old_path, new_path))
    except Exception:
        sys.stderr.write("can't convert \"%s\"" % (path) + '\n')
        raise
def do_dir(dirname):
    """Walk *dirname* recursively and hand every regular file to cvt_file.

    Symbolic links are reported on standard error and not followed.
    Entries that are neither directories nor regular files are reported
    on standard error as well.
    """
    for entry in os.listdir(dirname):
        full = os.path.join(dirname, entry)
        if os.path.islink(full):
            # Checked first so that links to directories are never descended.
            sys.stderr.write('"%s" is a symlink' % (full) + '\n')
        elif os.path.isdir(full):
            do_dir(full)
        elif os.path.isfile(full):
            cvt_file(full)
        else:
            sys.stderr.write('"%s" is of unknown type' % (full) + '\n')
def _sh_quote(arg):
    """Return *arg* single-quoted so that a POSIX shell takes it literally."""
    # Inside single quotes nothing is special except the quote itself, which
    # is emitted as: close quote, escaped quote, reopen quote.
    return "'" + arg.replace("'", "'\\''") + "'"


# Entry point: collect the renames for the tree rooted at the current
# directory, then print one "mv" command per rename.  Nothing is renamed
# by this script itself.
do_dir('.')
for old, new in cvt_lst:
    # File names may contain ", $, ` or \ -- double quotes would not protect
    # those, so each path is single-quoted instead.
    sys.stdout.write('mv %s %s\n' % (_sh_quote(old), _sh_quote(new)))