I need help understanding the best practice for removing illegal characters from file and path names.
Windows (FAT32, NTFS): Any Unicode except NUL, \, /, :, *, ", <, >, |
I found my way
Windows (FAT32, NTFS): Any Unicode except NUL, \, /, :, *, ", <, >, |
# import libraries
import urllib2
from bs4 import BeautifulSoup
import urlparse, os
# get name movie
spage = 'http://www.imdb.com/title/tt2527336/?ref_=rlm'
page = urllib2.urlopen(spage)
soup = BeautifulSoup(page, 'html.parser')
movie = soup.find('h1', attrs={'itemprop':'name'})
title = movie.get_text(strip=True)
print title
# get cover movie
cover = soup.find(attrs={"class" : "poster"})
cover_url = (cover.find('img'))['src']
img = urllib2.urlopen(cover_url)
a = urlparse.urlparse(cover_url)
a.path
imgn = os.path.basename(a.path)
localFile = open(imgn, 'wb')
localFile.write(img.read())
localFile.close()
#get ext
ext = os.path.splitext(imgn)[1]
#get path & file name
x = os.path.split(os.path.abspath(imgn))
print x[0]
print x[1]
#rename file
old_file = os.path.join(x[0], x[1])
new_file = os.path.join(x[0], title+ext)
print old_file
print new_file
os.rename(old_file,new_file)#???????
os.remove(old_file)I found my way
# import libraries
import contextlib
import os
import urllib2
import urlparse

from bs4 import BeautifulSoup

# Printable characters Windows (FAT32, NTFS) does not allow in a file name.
# Control characters (including NUL) are rejected separately by code point.
ILLEGAL_CHARS = '\\/:*?"<>|'


def clean_filename(name):
    """Return ``name`` with every character Windows forbids in a file name removed.

    Dropped characters are ``\\ / : * ? " < > |`` and all control characters
    (code points 0-31, which covers the NUL character). Everything else,
    including non-ASCII text, is kept unchanged.
    """
    return ''.join(
        ch for ch in name if ch not in ILLEGAL_CHARS and ord(ch) >= 32
    )


# get name movie: fetch the title page and read the <h1 itemprop="name"> text
spage = 'http://www.imdb.com/title/tt2527336/?ref_=rlm'
# urllib2 responses are not context managers in Python 2, so closing() is
# used to make sure the connection is released even if parsing fails.
with contextlib.closing(urllib2.urlopen(spage)) as page:
    soup = BeautifulSoup(page, 'html.parser')
movie = soup.find('h1', attrs={'itemprop': 'name'})
title = movie.get_text(strip=True)
print(title)

# clean string: strip the characters that cannot appear in a file name
ctitle = clean_filename(title)
print(ctitle)

# get cover movie: the <img> inside the element with class "poster"
cover = soup.find(attrs={"class": "poster"})
cover_url = cover.find('img')['src']
# The last component of the URL path is the name the image is first saved as.
imgn = os.path.basename(urlparse.urlparse(cover_url).path)
with contextlib.closing(urllib2.urlopen(cover_url)) as img:
    with open(imgn, 'wb') as localFile:
        localFile.write(img.read())

# get ext
ext = os.path.splitext(imgn)[1]

# get path & file name
x = os.path.split(os.path.abspath(imgn))
print(x[0])
print(x[1])

# rename file
old_file = os.path.join(x[0], x[1])
new_file = os.path.join(x[0], ctitle + ext)
print(old_file)
print(new_file)
# On Windows os.rename refuses to overwrite an existing destination, so a
# leftover file from a previous run is removed first.
if new_file != old_file and os.path.exists(new_file):
    os.remove(new_file)
# os.rename moves the file, so old_file no longer exists afterwards and must
# not be removed again (doing so raises OSError).
os.rename(old_file, new_file)
