Svg-grabber.py
- So you would like to have a collection of free SVGs?
- Maybe like the one at freedesktop.org?
- Then run this script and get all 820 SVGs in the correct directory structure!
#!/usr/bin/env python
# script started by crazy-chris
# Python 2 script: uses print statements, raw_input() and urllib2.

import os
import sys
import urllib2

print " svg-grabber"
print " ==========="
print " source: webcvs.freedesktop.org (~ 820 svg's)"
print
print " download all? (y/n)",

s = raw_input()
if s == "n":
    sys.exit(0)

svg_base = "svg-icons"
domain = "http://webcvs.freedesktop.org"
path = "/svg-icons/lila/gnome/scalable/"
url = "%s%s" % (domain, path)

history = []
paths = []

try:
    os.mkdir(svg_base)
except OSError:
    pass
os.chdir(svg_base)


def check_url(url, lv=0, dl=0):
    # Recursively walk the ViewCVS directory listing, downloading every .svg
    # file and descending into subdirectories. Returns the download count.
    lv += 1

    if url in history:
        print "o already checked:", url
        lv -= 1
        return dl

    history.append(url)

    print
    print "- reading url:", url

    c = urllib2.urlopen(url).read()
    arr = c.split(">")

    # Search for <a tags
    for a in arr:
        if '<a ' in a and 'href' in a:
            s = a[a.index('href') + 5:].strip()

            # Extract the link target into string s
            if s[0] == '"':
                s = s[1:]
                s = s[:s.index('"')]
            elif s[0] == "'":
                s = s[1:]
                s = s[:s.index("'")]

            if ".svg?view=log" in s:
                # Recreate the remote directory structure locally
                svgpath = url.replace(domain, "")
                svgpath = svgpath.replace(path, "")

                if svgpath not in paths:
                    try:
                        os.mkdir(svgpath)
                    except OSError:
                        pass
                    paths.append(svgpath)

                dl += 1
                # ?view=co serves the raw file contents instead of the log page
                orig = s.replace(".svg?view=log", ".svg?view=co")
                fn = s.replace(domain, "").replace(path, "").replace("?view=log", "")

                print "[%i] writing to: %s/%s" % (dl, svg_base, fn)

                svg = urllib2.urlopen("%s%s" % (domain, orig)).read()
                f = open(fn, 'w')
                f.write(svg)
                f.close()
            else:
                # find subdirs
                if lv == 1 and path in s and s[-1:] == "/":
                    print "- possible path", s
                    dl = check_url("%s%s" % (domain, s), lv + 1, dl)

    lv -= 1
    return dl


dl = check_url(url)
print "\n* svg's downloaded: %i" % dl
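The script above targets Python 2 (print statements, raw_input(), urllib2). For readers on Python 3, here is a minimal sketch of just the per-file download step, assuming the same ViewCVS URL scheme as above; fetch_svg and out_dir are illustrative names, and whether webcvs.freedesktop.org still serves these paths is not something this page confirms.

import os
import urllib.request

domain = "http://webcvs.freedesktop.org"
path = "/svg-icons/lila/gnome/scalable/"

def fetch_svg(href, out_dir="svg-icons"):
    # href is a ".../foo.svg?view=log" link as found by the crawler above;
    # ?view=co asks ViewCVS for the raw file contents instead of the log page.
    raw_url = domain + href.replace(".svg?view=log", ".svg?view=co")
    fn = href.replace(path, "").replace("?view=log", "")
    target = os.path.join(out_dir, fn)
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with urllib.request.urlopen(raw_url) as resp, open(target, "wb") as f:
        f.write(resp.read())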