Svg-grabber.py

From OLPC
Revision as of 22:54, 5 December 2007 by Crazy-chris (talk | contribs) (download 800 free svg-graphics with this script)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search
  • So you would like to have a collection of free SVGs?
  • Maybe like the one at freedesktop.org?
  • Then run this script to get all 820 SVGs in the correct directory structure!
#! /usr/bin/env python
# script started by crazy-chris

import os
import sys
import urllib2
	
print "  svg-grabber"
print "  ==========="
print "  source: webcvs.freedesktop.org (~ 820 svg's)"
print
print "  download all? (y/n)",

s = raw_input()
if (s == "n"):
	sys.exit(0)
	
svg_base = "svg-icons"

domain = "http://webcvs.freedesktop.org"
path = "/svg-icons/lila/gnome/scalable/"

url = "%s%s" % (domain, path)

history = []
paths = []

try: os.mkdir(svg_base)
except: pass

os.chdir(svg_base)

def check_url(url, lv= 0, dl=0):
	lv += 1
	if history.count(url) > 0:
		print "o already checked:", url
		lv -= 1	
		return dl
	
	history.append(url)

	print
	print "- reading url:", url
	c = urllib2.urlopen(url).read()
	
	arr = c.split(">")

	# Search for <a tags 
	for a in arr:
		if a.count('<a ') > 0:
			if a.count('href') > 0:
				s = a[a.index('href')+5:].strip()
				# Extract Link to string s
				if s[0] == '"':
					s = s[1:]
					s = s[:s.index('"')]
				elif s[0] == "'":
					s = s[1:]
					s = s[:s.index("'")]
					
				if s.count(".svg?view=log") > 0:
					svgpath = url.replace(domain, "")
					svgpath = svgpath.replace(path, "")
	
					if paths.count(svgpath) == 0:
						try: os.mkdir(svgpath)
						except: pass
						paths.append(svgpath)
					
					dl += 1
					
					orig = s.replace(".svg?view=log", ".svg?view=co")
					fn = s.replace(domain, "").replace(path, "").replace("?view=log", "")

					print "[%i] writing to: %s/%s" % (dl, svg_base, fn)

					svg = urllib2.urlopen("%s%s" % (domain, orig)).read()
					f = open(fn, 'w')
					f.write(svg)
 					
				else:
					# find subdirs
					if lv == 1 and s.count(path) > 0 and s[-1:] == "/":
						print "- possible path", s
						dl = check_url("%s%s" % (domain, s), lv + 1, dl)
	lv -= 1	
	return dl

dl = check_url(url)
print "\n* svg's downloaded: %i" % dl