Svg-grabber.py

From OLPC
Revision as of 21:36, 5 February 2010 by 164.67.235.79 (talk)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
  • So you would like to have a collection of free SVGs?
  • Maybe like the one at freedesktop.org?
  • Then run this script and get all 820 SVGs in the correct directory structure!
#! /usr/bin/env python
# script started by crazy-chris

import os
import sys
import urllib2
	
# Banner. Each print(...) takes exactly one argument so the output is the
# same under Python 2's print statement.
print("  svg-grabber")
print("  ===========")
print("  source: webcvs.freedesktop.org (~ 820 svg's)")
print("")

# Show the prompt without a trailing newline and flush it so it is visible
# before the script blocks waiting for input.
sys.stdout.write("  download all? (y/n) ")
sys.stdout.flush()

# Treat any spelling of "n"/"no" as a refusal; every other answer proceeds.
s = raw_input()
if s.strip().lower() in ("n", "no"):
    sys.exit(0)

# Local directory that receives the downloaded tree.
svg_base = "svg-icons"

# Root listing of the icon set on the CVS web front end.
domain = "http://webcvs.freedesktop.org"
path = "/svg-icons/lila/gnome/scalable/"

url = "%s%s" % (domain, path)

# Shared crawl state used by check_url():
#   history - URLs that have already been read
#   paths   - site-relative directories already seen to contain SVGs
history = []
paths = []

# Create the output directory if needed. Only "already exists" is ignored;
# any other failure is reported here instead of being silently swallowed.
try:
    os.mkdir(svg_base)
except OSError:
    if not os.path.isdir(svg_base):
        raise

# check_url() writes files relative to the current working directory.
os.chdir(svg_base)

def _fetch(address):
    """Return the raw body served at *address*, always closing the connection."""
    response = urllib2.urlopen(address)
    try:
        return response.read()
    finally:
        response.close()


def _href_target(fragment):
    """Return the href value of an ``<a ...`` tag fragment, or "" if there is none."""
    if '<a ' not in fragment or 'href' not in fragment:
        return ""
    # Skip past 'href=' (5 characters) to reach the attribute value.
    value = fragment[fragment.index('href') + 5:].strip()
    if value[:1] in ('"', "'"):
        # Quoted value: keep the text up to the closing quote. If the quote is
        # never closed, keep the remainder instead of failing.
        value = value[1:].split(value[0], 1)[0]
    return value


def _ensure_dir(directory):
    """Create *directory* (including parents) unless it already exists."""
    if not directory or os.path.isdir(directory):
        return
    try:
        os.makedirs(directory)
    except OSError:
        # Tolerate "already exists"; re-raise genuine failures.
        if not os.path.isdir(directory):
            raise


def check_url(url, lv=0, dl=0):
    """Download every SVG linked from the listing page at *url*.

    The page is split on ">" and each ``<a ... href=...`` fragment is
    inspected. Links containing ".svg?view=log" are fetched through their
    "?view=co" counterpart and written below the current working directory.
    Links that contain ``path`` and end in "/" are treated as sub-listings
    and crawled recursively.

    Parameters:
        url -- absolute URL of the listing page to read.
        lv  -- nesting level of this call. Sub-listings are only followed
               when lv is 0, so the crawl descends exactly one level below
               the page it was started on.
        dl  -- number of files downloaded so far.

    Returns the updated download count.

    Side effects: appends *url* to the module-level ``history`` list (a URL
    already in it is skipped), appends newly seen site-relative directories
    to ``paths``, prints progress, and writes files to disk.

    Errors raised by ``urllib2.urlopen`` and by file or directory creation
    propagate to the caller.
    """
    depth = lv + 1
    if url in history:
        print("o already checked: %s" % url)
        return dl

    history.append(url)

    print("")
    print("- reading url: %s" % url)
    page = _fetch(url)

    # Search for <a tags
    for fragment in page.split(">"):
        link = _href_target(fragment)
        if not link:
            continue

        if ".svg?view=log" in link:
            # Directory of the listing page, relative to the crawl root.
            svgpath = url.replace(domain, "").replace(path, "")
            # Local file name, relative to the download directory.
            fn = link.replace(domain, "").replace(path, "").replace("?view=log", "")

            # The name comes from remote HTML: never write outside the
            # download directory.
            if os.path.isabs(fn) or ".." in fn.replace("\\", "/").split("/"):
                print("! skipping unsafe target: %s" % fn)
                continue

            if svgpath not in paths:
                paths.append(svgpath)
            # Create the directory the file will actually land in.
            _ensure_dir(os.path.dirname(fn))

            dl += 1

            # "?view=co" serves the file itself rather than its log page.
            orig = link.replace(".svg?view=log", ".svg?view=co")

            print("[%i] writing to: %s/%s" % (dl, svg_base, fn))

            svg = _fetch("%s%s" % (domain, orig))
            # Binary mode keeps the downloaded bytes verbatim; try/finally
            # guarantees the handle is closed even if the write fails.
            out = open(fn, 'wb')
            try:
                out.write(svg)
            finally:
                out.close()

        elif depth == 1 and path in link and link.endswith("/"):
            # find subdirs
            print("- possible path %s" % link)
            dl = check_url("%s%s" % (domain, link), depth + 1, dl)

    return dl

# Start the crawl at the root listing, then report how many files were saved.
dl = check_url(url)
print("\n* svg's downloaded: %i" % dl)