Svg-grabber.py

From OLPC
Revision as of 17:36, 5 February 2010 by 164.67.235.79 (talk)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to: navigation, search
  • Would you like to have a collection of free SVG icons?
  • Perhaps like the one hosted at freedesktop.org?
  • Then run this script to download all ~820 SVG files into the correct directory structure!
#! /usr/bin/env python
# script started by crazy-chris

import os
import sys
import urllib2
	
# Banner. A single parenthesised argument prints identically whether
# `print` is the Python 2 statement or the Python 3 function.
print("  svg-grabber")
print("  ===========")
print("  source: webcvs.freedesktop.org (~ 820 svg's)")
print("")

# Emit the prompt without a trailing newline and flush it so it is visible
# before the script blocks waiting for input.
sys.stdout.write("  download all? (y/n) ")
sys.stdout.flush()

# Treat "n"/"no" in any letter case, with stray whitespace, as a refusal;
# any other answer starts the download.
answer = raw_input()
if answer.strip().lower() in ("n", "no"):
    sys.exit(0)

# Local directory the files are written into (created below if missing).
svg_base = "svg-icons"

# Remote location of the icon tree: `domain` + `path` is the root listing.
domain = "http://webcvs.freedesktop.org"
path = "/svg-icons/lila/gnome/scalable/"

url = "%s%s" % (domain, path)

# State shared with check_url():
#   history - URLs that have already been read
#   paths   - remote sub-directories for which a local directory was handled
history = []
paths = []

# Create the target directory only when it is missing, so that a genuine
# failure (e.g. permissions) is reported instead of being silently ignored.
if not os.path.isdir(svg_base):
    os.mkdir(svg_base)

# All files are written relative to the target directory.
os.chdir(svg_base)

def _read_url(address):
    """Return the response body of *address*, always closing the connection."""
    response = urllib2.urlopen(address)
    try:
        return response.read()
    finally:
        response.close()


def _extract_href(fragment):
    """Return the href value found in one '>'-split page fragment, or None.

    None is returned when the fragment holds no ``<a `` tag with an href,
    when nothing follows the attribute name, or when a quoted value is never
    terminated. An unquoted value is returned as the whole remainder of the
    fragment.
    """
    if '<a ' not in fragment or 'href' not in fragment:
        return None

    # Skip the attribute name plus the single character following it
    # (normally '=').
    value = fragment[fragment.index('href') + 5:].strip()
    if not value:
        return None

    quote = value[0]
    if quote in ('"', "'"):
        value = value[1:]
        end = value.find(quote)
        if end == -1:
            # Unterminated quote: treat the link as unusable.
            return None
        value = value[:end]
    return value


def check_url(url, lv=0, dl=0):
    """Read the listing at *url*, download every linked SVG, return the count.

    Links containing ".svg?view=log" are fetched through their "?view=co"
    counterpart and written below the current working directory, using the
    link with `domain` and `path` stripped as the file name. Links that
    contain `path` and end in "/" are followed as sub-directories, but only
    from the outermost call.

    Relies on the module-level names `domain`, `path`, `svg_base`, `history`
    (URLs already read) and `paths` (remote directories already handled).

    Parameters:
        url -- absolute URL of the listing page to read.
        lv  -- nesting level of the caller. It is incremented on entry and
               nested calls are given ``lv + 1``, so sub-directory links are
               only followed when the function is entered with lv == 0.
        dl  -- number of files downloaded so far.

    Returns the updated download counter.
    """
    lv += 1
    if url in history:
        print("o already checked: %s" % url)
        return dl

    history.append(url)

    print("")
    print("- reading url: %s" % url)
    page = _read_url(url)

    # Directory of this listing relative to the crawl root; it is the same
    # for every link on the page, so compute it once.
    svgpath = url.replace(domain, "").replace(path, "")

    for fragment in page.split(">"):
        link = _extract_href(fragment)
        if link is None:
            continue

        if ".svg?view=log" in link:
            fn = link.replace(domain, "").replace(path, "").replace("?view=log", "")

            # The name comes from a remote page: never write outside the
            # download directory.
            normalized = os.path.normpath(fn)
            if os.path.isabs(normalized) or normalized.split(os.sep)[0] == os.pardir:
                print("! skipping suspicious file name: %s" % fn)
                continue

            if svgpath not in paths:
                # svgpath is empty for the root listing, whose files go
                # straight into the working directory.
                if svgpath and not os.path.isdir(svgpath):
                    os.makedirs(svgpath)
                paths.append(svgpath)

            dl += 1

            orig = link.replace(".svg?view=log", ".svg?view=co")

            print("[%i] writing to: %s/%s" % (dl, svg_base, fn))

            svg = _read_url("%s%s" % (domain, orig))
            # Binary mode stores the downloaded bytes unmodified; the
            # with-block guarantees the file is flushed and closed.
            with open(fn, 'wb') as out:
                out.write(svg)

        elif lv == 1 and path in link and link.endswith("/"):
            # Candidate sub-directory of the crawl root.
            print("- possible path %s" % link)
            dl = check_url("%s%s" % (domain, link), lv + 1, dl)

    return dl

# Crawl from the root listing, then report how many files were fetched.
dl = check_url(url)
print("\n* svg's downloaded: %i" % (dl,))