Svg-grabber.py: Difference between revisions

From OLPC
Jump to navigation Jump to search
(download 800 free svg-graphics with this script)
 
No edit summary
 
Line 17: Line 17:
s = raw_input()
s = raw_input()
if (s == "n"):
if s == "n":
sys.exit(0)
sys.exit(0)
Line 37: Line 37:
def check_url(url, lv= 0, dl=0):
def check_url(url, lv= 0, dl=0):
lv += 1
lv += 1
if history.count(url) > 0:
if url in history:
print "o already checked:", url
print "o already checked:", url
lv -= 1
lv -= 1
Line 52: Line 52:
# Search for <a tags
# Search for <a tags
for a in arr:
for a in arr:
if a.count('<a ') > 0:
if '<a ' in a:
if a.count('href') > 0:
if 'href' in a:
s = a[a.index('href')+5:].strip()
s = a[a.index('href')+5:].strip()
# Extract Link to string s
# Extract Link to string s
Line 63: Line 63:
s = s[:s.index("'")]
s = s[:s.index("'")]
if s.count(".svg?view=log") > 0:
if ".svg?view=log" in s:
svgpath = url.replace(domain, "")
svgpath = url.replace(domain, "")
svgpath = svgpath.replace(path, "")
svgpath = svgpath.replace(path, "")
if paths.count(svgpath) == 0:
if svgpath not in paths:
try: os.mkdir(svgpath)
try: os.mkdir(svgpath)
except: pass
except: pass
Line 85: Line 85:
else:
else:
# find subdirs
# find subdirs
if lv == 1 and s.count(path) > 0 and s[-1:] == "/":
if lv == 1 and path in s and s[-1:] == "/":
print "- possible path", s
print "- possible path", s
dl = check_url("%s%s" % (domain, s), lv + 1, dl)
dl = check_url("%s%s" % (domain, s), lv + 1, dl)

Latest revision as of 21:36, 5 February 2010

  • So you would like to have a collection of free SVGs?
  • Maybe like the one at freedesktop.org?
  • Then run this script and get all 820 SVGs in the correct directory structure!
#! /usr/bin/env python
# script started by crazy-chris

import os
import sys
import urllib2
	
print "  svg-grabber"
print "  ==========="
print "  source: webcvs.freedesktop.org (~ 820 svg's)"
print
print "  download all? (y/n)",

s = raw_input()
if s == "n":
	sys.exit(0)
	
svg_base = "svg-icons"

domain = "http://webcvs.freedesktop.org"
path = "/svg-icons/lila/gnome/scalable/"

url = "%s%s" % (domain, path)

history = []
paths = []

try: os.mkdir(svg_base)
except: pass

os.chdir(svg_base)

def check_url(url, lv= 0, dl=0):
	lv += 1
	if url in history:
		print "o already checked:", url
		lv -= 1	
		return dl
	
	history.append(url)

	print
	print "- reading url:", url
	c = urllib2.urlopen(url).read()
	
	arr = c.split(">")

	# Search for <a tags 
	for a in arr:
		if '<a ' in a:
			if 'href' in a:
				s = a[a.index('href')+5:].strip()
				# Extract Link to string s
				if s[0] == '"':
					s = s[1:]
					s = s[:s.index('"')]
				elif s[0] == "'":
					s = s[1:]
					s = s[:s.index("'")]
					
				if ".svg?view=log" in s:
					svgpath = url.replace(domain, "")
					svgpath = svgpath.replace(path, "")
	
					if svgpath not in paths:
						try: os.mkdir(svgpath)
						except: pass
						paths.append(svgpath)
					
					dl += 1
					
					orig = s.replace(".svg?view=log", ".svg?view=co")
					fn = s.replace(domain, "").replace(path, "").replace("?view=log", "")

					print "[%i] writing to: %s/%s" % (dl, svg_base, fn)

					svg = urllib2.urlopen("%s%s" % (domain, orig)).read()
					f = open(fn, 'w')
					f.write(svg)
 					
				else:
					# find subdirs
					if lv == 1 and path in s and s[-1:] == "/":
						print "- possible path", s
						dl = check_url("%s%s" % (domain, s), lv + 1, dl)
	lv -= 1	
	return dl

# Kick off the crawl at the top-level listing and report the total.
dl = check_url(url)
print "\n* svg's downloaded: %i" % dl