Some fun with the Stanford GraphBase.
Fascinating conclusion, in this graph of 5 letter English words connecting two when they differ by a single letter, even though it only contains 5757/26^5 = 4.8 e-4 of the complete list of words there is a component of size 4493. (the second largest is size 24, and the maximum degree is 25).
import time
import copy
data = map (lambda x: x.strip(), open('sgb-words.txt').readlines())
print len (data)
def d (w1, w2):
count = 0
for idx in range(0,5):
if w1[idx] != w2[idx]:
count += 1
return count
print "creating graph"
t0 = time.clock ()
graph = [[a,b] for a in data for b in data if d(a,b) ==1 and a < b]
t1 = time.clock ()
print "took " + str (t1 - t0) + " seconds."
#t0 = time.clock ()
#graph2 = []
#for i in range (0, len(data)):
# for j in range(0,len(data)):
# if d(data[i],data[j]) == 1 and i < j:
# graph2.append ([i,j])
#t1 = time.clock ()
#print "took " + str (t1 - t0) + " seconds."
def deg ():
maxd = 0
maxw = ""
for idx in range (0,len(data)):
count = 0
if idx % 100 == 0:
print idx, maxd, maxw
for idx2 in range (0,len(data)):
if d(data[idx],data[idx2]) == 1:
count += 1
if count > maxd:
maxd = count
maxw = [data[idx]]
elif count == maxd:
maxw.append (data[idx])
return maxd, maxw
def nbd (w):
nbdv = []
for idx in range(0,len(data)):
if d(w,data[idx]) == 1:
nbdv.append(data[idx])
return nbdv
def gnbd (w, graph):
N = [x[1] for x in graph if x[0] == w]
N.extend ([ x[0] for x in graph if x[1] == w])
return N
def union (x,y):
ret_val = copy.copy(x)
ret_val.extend (y)
return ret_val
def unique (x):
ret_val = []
for idx in range(0,len(x)):
if not x[idx] in x[0:idx]:
ret_val.append (x[idx])
return ret_val
def collect (f, x, remlist):
if remlist == []:
return x
else:
return collect (f, f(x, remlist[0]), remlist [1:])
def collect2 (f, x, remlist):
ret_val = copy.copy(x)
for idx in range (0, len (remlist)):
ret_val.extend (remlist[idx])
return ret_val
def next (reached, graph):
N = unique (collect2 (union, [], map (lambda x: gnbd(x, graph), reached)))
return N
#print deg()
# (25, ['cores', 'bares'])
#print "Nbd of cores is:"
#print nbd('cores')
#print "Nbd of bares is:"
#print nbd('bares')
def sub (l1, l2):
ret_val = []
for idx in range(0,len(l1)):
if l1[idx] not in l2:
ret_val.append (l1[idx])
return ret_val
def component (graph, start):
reached = [start] # start with just the first node
new = [start]
t0 = time.clock ()
while True:
next_bd = next (new, graph)
new = sub (next_bd,reached)
reached = unique (union (reached, new))
if new == []:
print "DONE:", len (reached)
return reached
else:
print len (reached), time.clock() - t0
def components (graph, vertices):
rem = vertices
components = []
while rem != []:
print rem [0]
new_component = component (graph, rem[0])
rem = sub (rem, new_component)
components.append (new_component)
print "still to do: " + str (len(rem))
print "components: ", map (len, components)
return components
print components (graph, data)
Advertisement