Blaginations

An additional opportunity to be hopelessly wrong

Some fun with the Stanford GraphBase.

leave a comment »

Fascinating conclusion, in this graph of 5 letter English words connecting two when they differ by a single letter, even though it only contains 5757/26^5 = 4.8 e-4 of the complete list of words there is a component of size 4493. (the second largest is size 24, and the maximum degree is 25).

import time
import copy

data = map (lambda x: x.strip(), open('sgb-words.txt').readlines())
print len (data)

def d (w1, w2):
    count = 0
    for idx in range(0,5):
        if w1[idx] != w2[idx]:
            count += 1
    return count

print "creating graph"
t0 = time.clock ()
graph = [[a,b] for a in data for b in data if d(a,b) ==1 and a < b]
t1 = time.clock ()
print "took " + str (t1 - t0) + " seconds."

#t0 = time.clock ()
#graph2 = []
#for i in range (0, len(data)):
#    for j in range(0,len(data)):
#        if d(data[i],data[j]) == 1 and i < j:
#            graph2.append ([i,j])
#t1 = time.clock ()
#print "took " + str (t1 - t0) + " seconds."

def deg ():
    maxd = 0
    maxw = ""
    for idx in range (0,len(data)):
        count = 0
        if idx % 100 == 0:
            print idx, maxd, maxw
        for idx2 in range (0,len(data)):
            if d(data[idx],data[idx2]) == 1:
                count += 1
        if count > maxd:
            maxd = count
            maxw = [data[idx]]
        elif count == maxd:
            maxw.append (data[idx])
    return maxd, maxw

def nbd (w):
    nbdv = []
    for idx in range(0,len(data)):
        if d(w,data[idx]) == 1:
            nbdv.append(data[idx])
    return nbdv

def gnbd (w, graph):
    N = [x[1] for x in graph if x[0] == w]
    N.extend ([ x[0] for x in graph if x[1] == w])
    return N

def union (x,y):
    ret_val = copy.copy(x)
    ret_val.extend (y)
    return ret_val

def unique (x):
    ret_val = []
    for idx in range(0,len(x)):
        if not x[idx] in x[0:idx]:
            ret_val.append (x[idx])
    return ret_val

def collect (f, x, remlist):
    if remlist == []:
        return x
    else:
        return collect (f, f(x, remlist[0]), remlist [1:])

def collect2 (f, x, remlist):
    ret_val = copy.copy(x)
    for idx in range (0, len (remlist)):
        ret_val.extend (remlist[idx])
    return ret_val

def next (reached, graph):
    N = unique (collect2 (union, [], map (lambda x: gnbd(x, graph), reached)))
    return N

#print deg()
# (25, ['cores', 'bares'])
#print "Nbd of cores is:"
#print nbd('cores')
#print "Nbd of bares is:"
#print nbd('bares')

def sub (l1, l2):
    ret_val = []
    for idx in range(0,len(l1)):
        if l1[idx] not in l2:
            ret_val.append (l1[idx])
    return ret_val

def component (graph, start):
    reached = [start]  # start with just the first node
    new = [start]
    t0 = time.clock ()
    while True:
        next_bd = next (new, graph)
        new = sub (next_bd,reached)
        reached = unique (union (reached, new))
        if new == []:
            print "DONE:", len (reached)
            return reached
        else:
            print len (reached), time.clock() - t0

def components (graph, vertices):
    rem = vertices
    components = []
    while rem != []:
        print rem [0]
        new_component = component (graph, rem[0])
        rem = sub (rem, new_component)
        components.append (new_component)
        print "still to do: " + str (len(rem))
        print "components: ", map (len, components)
    return components

print components (graph, data)

Advertisement

Written by kasterma

September 1, 2011 at 7:30 pm

Posted in Uncategorized

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Connecting to %s

Follow

Get every new post delivered to your Inbox.