Topics/Material/Reading for Week 4

This week is based on Lessons 5 of the Udacity course CS101 "Introduction to Computer Science". It would be best if you sign up and watch these lectures.

We will cover:

Check out Subsection 5.5. "Dictionaries" of the Python Tutorial.

Also, check out Subsection 4.4 "Dictionaries" of the Programming Knights book. Here is the link to Chapter 4.

Strings / Lists / Dictionaries

String List Dictionary
sequence of characters list of elements set of <key, value> pairs
s='hello'
p=['alpha', 'beta']
d={'hydrogen' : 1, 'helium' : 2}
immutable mutable mutable
s[i]
p[i]
d[k]

Dictionaries

elements = {'hydrogen' : 1, 'helium' : 2, 'carbon' : 6}
elements['lithium']=3
elements['nitrogen']=8 # mistake
elements['nitrogen']=7
print(elements)
print(elements['hydrogen'])
print(elements['lithium'])
print('lithium' in elements)

Dictionaries

elements = {}
elements['H'] = {'name' : 'Hydrogen', 'number' : 1, 'weight' : 1.00794}
elements['He'] = {'name' : 'Helium', 'number' : 2, 'weight' : 4.002602, 'noble gas' : True}

Making our Search Engine Faster

We need to modify the following methods:

crawl_web
add_to_index
lookup

Modifying crawl_web

def crawl_web(seed):
    tocrawl = [seed]
    crawled = []
    index = []
    while tocrawl:
        url = tocrawl.pop()
        if url not in crawled:
            content = get_page(url)
            add_page_to_index(index, url, content)
            union(tocrawl, get_all_links(content))
            crawled.append(url)

    return index

The required modification is very simple.

def crawl_web(seed):
    # same code

    index = {}    # <= this is the only modification
    
    # same code

Modifying add_to_index

def add_to_index(index,keyword,url):
    for entry in index:
        if entry[0] == keyword:
            entry[1].append(url)
            return
    index.append([keyword,[url]])

The modified add_to_index method that use a dictionary to realize a fast index:

def add_to_index(index,keyword,url):
    if keyword in index:
        index[keyword].append(url)
    else:
        index[keyword] = [url]

Modifying lookup

def lookup(index,keyword):
    for entry in index:
        if entry[0] == keyword:
            return entry[1]
    return []

The modified look_up method that uses a dictionary to realize a fast index:

def lookup(index,keyword):
    if keyword in index:
        return index[keyword]
    return None

Modifying make_big_index

def make_big_index(size):
    index = []
    letters = ['a','a','a','a','a','a','a','a']
    while len(index) < size:
        word = make_string(letters)
        add_to_index(index, word, 'fake')
        for i in range(len(letters) - 1, 0, -1):
            if letters[i] < 'z':
                letters[i] = chr(ord(letters[i]) + 1)
                break
            else:
                letters[i] = 'a'
    return index

The required modification is very simple.

def make_big_index(size):
    index = {}  # <== This is the only modification

    # same code

Complete Code for Measuring Performance Index Based On Dictionaries - performance_index_dict.py

import time

def time_execution(code):
    start = time.clock()
    result = eval(code)
    run_time = time.clock() - start
    return result, run_time

def add_to_index(index,keyword,url):
    if keyword in index:
        index[keyword].append(url)
    else:
        index[keyword] = [url]

def lookup(index,keyword):
    if keyword in index:
        return index[keyword]
    return None

def make_string(p):
    s = ""
    for e in p:
        s = s + e
    return s

# building a fake index 
def make_big_index(size):
    index = {}
    letters = ['a','a','a','a','a','a','a','a']
    while len(index) < size:
        word = make_string(letters)
        add_to_index(index, word, 'fake')
        for i in range(len(letters) - 1, 0, -1):
            if letters[i] < 'z':
                letters[i] = chr(ord(letters[i]) + 1)
                break
            else:
                letters[i] = 'a'
    return index

index100k = make_big_index(100000)

# timing lookup

print(time_execution("lookup(index100k,'aaaaaaaa')"))
print(time_execution("lookup(index100k,'zzzzzzzz')"))

Complete Code for Measuring Performance of Index Based On Lists - performance_index_list.py

import time

def time_execution(code):
    start = time.clock()
    result = eval(code)
    run_time = time.clock() - start
    return result, run_time

def add_to_index(index,keyword,url):
    for entry in index:
        if entry[0] == keyword:
            entry[1].append(url)
            return
    index.append([keyword,[url]])

def lookup(index,keyword):
    for entry in index:
        if entry[0] == keyword:
            return entry[1]
    return []

def make_string(p):
    s = ""
    for e in p:
        s = s + e
    return s

# building a fake index 
def make_big_index(size):
    index = []
    letters = ['a','a','a','a','a','a','a','a']
    while len(index) < size:
        word = make_string(letters)
        add_to_index(index, word, 'fake')
        for i in range(len(letters) - 1, 0, -1):
            if letters[i] < 'z':
                letters[i] = chr(ord(letters[i]) + 1)
                break
            else:
                letters[i] = 'a'
    return index

index100k = make_big_index(100000)

print("first element of our fake index: ", index100k[0])
print("last element of our fake index: ", index100k[-1])

# timing lookup

print(time_execution("lookup(index10k,'aaaaaaaa')"))
print(time_execution("lookup(index10k,'zzzzzzzz')"))