Topics/Material/Reading for Week 2

This week is based on Lesson 2 of the Udacity course CS101 "Introduction to Computer Science". It would be best if you sign up and watch these lectures. If you watch them, then you'll see the solution to problem 3 on HW #1 :)

We will cover:

Finish reading Chapter 1 of "Knights Programming". The remaining sections are Section 1.8 (useful for problem 2 on HW #1) and Section 1.9.

Read Chapter 2 of "Knights Programming" to learn about functions/procedures and if statements.

Read Section 3.1 of "Knights Programming" to learn about while loops.

IDLE editor

Goal of week 2: extract all URLs in a web page

To do this, we need to learn:

Extracting the first URL

# assumes page already holds the HTML text of a web page as a string
# position where the first link tag starts
start_link  = page.find('<a href=')
# position of the quote that opens the URL (search starts at the tag)
start_quote = page.find('"', start_link)
# position of the quote that closes the URL
end_quote   = page.find('"', start_quote + 1)
# the URL is the text strictly between the two quotes
url         = page[start_quote + 1 : end_quote]
print(url)

Extracting the first two URLs

# finding the first URL
# (assumes page already holds the HTML text of a web page as a string)
start_link = page.find('<a href=')
start_quote = page.find('"', start_link)
end_quote = page.find('"', start_quote + 1)
url = page[start_quote + 1 : end_quote]
print(url)
# finding the second URL
# drop everything before the closing quote of the first URL,
# so that the next search finds the following link
page = page[end_quote:]
# same code as above
start_link = page.find('<a href=')
start_quote = page.find('"', start_link)
end_quote = page.find('"', start_quote + 1)
url = page[start_quote + 1 : end_quote]
print(url)

To avoid this repetition of code, we will use procedural abstraction.

Procedures

#
# input  +---------------+ output
# -----> |               | ----->
# -----> | Procedure     | -----> 
# -----> |               | ----->
#        +---------------+
#
# def <name>(<parameters>):
#     <block>  

Extracting URL with get_next_target

# turn this into a procedure (the four lines up to the next '#' marker)
start_link = page.find('<a href=')
start_quote = page.find('"', start_link)
end_quote = page.find('"', start_quote + 1)
url = page[start_quote + 1 : end_quote]
# end of the code that becomes the procedure
print(url)
# continue the search after the closing quote of the URL just found
page = page[end_quote:]
# turn this into a procedure (exactly the same four lines again)
start_link = page.find('<a href=')
start_quote = page.find('"', start_link)
end_quote = page.find('"', start_quote + 1)
url = page[start_quote + 1 : end_quote]
# end of the code that becomes the procedure
print(url)

Extracting URL with get_next_target

What should the inputs be for the procedure get_next_target?

What should the outputs be for the procedure get_next_target?

Procedure get_next_target

def get_next_target(s):
    """Return the first link target in s and the position of its closing quote.

    s is the text to search. The result is a pair (url, end_quote): the text
    between the first pair of double quotes following '<a href=', and the
    index of the closing quote in s. No check is made for a missing link.
    """
    link_pos = s.find('<a href=')
    open_quote = s.find('"', link_pos)
    close_quote = s.find('"', open_quote + 1)
    # the URL is the text strictly between the two quotes
    return s[open_quote + 1:close_quote], close_quote

Using the urllib package to get the HTML code of a web page

import urllib.request

# Open the page; the with statement closes the connection
# as soon as the indented block is finished.
with urllib.request.urlopen('http://www.cs.ucf.edu/courses/cop3223/spr2014/section1/simple.html') as response:
    # PW added decode("utf-8") to convert
    # the buffer returned by response.read()
    # into a string
    html = response.read().decode("utf-8")
print(html)

You can find more information about the urllib package at http://docs.python.org/3/howto/urllib2.html

The above code snippet is a modification of http://docs.python.org/3/howto/urllib2.html#fetching-urls

Procedure get_html

import urllib.request

def get_html(url):
    """Download the resource at url and return its content as a string.

    The bytes that are read are decoded as UTF-8.
    """
    # The with statement closes the connection once the content has been read.
    with urllib.request.urlopen(url) as response:
        return response.read().decode("utf-8")

Putting everything together - firstTwoURLs.py

import urllib.request

def get_html(url):
    """Download the resource at url and return its content as a string.

    The bytes that are read are decoded as UTF-8.
    """
    # The with statement closes the connection once the content has been read.
    with urllib.request.urlopen(url) as response:
        # read() must be called (note the parentheses) to obtain the bytes
        return response.read().decode("utf-8")

def get_next_target(s):
    """Return the first link target in s and the position of its closing quote.

    The result is a pair (url, end_quote): the text between the first pair of
    double quotes following '<a href=', and the index of the closing quote
    in s. No check is made for a missing link.
    """
    link_pos = s.find('<a href=')
    open_quote = s.find('"', link_pos)
    close_quote = s.find('"', open_quote + 1)
    # the URL is the text strictly between the two quotes
    return s[open_quote + 1:close_quote], close_quote

def main():
    """Download the sample page and print the first two URLs found in it."""
    page = get_html('http://www.cs.ucf.edu/courses/cop3223/spr2014/section1/simple.html')
    first, end = get_next_target(page)
    # continue the search after the closing quote of the first URL
    page = page[end:]
    # the position returned by the second call is not needed
    second, _ = get_next_target(page)
    print("The first URL is", first)
    print("The second URL is", second)


# Defining main() does not run it: call it when this file is run as a program.
if __name__ == "__main__":
    main()

Quiz

def inc(n):
    # Quiz code: takes one input, n, and hands a value back to the caller.
    result = n + 1
    return result

What does the inc procedure defined above do?

Quiz

def sum(a, b):
    # Quiz code: takes two inputs, a and b.
    # Look carefully at what a caller of this procedure gets back.
    # NOTE: the name sum also hides Python's built-in sum function.
    a = a + b

What does the sum procedure defined above do?

Quiz

def sum(a, b):
    # Quiz code: takes two inputs, a and b, and returns the value of a + b.
    # NOTE: the name sum also hides Python's built-in sum function.
    a = a + b
    return a

What does the modified sum procedure defined above do? Keep different data types in mind!

Programming Quiz

# Define a procedure, square, that takes one number 
# as its input, and returns the square of that 
# number (result of multiplying
# the number by itself).

def square(a):
# fill in the missing code

Solution to Programming Quiz

# Define a procedure, square, that takes one number 
# as its input, and returns the square of that 
# number (result of multiplying
# the number by itself).

def square(a):
    """Return the square of a, that is, a multiplied by itself."""
    return a * a

Programming Quiz

# Define a procedure, sum3, that takes three numbers 
# as its input, and returns the sum of the three 
# input numbers.

def sum3(a,b,c):
# fill in the missing code

Solution to Programming Quiz

# Define a procedure, sum3, that takes three numbers 
# as its inputs, and returns the sum of the three 
# input numbers.

def sum3(a, b, c):
    """Return the sum of the three inputs a, b and c."""
    total = a + b
    total = total + c
    return total

Programming Quiz

# Define a procedure, find_second, that takes
# two strings as its inputs: a search string
# and a target string. It should return a
# number that is the position of the second
# occurrence of the target string in the
# search string.

def find_second(search, target):
# fill in missing code

Solution to Programming Quiz

# Define a procedure, find_second, that takes
# two strings as its inputs: a search string
# and a target string. It should return a
# number that is the position of the second
# occurrence of the target string in the
# search string.

def find_second(search, target):
    """Return the position of the second occurrence of target in search.

    The result is -1 when target occurs fewer than two times.
    """
    first_pos = search.find(target)
    # start looking again one position after the first occurrence
    return search.find(target, first_pos + 1)

Making Decisions

# Python operators for comparison:
# ==, !=, <, >, <=, >=
#
# Syntax:
# <Number> <Operator> <Number>
#
# the output is a Boolean value: True / False

Making Decisions

print(2 < 3)        # => True
print(21 < 3)       # => False
print(7 * 3 < 21)   # => False (21 is not smaller than 21)
print(7 * 3 != 21)  # => False
print(7 * 3 == 21)  # => True

Note that the equality comparison is done using == instead of = because = means assignment.

If Statements

# if <TestExpression>:
#     <Block>

# returns the absolute value of a number
def absolute(x):
    """Return x without its sign: -x when x is negative, otherwise x."""
    if x < 0:
        return -x
    return x

Programming Quiz

# Define a procedure, bigger, that takes in
# two numbers as inputs, and returns the
# greater of the two inputs.

def bigger(a,b):
# fill in missing code

Solution to Programming Quiz

# Define a procedure, bigger, that takes in
# two numbers as inputs, and returns the
# greater of the two inputs.

def bigger(a, b):
    """Return the greater of a and b (b when the two are equal)."""
    if a > b:
        larger = a
    else:
        larger = b
    return larger

Or

print(False or False) # => False
print(False or True)  # => True
print(True or False)  # => True
print(True or True)   # => True

# this_is_an_error is an undefined variable
# (try the next line on its own: the error stops a program at this point)
print(this_is_an_error) 
# => NameError: name 'this_is_an_error' is not defined

# but the statement below is OK
print(True or this_is_an_error) # => True
# if the first operand is True, then
# Python does not evaluate the second and
# outputs True (this is called short-circuit evaluation)

Programming Quiz

# Define a procedure, biggest, that takes 3 
# numbers as inputs, and outputs the greatest
# of the three numbers

def biggest(a,b,c):
# fill in missing code

Solution to Programming Quiz

# Define a procedure, biggest, that takes 3 
# numbers as inputs, and outputs the greatest
# of the three numbers

def biggest(a, b, c):
    """Return the greatest of the three inputs a, b and c."""
    if a > b:
        # b is out of the race: the answer is a or c
        if a > c:
            return a
        return c          # c >= a > b
    # here b >= a: the answer is b or c
    if b > c:
        return b
    return c              # c >= b >= a

Solution to Programming Quiz

# Define a procedure, biggest, that takes 3 
# numbers as inputs, and outputs the greatest
# of the three numbers

# alternative solution

def bigger(a, b):
    """Return the greater of a and b (b when the two are equal)."""
    if a > b:
        larger = a
    else:
        larger = b
    return larger

def biggest(a, b, c):
    """Return the greatest of a, b and c by using bigger twice."""
    larger_of_first_two = bigger(a, b)
    return bigger(larger_of_first_two, c)

While Loops

# if <TestExpression>:
#     <Block>             # executed 0 or 1 times

# while <TestExpression>:   # executed 0, 1, 2, ... times
#     <Block>

While Loops

# prints the numbers 0, 1, ..., 9, one per line
i=0
while i < 10:
  print(i)
  # without this line i would stay 0 and the loop would never end
  i = i + 1

While Loops - Quiz

# Quiz: what does this loop print?
# Note that i is increased before it is printed.
i=0
while i != 10:
  i = i + 1
  print(i)

While Loops - Quiz

# Quiz: what does this loop print?
# i starts at 1 and grows by 2 each time; check the loop test carefully
# before running this (Ctrl+C interrupts a running program).
i=1
while i != 10:
  i = i + 2
  print(i)

While Loops - Programming Quiz

# Define a procedure, print_numbers, that takes
# as input a positive whole number, and prints 
# out all the whole numbers from 1 to the input
# number.

# Make sure your procedure prints "upwards", so
# from 1 up to the input number.

While Loops - Solution to Programming Quiz

# Define a procedure, print_numbers, that takes
# as input a positive whole number, and prints 
# out all the whole numbers from 1 to the input
# number.

def print_numbers(n):
    """Print the whole numbers from 1 up to n, one per line."""
    current = 1
    while current <= n:
        print(current)
        current += 1

# You need to call the above procedure and pass to
# it an input. Otherwise "nothing would happen".

print_numbers(3)

While Loops - Alternative Solution to Programming Quiz

# Define a procedure, print_numbers, that takes
# as input a positive whole number, and prints 
# out all the whole numbers from 1 to the input
# number.

def print_numbers(n):
    """Print the whole numbers from 1 up to n, one per line."""
    printed = 0  # the last number printed so far (0 means none yet)
    while printed < n:
        printed += 1
        print(printed)

print_numbers(3)

Factorial - Programming Quiz

# Define a procedure, factorial, that
# takes one number as its input
# and returns the factorial of
# that number.

def factorial(n):
    """Return the factorial of n: 1 * 2 * ... * n.

    For n smaller than 2 the loop body never runs and the result is 1.
    """
    product = 1
    factor = 2
    while factor <= n:
        product *= factor
        factor += 1
    return product

print(factorial(1))
print(factorial(2))
print(factorial(3))
print(factorial(4))

Break

# while <TestExpression>:
#     <Code>
#     if <BreakTest>:
#         break
#     <MoreCode>
# <AfterWhile>

print_numbers without break

def print_numbers(n):
    """Print the whole numbers from 1 up to n, one per line."""
    current = 1
    # the loop test itself decides when to stop
    while current <= n:
        print(current)
        current += 1

print_numbers with break

def print_numbers(n):
    """Print the whole numbers from 1 up to n, one per line (break version)."""
    current = 1
    while True:
        # the loop test is always True, so break is the only way out
        if current > n:
            break
        print(current)
        current += 1

This example only illustrates the usage of break. This code is not as good as the previous code.

Procedure get_next_target

def get_next_target(s):
    """Return the first link target in s and the position of its closing quote.

    Two values are returned together: the text between the first pair of
    double quotes following '<a href=', and the index of the closing quote
    in s. No check is made for a missing link.
    """
    link_pos = s.find('<a href=')
    open_quote = s.find('"', link_pos)
    close_quote = s.find('"', open_quote + 1)
    # the URL is the text strictly between the two quotes
    return s[open_quote + 1:close_quote], close_quote

# call the function; it returns two values, which are
# stored in first and end (assumes page holds the HTML text)
first, end = get_next_target(page)

# Multiple Assignment
# <Name1>, <Name2>, ... = <Expression1>, <Expression2>, ...

# <Name> = <Expression>

Multiple Assignment - Quiz

# What does the code below do?
# s, t = t, s

No Links

def get_next_target(s):
    """Return the first link target in s and the position of its closing quote.

    There is no check for a missing link: when s does not contain
    '<a href=', find returns -1 and the slice below produces a wrong "URL".
    The second call below shows this.
    """
    link_pos = s.find('<a href=')
    open_quote = s.find('"', link_pos)
    close_quote = s.find('"', open_quote + 1)
    return s[open_quote + 1:close_quote], close_quote

print(get_next_target('this is a <a href="www.ucf.edu">link</a>'))  # => ('www.ucf.edu', 30)
print(get_next_target('Not good'))  # => ('Not goo', -1)

Correct Handling of No Links

def get_next_target(s):
    """Return the first link target in s and the position of its closing quote.

    The result is (None, 0) when s does not contain '<a href='.
    """
    link_pos = s.find('<a href=')
    if link_pos == -1:
        # no link at all: report this with None instead of a wrong URL
        return None, 0
    open_quote = s.find('"', link_pos)
    close_quote = s.find('"', open_quote + 1)
    return s[open_quote + 1:close_quote], close_quote

print(get_next_target('this is a <a href="www.ucf.edu">link</a>'))  # => ('www.ucf.edu', 30)
print(get_next_target('Not good'))  # => (None, 0)

Print All Links

def get_next_target(s):
    """Return the first link target in s and the position of its closing quote.

    The result is (None, 0) when s does not contain '<a href='.
    """
    start_link = s.find('<a href=')
    if start_link == -1:
        return None, 0
    start_quote = s.find('"', start_link)
    end_quote = s.find('"', start_quote + 1)
    url = s[start_quote + 1 : end_quote]
    return url, end_quote

def print_all_links(page):
    """Print every link target found in page, one per line."""
    while True:
        url, endpos = get_next_target(page)
        if url:
            # print is a function in Python 3, so the parentheses are required
            print(url)
            # continue the search after the closing quote of this URL
            page = page[endpos:]
        else:
            # no further link was found
            break

Print All Links - Complete Program

import urllib.request
 
def get_html(url):
    response = urllib.request.urlopen(url)
    html = response.read().decode("utf-8")
    return html

def get_next_target(s):
    """Return the first link target in s and the position of its closing quote.

    The result is (None, 0) when s does not contain '<a href='.
    """
    link_pos = s.find('<a href=')
    if link_pos != -1:
        open_quote = s.find('"', link_pos)
        close_quote = s.find('"', open_quote + 1)
        # the URL is the text strictly between the two quotes
        return s[open_quote + 1:close_quote], close_quote
    return None, 0

def print_all_links(page):
    """Print every link target found in page, one per line."""
    while True:
        link, next_start = get_next_target(page)
        if not link:
            # no further link was found
            break
        print(link)
        # continue the search after the closing quote of this URL
        page = page[next_start:]

# Download the sample page and print every link found in it.
SAMPLE_URL = 'http://www.cs.ucf.edu/courses/cop3223/spr2014/section1/simple.html'
page = get_html(SAMPLE_URL)
print_all_links(page)