from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
x = 10
type(x)
int
Type Casting
float(20)
float('20')
int('20')
int(20.2)
str(201)
20.0
20.0
20
20
'201'
x = [3,2,1,4,5]
x
type(x)
[3, 2, 1, 4, 5]
list
import numpy as np
np.mean(x)
3.0
#### Missing values ####?
# Functions to identify missing values?
x = [3,2,1,4,5.2,'apple']
[isinstance(i, int) for i in x]
[isinstance(i, float) for i in x]
[isinstance(i, str) for i in x]
[True, True, True, True, False, False]
[False, False, False, False, True, False]
[False, False, False, False, False, True]
# Demonstrates .is_integer() on an int and on floats.  The demo is kept behind
# `if False:` so it is skipped on a top-to-bottom run: int.is_integer() only
# exists on Python 3.12+, while float.is_integer() is available everywhere.
# NOTE(review): the export lost the indentation; the whole cell is assumed to
# belong to the guard -- confirm against the original notebook.
if False:
    y = 10
    y.is_integer()  # True
    y = 10.0
    y.is_integer()  # True
    y = 10.2
    y.is_integer()  # False
x = True
print(x)
type(x)
type(x) == bool
True
bool
True
x = False
print(x)
type(x)
False
bool
xx = [True, False, True]
xx
sum(xx)
sum([False, False, False])
[True, False, True]
2
0
xx[0]
type(xx)
[type(a) for a in xx]
True
list
[bool, bool, bool]
x = "apple"
type(x)
len(x)
x[0]
str
5
'a'
"apple".upper()
"APPLE".lower()
"APPLE A day doctoR away".title()
'APPLE'
'apple'
'Apple A Day Doctor Away'
x = ["banana","apple","carrot"]
type(x)
len(x)
x[0]
type(x) == list # True
type(x) is list # True
list
3
'banana'
True
True
x = ["banana","apple","carrot"]
set(x)
list(set(x)) # order changed
{'apple', 'banana', 'carrot'}
['carrot', 'apple', 'banana']
x = ["banana","apple","carrot"]
max(x)
min(x)
len(x)
'carrot'
'apple'
3
sorted(x) # returns a value
x.sort() # in place
x
['apple', 'banana', 'carrot']
['apple', 'banana', 'carrot']
"apple" + "banana"
" ".join(["apple", "banana"])
'applebanana'
'apple banana'
# Element-wise concatenation of two equally long lists of strings.
l1 = ["apple", "banana"]
l2 = ["carrot", "dragonfruit"]
["".join(pair) for pair in zip(l1, l2)]
['applecarrot', 'bananadragonfruit']
"APPLE A day doctoR away".replace("apple","--")
"apple a day doctoR away".replace("apple","--")
"apple a day doctoR away apple".replace("apple","--")
'APPLE A day doctoR away'
'-- a day doctoR away'
'-- a day doctoR away --'
# replace only the first n instances?
# string extract all or some instances?
l = "abcdefghijklmnopqrstuvwxyz"
l[2:2]
l[2:3]
l[2:10]
l[2:10:2]
''
'c'
'cdefghij'
'cegi'
many_strings = ["north carolina", "south carolina", "new hampshire", "north dakota"]
"north carolina".find("carolina")  # index at which the match starts
"new hampshire".find("carolina")  # -1 means not found
[s.find("carolina") for s in many_strings]  # position of the match in each string
# Keep the strings that contain the substring.  Use `in` rather than
# `find(...) > 0`: find() returns 0 for a match at the very start of a string,
# so `> 0` would silently drop a value such as "carolina beach".
carolina_matches = [s for s in many_strings if "carolina" in s]
carolina_matches
[pos for pos, ch in enumerate("hawaii") if ch == "i"]  # every index of "i"
[pos for pos, ch in enumerate("mississippi") if ch == "s"]  # every index of "s"
"welcome to hotel california".split(" ")
# puzzle: identify strings with alternating letter?
['welcome', 'to', 'hotel', 'california']
import re
txt = "The rain in Spain"
re.findall("ai", txt)
['ai', 'ai']
s = list("an apple")
s[0:0] = list("a banana and ")
"".join(s)
'a banana and an apple'
l = ["apple","banana","carrot"]
l.append("gauva")
l
Accessing
l[-1] # last element
l[-2] # second last
l = list("abcdefghij")
l[0:4]
l[0:4:1]
l[0:4:2]
l[0:5:2]
l[0:5][::-1]
l[0:-2] # index 0 up to, but not including, the last 2 elements
l[0:0] # empty list: start == stop
l[:4] # same as l[None:4]
l[0:4] # gives the same result as the next line
l[0:-6] # treat -6 as len(l)-6, i.e. 4 for this 10-element list
l[::-1]
l[0:10:-1] # does not raise, but returns []: start < stop with a negative step
l[10:0:-1] # works, but stops before index 0, so the first element is excluded
# 10, 10-1, 10-1-1, ...,
# if positive step used, start < stop
# if negative step used, start > stop
l[0:7][::-1] # most convenient way of getting in reverse
l[0:2] = ["aa","bb"] # slice assignment replaces the first two elements
l
l[0:2] = ["a","b"] # restore the original first two elements
l
l.reverse() # reverses in place (returns None), not recommended
l
a = ["dog"]
l + a # builds a new list; neither l nor a is modified
l
a
l.extend(a) # appends the items of a to l in place; a is a list, not a single string
l
a
l.extend(a) # extending again adds a second "dog"
l
Index of first occurrence
l.index("dog") # index of the first match only
l
# index of all occurrences; always returns a list because it is a list comprehension
[pos for pos, elem in enumerate(l) if elem=="dog"]
# or, equivalently, by looping over the positions
[pos for pos in range(len(l)) if l[pos]=="dog"]
# or itertools?
l = ['apple', 'banana', 'carrot', 'gauva', 'dog', 'dog']
l.pop(1)  # remove the element at index 1 ('banana') and return it
l
# Remove several positions.  Pop from the highest index down: every pop shifts
# the later elements one slot to the left, so popping in ascending order would
# remove index 1 and then the element that originally sat at index 3.
for pos in sorted([1, 2], reverse=True):
    l.pop(pos)
l
Repeat list elements
# repeat lists: the * operator repeats the whole list
[11] * 3
[11, "apple"] * 3
[11, "apple"] * 3
# repeat each element of a list 3 times
[item for item in [11,"apple"] for n in range(3)]
l1 = [11,12,13]
l1[0] = [111, 112 ,113]
l1 # index assignment nests the list: [[111, 112, 113], 12, 13]
l1 = [11,12,13]
l1[0:1] = [111, 112 ,113]
l1 # slice assignment splices the elements in, giving a flat list: [111, 112, 113, 12, 13]
# all elements except one
l1 = list("abcdefghij")
l1
l1[-1] # this gives the last one, not except one
[elem for pos, elem in enumerate(l1) if pos not in [2-1,4-1]] # works: drops the 2nd and 4th elements (indices 1 and 3)
# index by boolean - Not allowed
l1 = list("abcdefghij")
# l1[[2,3,4]] # fails, cannot index multiple elements
# workaround: turn the boolean mask into the positions of its True entries ...
indices = [pos for pos, elem in enumerate([True, False] * 5) if elem==True]
# ... and pick those positions one by one
[l1[index] for index in indices]
Filter a list
l = list(range(11,20))
l
list(filter(lambda x: x>15, l)) # keeps the elements for which the function is true
[x for x in l if x>15] # equivalent list comprehension
# map() applies a function to every element; wrap it in list() to see the results
l = [1,2,3,4,5]
list(map(lambda x: x+100, l))
# reduce() folds a sequence into a single value by applying a two-argument function cumulatively
from functools import reduce
reduce(lambda x, y: x+y, [1,2,3,4,5,6]) # ((((1+2)+3)+4)+5)+6
list(range(11,20))
list(range(11,20,2))
# 11:15, not allowed
[x for x in [11,12,13,14,15] if x == 13]
[pos for pos, elem in enumerate([11,12,13,14,15]) if elem%2==1] # same as which(x%%2==1)
"y" in list("python")
"l" in list("python")
# Recycle a list: repeat it cyclically until it has exactly desired_length items.
l1 = [1, 2, 3]
desired_length = 10
q = desired_length // len(l1)  # number of complete copies that fit
r = desired_length % len(l1)  # leftover items, taken from the front of the list
l1 * q + l1[:r]
# sorted - returns a value, not in place modification
# element-wise addition of two lists with zip()
l1=[1,2,3,4]
l2=[10,10,20,20]
[x+y for x,y in zip(l1,l2)]# equal lengths: every pair is added
l1=[1,2,3,4]
l2=[10,20]
[x+y for x,y in zip(l1,l2)] # zip() does not recycle the shorter list; it stops as soon as the shorter one runs out
list(zip(l1,l2))
# Order (rank) of elements: the position each value takes in sorted order.
# Still incomplete: duplicate values all receive the rank of their first
# occurrence because list.index() returns the first match.
x = [11, 13, 14, 15, 12]
x_sorted = sorted(x)  # sort once instead of re-sorting for every lookup
x_sorted.index(11)
x_sorted.index(13)
x_sorted.index(14)
x_sorted.index(15)
x_sorted.index(12)
x_ranks = [x_sorted.index(value) for value in x]
x_ranks
Zipping
l_a = ["a","b","c"]
l_b = ["x","y","z"]
zip(l_a, l_b) # type is zip
l_ab_zip_list = list(zip(l_a, l_b)) # list of tuples
# recycles? Nope, plainly chops to the shorter length
list(zip(["a","b","c"], ["x","y"]))
list(zip(["a","b"], ["x","y","z"]))
# unpacking
a = l_ab_zip_list[0]
a # a is a tuple
a, b = l_ab_zip_list[0] # two names on the left unpack the 2-tuple
# a is a string, b is a string
10,20 # always a tuple, even without brackets
(10,20)
x=10; y=20;
(x,y)
x,y
x = 10,20 # the comma builds the tuple, so x is now (10, 20)
x
# Each element of the zipped list is a 2-tuple.
for element in l_ab_zip_list:
    print("***")
    print(element)
# Unpack each tuple directly in the loop header.  Note that idx is NOT a
# position here: it receives the first item of the pair and element the second
# (use enumerate() when a running index is wanted).
for idx, element in l_ab_zip_list:
    print("***")
    print(idx)
    print(element)
len(l_ab_zip_list[0])
("butter","asvasdvah")
len(("butter","asvasdvah"))
("butter",) # still a tuple of length 1
len(("butter",))
type(("butter",))
("butter") # not a tuple, but a string
type(("butter"))
len(("butter")) # length 6 string
my_set = {'apple', 'banana', 'carrot'}
my_set
# typically created from a list
my_l = ['carrot','banana','apple','apple','banana','carrot']
my_set= set(my_l)
my_set
# typically created from a list
my_l = ['carrot','banana','apple','apple','banana','carrot', 20]
my_set= set(my_l)
my_set
{'apple', 'banana', 'carrot'}
{'apple', 'banana', 'carrot'}
{20, 'apple', 'banana', 'carrot'}
my_set.add("gauva") # adds if unique, updates inplace
my_set
my_set.add("carrot") # adds if unique, updates inplace
my_set
{20, 'apple', 'as2', 'asd', 'banana', 'carrot', 'gauva', 'h', 'hjy', 'i', 'j', 'k', 'rgre', 'u', 'y'}
{20, 'apple', 'as2', 'asd', 'banana', 'carrot', 'gauva', 'h', 'hjy', 'i', 'j', 'k', 'rgre', 'u', 'y'}
my_set.update(["asd","as2"]) # accepts an iterable (not only a list or tuple); each item is added
my_set
my_set.update({"hjy","rgre"}) # another set works too
my_set
my_set.update("hjyiukyuyukyukyku") # a string is iterated character by character, so single letters are added
my_set
{20, 'apple', 'as2', 'asd', 'banana', 'carrot'}
{20, 'apple', 'as2', 'asd', 'banana', 'carrot', 'hjy', 'rgre'}
{20, 'apple', 'as2', 'asd', 'banana', 'carrot', 'h', 'hjy', 'i', 'j', 'k', 'rgre', 'u', 'y'}
Delete an element from set
my_l = ['carrot','banana','apple','apple','banana','carrot']
my_set= set(my_l)
my_set
my_set.discard("apple")
my_set
my_set.discard("asdasd") # no error if element absent
my_set
my_set.pop() # removes arbitrary element. error if empty set
my_set
{'apple', 'banana', 'carrot'}
{'banana', 'carrot'}
{'banana', 'carrot'}
'carrot'
{'banana'}
# Basic set algebra on two small sets that share the element 'c'.
set_a = {'a', 'b', 'c'}
set_b = {'c', 'd', 'e'}
set_a | set_b  # union, same as set_a.union(set_b)
set_a & set_b  # intersection, same as set_a.intersection(set_b)
set_a.difference(set_b)  # same as set_a - set_b
set_b.difference(set_a)  # same as set_b - set_a
{'a', 'b', 'c', 'd', 'e'}
{'c'}
{'a', 'b'}
{'d', 'e'}
# always alphabetical? No - sets are unordered. The sorted look is only how the notebook displays them; never rely on it
s = {5,2,7,1,8}
s
{1, 2, 5, 7, 8}
my_dict = {} # my_dict = dict()
my_dict
my_dict['key_1'] = 1
my_dict['key_2'] = 2
my_dict
{}
{'key_1': 1, 'key_2': 2}
my_dict['new'] = {'key_3': 1, 'key_4': 2}
my_dict
my_dict['new']
{'key_1': 1, 'key_2': 2, 'new': {'key_3': 1, 'key_4': 2}}
{'key_3': 1, 'key_4': 2}
Iterate over
# Iterating a dict directly yields its keys.
for my_key in my_dict:
    print("###########")
    print(my_key)
    print(my_dict[my_key])
# Not allowed: unpacking two names while looping over the dict itself, because
# the loop only yields keys.  Guarded with `if False:` so it never runs.
if False:
    for my_key, my_value in my_dict:
        print("###########")
        print(my_key)
        print(my_value)
# .items() yields (key, value) pairs, which can be unpacked in the loop header.
for my_key, my_value in my_dict.items():
    print("###########")
    print(my_key)
    print(my_value)
Access elements
my_dict.keys()  # returns a dict_keys view
type(my_dict.keys())
list(my_dict.keys())  # list
my_dict.values()  # returns a dict_values view
type(my_dict.values())
list(my_dict.values())  # list
my_dict.get('key_1')
# Guarded with `if False:` because subscripting a missing key raises KeyError.
if False:
    my_dict['not_a_key']  # error for a missing key
my_dict.get('not_a_key')  # no error thrown for a missing key
my_dict.get('not_a_key', 99999)  # the second argument is returned when the key is missing
my_dict[2] = 1000
my_dict
my_dict[2]  # not positional indexing: this works only because 2 is itself a key
my_dict_list = list(my_dict.items())  # list of (key, value) tuples
{'key_1': 1, 'key_2': 2, 'new': {'key_3': 1, 'key_4': 2}, 2: 1000}
1000
Sort a Dict
sorted({'key_1': 10, 'key_3': 20, 'key_2': 15})  # returns the sorted keys, not the values
{'key_1': 10, 3: 20, 'key_2': 15}  # keys of heterogeneous types are allowed ...
# ... but avoid them: sorting compares the keys with each other, and str and
# int keys cannot be ordered.  Guarded with `if False:` so it never runs.
if False:
    sorted({'key_1': 10, 3: 20, 'key_2': 15})
Update dict
my_dict = {}
my_dict['key_1'] = 1
my_dict['key_2'] = 2
my_dict
my_dict.update({'key_3': 1000})  # key not present: adds a new key-value pair
my_dict
my_dict.update({'key_1': 999})  # key already present: overwrites its value
my_dict
my_dict.update({'key_4': 999})  # key not present: adds a new key-value pair
my_dict
# Removing entries.  The destructive calls are guarded with `if False:` so
# they never run.
if False:
    del my_dict['key_4']  # raises KeyError if the key is absent
my_dict
if False:
    my_dict.pop('key_3')  # raises KeyError if the key is absent and no default is given
my_dict
my_dict.get('key_1')
if False:
    my_dict.get('key_999')  # no error for bad key
if False:
    my_dict['key_999']  # error for bad key
'key_1' in my_dict  # check for the presence of a key
'key_000' in my_dict
# same as
'key_1' in my_dict.keys()
'key_000' in my_dict.keys()
from collections import Counter
l = ['a','b','c','b','a']
c1 = Counter(l)
c1
type(c1) # collections.Counter
dict(c1)
Counter({'a': 2, 'b': 2, 'c': 1})
collections.Counter
{'a': 2, 'b': 2, 'c': 1}
# The two most frequent items as (item, count) pairs.
c1.most_common(2)
type(c1.most_common(2))  # list of tuples
c_temp = c1.most_common(2)
# Keep only the counts by unpacking each (item, count) pair.
[count for _, count in c_temp]
[('a', 2), ('b', 2)]
list
[2, 2]
nums = [1, 2, 3, 4]
fruit = ["Apples", "Peaches", "Pears", "Bananas"]
z1 = zip(nums, fruit) # datatype: zip
z1
<zip at 0x14946b0c0>