from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"


x = 10
type(x)

int


float(20)
float('20')

int('20')
int(20.2)

str(201)

20.0

20.0

20

20

'201'


x = [3,2,1,4,5]
x
type(x)

[3, 2, 1, 4, 5]

list


import numpy as np
np.mean(x)

3.0


#### Missing values ####?
# Functions to identify missing values?


# A deliberately mixed list: four ints, one float and one string.
x = [3, 2, 1, 4, 5.2, 'apple']
# One boolean mask per candidate type, evaluated element by element.
[isinstance(item, int) for item in x]
[isinstance(item, float) for item in x]
[isinstance(item, str) for item in x]

[True, True, True, True, False, False]

[False, False, False, False, True, False]

[False, False, False, False, False, True]


# Disabled demo of .is_integer(); the `if False:` guard keeps it from running.
if False:
    y = 10
    # NOTE(review): int.is_integer() was only added in Python 3.12; on older
    # interpreters this line raises AttributeError -- confirm the target version.
    y.is_integer() # True

    y = 10.0
    y.is_integer() # True

    y = 10.2
    y.is_integer() # False


x = True
print(x)
type(x)

type(x) == bool

True

bool

True


x = False
print(x)

type(x)

False

bool


xx = [True, False, True]
xx

sum(xx)
sum([False, False, False])

[True, False, True]

2

0


xx[0]

type(xx)
[type(a) for a in xx]

True

list

[bool, bool, bool]


x = "apple"
type(x)
len(x)
x[0]

str

5

'a'


"apple".upper()
"APPLE".lower()
"APPLE A day doctoR away".title()

'APPLE'

'apple'

'Apple A Day Doctor Away'


x = ["banana","apple","carrot"]
type(x)
len(x)
x[0]

type(x) == list # True
type(x) is list # True

list

3

'banana'

True

True


x = ["banana","apple","carrot"]
set(x)
list(set(x)) # order changed

{'apple', 'banana', 'carrot'}

['carrot', 'apple', 'banana']


x = ["banana","apple","carrot"]
max(x)
min(x)
len(x)

'carrot'

'apple'

3


sorted(x) # returns a value
x.sort() # in place
x

['apple', 'banana', 'carrot']

['apple', 'banana', 'carrot']


"apple" + "banana"
" ".join(["apple", "banana"])

'applebanana'

'apple banana'


l1 = ["apple","banana"]
l2 = ["carrot","dragonfruit"]
[a + b for a, b in zip(l1, l2)]

['applecarrot', 'bananadragonfruit']


"APPLE A day doctoR away".replace("apple","--")
"apple a day doctoR away".replace("apple","--")
"apple a day doctoR away apple".replace("apple","--")

'APPLE A day doctoR away'

'-- a day doctoR away'

'-- a day doctoR away --'


# replace only the first n instances?

# string extract all or some instances?


l = "abcdefghijklmnopqrstuvwxyz"
l[2:2]
l[2:3]
l[2:10]
l[2:10:2]

''

'c'

'cdefghij'

'cegi'


# Substring search with str.find() and membership tests.
many_strings = ["north carolina","south carolina","new hampshire","north dakota"]
"north carolina".find("carolina") # 6: index where the match starts
"new hampshire".find("carolina") # -1 means not found
[x.find("carolina") for x in many_strings] # return position of match
# Keep the strings that contain the pattern. Use `in` rather than
# `find(...) > 0`: find() returns 0 for a match at the very start of a
# string, so a `> 0` test would silently drop such matches.
[x for x in many_strings if "carolina" in x] # return value that matches

# Positions of every occurrence of a character within a string.
[pos for pos, x in enumerate("hawaii") if x=="i"]
[pos for pos, x in enumerate("mississippi") if x=="s"]


"welcome to hotel california".split(" ")
# puzzle: identify strings with alternating letter?

['welcome', 'to', 'hotel', 'california']


import re

txt = "The rain in Spain"
re.findall("ai", txt)

['ai', 'ai']


# Strings are immutable, so edit a list of characters and join it back.
s = [*"an apple"]
# Assigning to the empty slice at the front inserts without overwriting;
# the string on the right is consumed character by character.
s[:0] = "a banana and "
"".join(s)

'a banana and an apple'


l = ["apple","banana","carrot"]
l.append("gauva")
l


l[-1] # last element
l[-2] # second last


# Slicing reference on a ten-letter list: l[start:stop:step], stop is exclusive.
l = list("abcdefghij")
l[0:4]
l[0:4:1]
l[0:4:2]
l[0:5:2]

l[0:5][::-1]

l[0:-2] # everything except the last 2 elements
l[0:0]
l[:4] # same as l[None:4]

l[0:4] # is the same as
l[0:-6]  # a negative stop -6 is treated as len(l)-6


l[::-1]
l[0:10:-1] # does not raise, but returns [] (a negative step needs start > stop)

l[10:0:-1] # works, but stops before index 0, so 'a' is left out
# 10, 10-1, 10-1-1, ..., 
# if positive step used, start < stop
# if negative step used, start > stop

l[0:7][::-1] # most convenient way of getting in reverse

# Slice assignment replaces the selected elements in place.
l[0:2] = ["aa","bb"]
l
l[0:2] = ["a","b"]
l


l.reverse() # reverses in place, not recommended
l


a = ["dog"]
l + a
l
a

l.extend(a) # a is a list, not a single string
l
a

l.extend(a)
l


l.index("dog")
l

# index of all occurrences; always returns a list because it is a list comprehension
[pos for pos, elem in enumerate(l) if elem=="dog"]
# or 
[pos for pos in range(len(l)) if l[pos]=="dog"]
# or itertools?

l = ['apple', 'banana', 'carrot', 'gauva', 'dog', 'dog']
l.pop(1) # remove element by an index
l

# Remove several positions in one go. Pop from the highest index down:
# popping in ascending order shifts the remaining elements left, so a later
# pop would hit a different element than the one at that original position.
for pos in sorted([1,2], reverse=True):
    l.pop(pos)
l


# repeat lists
[11] * 3
[11, "apple"] * 3
[11, "apple"] * 3


# repeat each element of a list 3 times
[item for item in [11,"apple"] for n in range(3)]

# Index assignment stores the whole right-hand list as ONE element.
l1 = [11,12,13]
l1[0] = [111, 112 ,113]
l1 # nested: [[111, 112, 113], 12, 13]

# Slice assignment splices the right-hand items into the list instead.
l1 = [11,12,13]
l1[0:1] = [111, 112 ,113]
l1 # flat: [111, 112, 113, 12, 13] -- not a list of lists

# all elements except one
l1 = list("abcdefghij")
l1
l1[-1] # this gives the last one, not except one
[elem for pos, elem in enumerate(l1) if pos not in [2-1,4-1]] # works

# A list cannot be indexed by a boolean mask or by a list of positions.
l1 = list("abcdefghij")
# l1[[2,3,4]] # fails, cannot index multiple elements
# Workaround: turn the mask into the positions of its truthy entries
# (plain truthiness test rather than comparing to True), then pick those
# positions one by one.
indices = [pos for pos, keep in enumerate([True, False] * 5) if keep]
[l1[index] for index in indices]


# filter(): keep only the items for which the predicate is true;
# the comprehension below it is the equivalent spelling.
l = [*range(11, 20)]
l
list(filter(lambda value: value > 15, l))
[value for value in l if value > 15]

# map(): apply a function to every item.
l = [1, 2, 3, 4, 5]
list(map(lambda value: value + 100, l))

# reduce(): fold a sequence into a single value, left to right.
from functools import reduce
reduce(lambda total, item: total + item, [1, 2, 3, 4, 5, 6])


list(range(11,20))
list(range(11,20,2))
# 11:15, not allowed

[x for x in [11,12,13,14,15] if x == 13]
[pos for pos, elem in enumerate([11,12,13,14,15]) if elem%2==1] # same as which(x%%2==1)

"y" in list("python")
"l" in list("python")


# Repeat a short list until it reaches a target length.
l1 = [1, 2, 3]
desired_length = 10
# q: whole repetitions that fit; r: leftover items needed to top up.
q = desired_length // len(l1)
r = desired_length % len(l1)
(l1 * q) + l1[:r]


# sorted - returns a value, not in place modification
l1=[1,2,3,4]
l2=[10,10,20,20]
[x+y for x,y in zip(l1,l2)]# does not recycle, but trims the shorter list

l1=[1,2,3,4]
l2=[10,20]
[x+y for x,y in zip(l1,l2)] # does not recycle, but trims the shorter list
list(zip(l1,l2))


# Rank of each element: its position in the sorted version of the list.
# (Still incomplete.)
x = [11,13,14,15,12]
# Sort once and reuse the result instead of re-sorting for every lookup.
x_sorted = sorted(x)
x_sorted.index(11)
x_sorted.index(13)
x_sorted.index(14)
x_sorted.index(15)
x_sorted.index(12)
[x_sorted.index(i) for i in x]


l_a = ["a","b","c"]
l_b = ["x","y","z"]

zip(l_a, l_b) # type is zip
l_ab_zip_list = list(zip(l_a, l_b)) # list of tuples


# recycles? No -- zip simply truncates to the shorter input
list(zip(["a","b","c"], ["x","y"])) 
list(zip(["a","b"], ["x","y","z"])) 

# unpacking
a = l_ab_zip_list[0]
a # a is a tuple

a, b = l_ab_zip_list[0]
# a is a string, b is a string


10,20 # always a tuple, even without brackets
(10,20)
x=10; y=20;
(x,y)
x,y
x = 10,20
x


# One loop target: each element of the zipped list is printed as a whole.
for element in l_ab_zip_list:
    print("***")
    print(element)

# Two loop targets: each element is unpacked into its two parts.
# NOTE(review): `idx` is not a positional index here -- it receives the first
# item of each pair; use enumerate() if a position was intended.
for idx, element in l_ab_zip_list:
    print("***")
    print(idx)
    print(element)
    
len(l_ab_zip_list[0])


("butter","asvasdvah")
len(("butter","asvasdvah"))

("butter",) # still a tuple of length 1
len(("butter",))
type(("butter",))

("butter") # not a tuple, but a string
type(("butter"))
len(("butter")) # length 6 string


my_set = {'apple', 'banana', 'carrot'}
my_set

# typically created from a list
my_l = ['carrot','banana','apple','apple','banana','carrot']
my_set=  set(my_l)
my_set

# typically created from a list
my_l = ['carrot','banana','apple','apple','banana','carrot', 20]
my_set=  set(my_l)
my_set

{'apple', 'banana', 'carrot'}

{'apple', 'banana', 'carrot'}

{20, 'apple', 'banana', 'carrot'}


my_set.add("gauva") # adds if unique, updates inplace 
my_set

my_set.add("carrot") # adds if unique, updates inplace 
my_set

{20,
 'apple',
 'as2',
 'asd',
 'banana',
 'carrot',
 'gauva',
 'h',
 'hjy',
 'i',
 'j',
 'k',
 'rgre',
 'u',
 'y'}

{20,
 'apple',
 'as2',
 'asd',
 'banana',
 'carrot',
 'gauva',
 'h',
 'hjy',
 'i',
 'j',
 'k',
 'rgre',
 'u',
 'y'}


# update() accepts any iterable and adds each of its items in place.
my_set.update(["asd","as2"]) # a list (or tuple) of new members
my_set

my_set.update({"hjy","rgre"}) # another set works too
my_set

# Careful: a string is an iterable of characters, so this adds the single
# letters h, j, y, i, u, k -- not the whole string as one member.
my_set.update("hjyiukyuyukyukyku")
my_set

{20, 'apple', 'as2', 'asd', 'banana', 'carrot'}

{20, 'apple', 'as2', 'asd', 'banana', 'carrot', 'hjy', 'rgre'}

{20,
 'apple',
 'as2',
 'asd',
 'banana',
 'carrot',
 'h',
 'hjy',
 'i',
 'j',
 'k',
 'rgre',
 'u',
 'y'}


my_l = ['carrot','banana','apple','apple','banana','carrot']
my_set=  set(my_l)
my_set

my_set.discard("apple")
my_set

my_set.discard("asdasd") # no error if element absent
my_set

my_set.pop() # removes arbitrary element. error if empty set
my_set

{'apple', 'banana', 'carrot'}

{'banana', 'carrot'}

{'banana', 'carrot'}

'carrot'

{'banana'}


# Two small sets sharing exactly one member ('c'); written as set literals
# rather than set([...]) to avoid building a throwaway list.
set_a = {'a', 'b', 'c'}
set_b = {'d', 'e', 'c'}

set_a.union(set_b) # members of either set
set_a.intersection(set_b) # members common to both sets
set_a - set_b # same as set_a.difference(set_b)
set_b - set_a

{'a', 'b', 'c', 'd', 'e'}

{'c'}

{'a', 'b'}

{'d', 'e'}


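# always sorted? No -- sets are unordered; the sorted look below presumably comes from how the notebook displays them, so never rely on it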
s = {5,2,7,1,8}
s

{1, 2, 5, 7, 8}


my_dict = {} # my_dict = dict()
my_dict

my_dict['key_1'] = 1
my_dict['key_2'] = 2
my_dict

{}

{'key_1': 1, 'key_2': 2}


my_dict['new'] = {'key_3': 1, 'key_4': 2}
my_dict

my_dict['new']

{'key_1': 1, 'key_2': 2, 'new': {'key_3': 1, 'key_4': 2}}

{'key_3': 1, 'key_4': 2}


for my_key in my_dict:
    print("###########")
    print(my_key)
    print(my_dict[my_key])
    
# Not allowed, only keys can be extracted as for loop    
if False:
    for my_key, my_value in my_dict:
        print("###########")
        print(my_key)
        print(my_value)
    
for my_key, my_value in my_dict.items():
    print("###########")
    print(my_key)
    print(my_value)


my_dict.keys() # type: dict_keys
type(my_dict.keys())
list(my_dict.keys()) # list

my_dict.values() # type: dict_values
type(my_dict.values())
list(my_dict.values()) # list


my_dict.get('key_1') 

if False:
    my_dict['not_a_key'] # error for a missing key
my_dict.get('not_a_key') # no error thrown for a missing key

my_dict.get('not_a_key', 99999)


my_dict[2] = 1000;
my_dict

my_dict[2] # indexing by number not allowed, unless the key is a number itself

my_dict_list = list(my_dict.items()) # list of tuples

{'key_1': 1, 'key_2': 2, 'new': {'key_3': 1, 'key_4': 2}, 2: 1000}

1000


# sorted() on a dict iterates over its keys.
sorted({'key_1':10,'key_3':20,'key_2':15}) # returns sorted keys, not values
{'key_1':10,3:20,'key_2':15} # heterogeneous key types are allowed

# Disabled: sorting mixed str/int keys raises TypeError because they cannot be compared.
if False:
    sorted({'key_1':10,3:20,'key_2':15}) # avoid heterogeneous key types


my_dict = {}
my_dict['key_1'] = 1
my_dict['key_2'] = 2
my_dict
my_dict.update({'key_3': 1000})
my_dict

my_dict.update({'key_1': 999}) # adds a new key-value pair if key not present, updates if key already present
my_dict

my_dict.update({'key_4': 999}) # adds a new key-value pair if key not present, updates if key already present
my_dict


# Disabled: `del` removes a key and raises KeyError only if the key is missing
# ('key_4' was added by the update() call above, so this would succeed).
if False:
    del my_dict['key_4'] # KeyError if the key is absent
my_dict

# Disabled: pop() without a default likewise raises KeyError only for a missing key.
if False:
    my_dict.pop('key_3') # KeyError if the key is absent
my_dict

my_dict.get('key_1')
if False:
    my_dict.get('key_999') # no error for bad key
if False:
    my_dict['key_999'] # error for bad key

'key_1' in my_dict # check for presence of a key
'key_000' in my_dict 
# same as (but the .keys() call is redundant)
'key_1' in my_dict.keys()
'key_000' in my_dict.keys()


from collections import Counter

# Sample data with repeated items.
l = ['a', 'b', 'c', 'b', 'a']

# Tally the items: start from an empty Counter and feed it the list.
c1 = Counter()
c1.update(l)
c1
type(c1) # collections.Counter
dict(c1) # plain dict with the same key -> count pairs

Counter({'a': 2, 'b': 2, 'c': 1})

collections.Counter

{'a': 2, 'b': 2, 'c': 1}


c1.most_common(2)
type(c1.most_common(2)) # list of tuples
c_temp = c1.most_common(2)
[x[1] for x in c_temp]

[('a', 2), ('b', 2)]

list

[2, 2]


nums = [1, 2, 3, 4]
fruit = ["Apples", "Peaches", "Pears", "Bananas"]

z1 = zip(nums, fruit) # datatype: zip
z1

<zip at 0x14946b0c0>

📘 DataTypes

🔢 Integers/Numerics

🔘 Boolean

🔤 Strings

re package

String Modification - Position

String Modification - Pattern

Substring Search - RegEx

📋 Containers - List

📦 Containers - Tuple

🔣 Containers - Set

📖 Containers - Dictionary

🗃️ Containers - Collections

🪢 Containers - zip