1.import pandas as pd
ds=pd.Series([1,2,3,4])
print(ds)
output:
0 1
1 2
2 3
3 4
dtype: int64
2.import pandas as pd
ds=pd.Series([1,2,3,4])
print(ds.tolist())
print(type(ds.tolist()))
output:
[1, 2, 3, 4]
<class 'list'>
3.import pandas as pd
ds1=pd.Series([1,3,5,7])
ds2=pd.Series([2,4,6,8])
add=ds1+ds2
sub=ds1=ds2
mul=ds1*ds2
div=ds1/ds2
print(add)
print(sub)
print(mul)
print(div)
output:
0 3
1 7
2 11
3 15
dtype: int64
0 2
1 4
2 6
3 8
dtype: int64
0 4
1 16
2 36
3 64
dtype: int64
0 1.0
1 1.0
2 1.0
3 1.0
dtype: float64
4.import numpy as np
a=np.array([10,20,30,40,50])
print(a)
import pandas as pd
ds=pd.Series(a)
print(ds)
output:
[10 20 30 40 50]
0 10
1 20
2 30
3 40
4 50
dtype: int32
5.import pandas as pd
ds=pd.Series(i for i in range(50))
print(ds.head(-10))
output:
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
10 10
11 11
12 12
13 13
14 14
15 15
16 16
17 17
18 18
19 19
20 20
21 21
22 22
23 23
24 24
25 25
26 26
27 27
28 28
29 29
30 30
31 31
32 32
33 33
34 34
35 35
36 36
37 37
38 38
39 39
dtype: int64
6.import pandas as pd
ds=pd.Series(i for i in range(50))
print(ds.head(-29).tail(-14))
output:
14 14
15 15
16 16
17 17
18 18
19 19
20 20
dtype: int64
7.import numpy as np
import pandas as pd
exam_data = {
'name': ['Anastasia','Dima','Katherine','James','Emily','Michael','Mathnew','Laura','Kevin','Jonas'],
'score':[12.5,9,16.5,np.nan,9,20,14.5,np.nan,8,19],
'attempts':[1,3,2,3,2,3,1,1,2,1],
'qulify':['yes','no','yes','no','no','yes','yes','no','no','yes']}
lables=['a','b','c','d','e','f','g','h','i','j']
df=pd.DataFrame(exam_data,index=lables)
print(df)
output:
name score attempts qulify
a Anastasia 12.5 1 yes
b Dima 9.0 3 no
c Katherine 16.5 2 yes
d James NaN 3 no
e Emily 9.0 2 no
f Michael 20.0 3 yes
g Mathnew 14.5 1 yes
h Laura NaN 1 no
i Kevin 8.0 2 no
j Jonas 19.0 1 yes
8.import numpy as np
import pandas as pd
exam_data = {
'name': ['Anastasia','Dima','Katherine','James','Emily','Michael','Mathnew','Laura','Kevin','Jonas'],
'score':[12.5,9,16.5,np.nan,9,20,14.5,np.nan,8,19],
'attempts':[1,3,2,3,2,3,1,1,2,1],
'qulify':['yes','no','yes','no','no','yes','yes','no','no','yes']}
lables=['a','b','c','d','e','f','g','h','i','j']
df['name']=df['name'].replace('Jonas','Suresh')
print(df)
output:
name score attempts qulify
a Anastasia 12.5 1 yes
b Dima 9.0 3 no
c Katherine 16.5 2 yes
d James NaN 3 no
e Emily 9.0 2 no
f Michael 20.0 3 yes
g Mathnew 14.5 1 yes
h Laura NaN 1 no
i Kevin 8.0 2 no
j Suresh 19.0 1 yes
9.import numpy as np
import pandas as pd
exam_data = {
'name': ['Anastasia','Dima','Katherine','James','Emily','Michael','Mathnew','Laura','Kevin','Jonas'],
'score':[12.5,9,16.5,np.nan,9,20,14.5,np.nan,8,19],
'attempts':[1,3,2,3,2,3,1,1,2,1],
'qulify':['yes','no','yes','no','no','yes','yes','no','no','yes']}
lables=['a','b','c','d','e','f','g','h','i','j']
df['colour']=['red','blue','red','blue','red','blue','red','blue','red','blue']
print(df)
output:
name score attempts qulify colour
a Anastasia 12.5 1 yes red
b Dima 9.0 3 no blue
c Katherine 16.5 2 yes red
d James NaN 3 no blue
e Emily 9.0 2 no red
f Michael 20.0 3 yes blue
g Mathnew 14.5 1 yes red
h Laura NaN 1 no blue
i Kevin 8.0 2 no red
j Suresh 19.0 1 yes blue
10.import numpy as np
import pandas as pd
exam_data = {
'name': ['Anastasia','Dima','Katherine','James','Emily','Michael','Mathnew','Laura','Kevin','Jonas'],
'score':[12.5,9,16.5,np.nan,9,20,14.5,np.nan,8,19],
'attempts':[1,3,2,3,2,3,1,1,2,1],
'qulify':['yes','no','yes','no','no','yes','yes','no','no','yes']}
lables=['a','b','c','d','e','f','g','h','i','j']
df=pd.DataFrame(exam_data,index=lables)
df=(list(df.columns.values))
print(df)
output:
['name', 'score', 'attempts', 'qulify']
C) Pandas Index:
1) Write a Pandas program to display the default index and set a column as an Index in a givendataframe.
import pandas as pd
df= pd.DataFrame({'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas'], 'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19], 'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1], 'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes'], 'labels' : ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] })
print("Default Index:")
print(df.head(10))
print("\nt_id as new Index:")
df1 = df.set_index('label')
print(df1)
print("\nReset the index:")
df2 = df1.reset_index(inplace=False)
print(df2)
O/P
Default Index:
name score attempts qualify labels
0 Anastasia 12.5 1 yes a
1 Dima 9.0 3 no b
2 Katherine 16.5 2 yes c
3 James NaN 3 no d
4 Emily 9.0 2 no e
5 Michael 20.0 3 yes f
6 Matthew 14.5 1 yes g
7 Laura NaN 1 no h
8 Kevin 8.0 2 no i
9 Jonas 19.0 1 yes j
label as new Index:
name score attempts qualify
labels
a Anastasia 12.5 1 yes
b Dima 9.0 3 no
c Katherine 16.5 2 yes
d James NaN 3 no
e Emily 9.0 2 no
f Michael 20.0 3 yes
g Matthew 14.5 1 yes
h Laura NaN 1 no
i Kevin 8.0 2 no
j Jonas 19.0 1 yes
Reset the index:
labels name score attempts qualify
0 a Anastasia 12.5 1 yes
1 b Dima 9.0 3 no
2 c Katherine 16.5 2 yes
3 d James NaN 3 no
4 e Emily 9.0 2 no
5 f Michael 20.0 3 yes
6 g Matthew 14.5 1 yes
7 h Laura NaN 1 no
8 i Kevin 8.0 2 no
9 j Jonas 19.0 1 yes
2) Write a Pandas program to create an index labels by using 64-bit integers, usingfloating-point numbers in a given dataframe.
import pandas as pd
df= pd.DataFrame({'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas'], 'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19], 'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1], 'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes'], 'labels' : ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] })
print(df)
print("\nView the Attempts:")
print(df.attempts)
print("\nView the Score:")
print(df.score)
O/P
name score attempts qualify labels
0 Anastasia 12.5 1 yes a
1 Dima 9.0 3 no b
2 Katherine 16.5 2 yes c
3 James NaN 3 no d
4 Emily 9.0 2 no e
5 Michael 20.0 3 yes f
6 Matthew 14.5 1 yes g
7 Laura NaN 1 no h
8 Kevin 8.0 2 no i
9 Jonas 19.0 1 yes j
View the Attempts:
0 1
1 3
2 2
3 3
4 2
5 3
6 1
7 1
8 2
9 1
Name: attempts, dtype: int64
View the Score:
0 12.5
1 9.0
2 16.5
3 NaN
4 9.0
5 20.0
6 14.5
7 NaN
8 8.0
9 19.0
Name: score, dtype: float64
D) Pandas String and Regular Expressions:
1) Write a Pandas program to convert all the string values to upper, lower cases in agiven pandas series. Also find the length of the string values.
import pandas as pd
import numpy as np
s=pd.Series(['hima','Parrot','np.num'])
print("Original Series")
print(s)
print("\nConvert all string values of the said Series to upper case:")
print(s.str.upper())
print("\nConvert all string values of the said Series to lower case:")
print(s.str.lower())
print("\nLength of the string values of the said Series:")
print(s.str.len())
O/P
Original Series
0 hima
1 Parrot
2 np.num
dtype: object
Convert all string values of the said Series to upper case:
0 HIMA
1 PARROT
2 NP.NUM
dtype: object
Convert all string values of the said Series to lower case:
0 hima
1 parrot
2 np.num
dtype: object
Length of the string values of the said Series:
0 4
1 6
2 6
dtype: int64
2) Write a Pandas program to remove whitespaces, left sided whitespaces and right sided whitespaces of the string values of a given pandas series.
import pandas as pd
c = pd.Index([' Yellow', 'Black ', ' Orange ', 'White', ' Pink '])
print("Original series:")
print(c)
print("\nRemove whitespace")
print(c.str.strip())
print("\nRemove left sided whitespace")
print(c.str.lstrip())
print("\nRemove Right sided whitespace")
print(c.str.rstrip())
O/P
Original series:
Index([' Yellow', 'Black ', ' Orange ', 'White', ' Pink '], dtype='object')
Remove whitespace
Index(['Yellow', 'Black', 'Orange', 'White', 'Pink'], dtype='object')
Remove left sided whitespace
Index(['Yellow', 'Black ', 'Orange ', 'White', 'Pink '], dtype='object')
Remove Right sided whitespace
Index([' Yellow', 'Black', ' Orange', 'White', ' Pink'], dtype='object')
3) Write a Pandas program to count of occurrence of a specified substring in a DataFrame column.
import pandas as pd
df=pd.DataFrame({'labels' : ['a', 'b', 'c', 'd', 'e', 'f' , 'g', 'h', 'i', 'j'], 'score': ['A12', 'A9', 'A16', 'A22', 'A9', 'A20', 'A14', 'A32', 'A8', 'A129' ], 'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas']})
print("Original DataFrame:")
print(df)
print("\nCount occurrence of 2 in score column:")
df['count'] = list(map(lambda x: x.count("2"), df['score']))
print(df)
O/P
Original DataFrame:
labels score name
0 a A12 Anastasia
1 b A9 Dima
2 c A16 Katherine
3 d A22 James
4 e A9 Emily
5 f A20 Michael
6 g A14 Matthew
7 h A32 Laura
8 i A8 Kevin
9 j A129 Jonas
Count occurrence of 2 in score column:
labels score name count
0 a A12 Anastasia 1
1 b A9 Dima 0
2 c A16 Katherine 0
3 d A22 James 2
4 e A9 Emily 0
5 f A20 Michael 1
6 g A14 Matthew 0
7 h A32 Laura 1
8 i A8 Kevin 0
9 j A129 Jonas 1
4) Write a Pandas program to swap the cases of a specified character column in a givenDataFrame.
import pandas as pd
df=pd.DataFrame({'labels' : ['a', 'b', 'c', 'd', 'e', 'f' , 'g', 'h', 'i', 'j'], 'score': ['A12', 'A9', 'A16', 'A22', 'A9', 'A20', 'A14', 'A32', 'A8', 'A129' ], 'name': ['Anastasia', 'DIMA', 'Katherine', 'JAMES', 'Emily', 'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas']})
print("Original DataFrame:")
print(df)
print("\nSwapp cases in name:")
df['swapped_name'] = list(map(lambda x: x.swapcase(), df['name']))
print(df)
O/P
Original DataFrame:
labels score name
0 a A12 Anastasia
1 b A9 DIMA
2 c A16 Katherine
3 d A22 JAMES
4 e A9 Emily
5 f A20 Michael
6 g A14 Matthew
7 h A32 Laura
8 i A8 Kevin
9 j A129 Jonas
Swapp cases in name:
labels score name swapped_name
0 a A12 AnastasiaaNASTASIA
1 b A9 DIMA dima
2 c A16 KatherinekATHERINE
3 d A22 JAMES james
4 e A9 Emily eMILY
5 f A20 Michael mICHAEL
6 g A14 Matthew mATTHEW
7 h A32 Laura lAURA
8 i A8 Kevin kEVIN
9 j A129 Jonas jONAS
E) Pandas Joining and merging DataFrame:
1) Write a Pandas program to join the two given dataframesalong rows and assign all data.
import pandas as pd
stu_data1=pd.DataFrame({'sid':['s1', 's2', 's3', 's4', 's5'], 'name': ['Sunny', 'Munny', 'Buddy', 'Akshu', 'Nani'], 'Marks': [55, 80, 95, 82, 85]})
stu_data2=pd.DataFrame({'sid':['s6', 's7', 's8', 's9', 's10'], 'name': ['Sunny', 'Munny', 'Buddy', 'Akshu', 'Nani'], 'Marks': [65, 70, 98, 72, 65]})
print("Original DataFrames:")
print(stu_data1)
print("-------------------------------------")
print(stu_data2)
print("\nJoin the said two dataframes along rows:")
result_data = pd.concat([stu_data1, stu_data2])
print(result_data)
O/P
Original DataFrames:
sidname Marks
0 s1 Sunny 55
1 s2 Munny 80
2 s3 Buddy 95
3 s4 Akshu 82
4 s5 Nani 85
-------------------------------------
sidname Marks
0 s6 Sunny 65
1 s7 Munny 70
2 s8 Buddy 98
3 s9 Akshu 72
4 s10 Nani 65
Join the said two dataframes along rows:
sidname Marks
0 s1 Sunny 55
1 s2 Munny 80
2 s3 Buddy 95
3 s4 Akshu 82
4 s5 Nani 85
0 s6 Sunny 65
1 s7 Munny 70
2 s8 Buddy 98
3 s9 Akshu 72
4 s10 Nani 65
2) Write a Pandas program to append a list of dictioneries or series to a existing DataFrame and display the combined data.
import pandas as pd
stu_data1=pd.DataFrame({'sid':['s1', 's2', 's3', 's4', 's5'], 'name': ['Sunny', 'Munny', 'Buddy', 'Akshu', 'Nani'], 'Marks': [55, 80, 95, 82, 85]})
s6 = pd.Series(['S6', 'Yogi', 96], index=['sid', 'name', 'marks'])
dicts = [{'sid': 'S6', 'name': 'Yogi', 'Marks': 96},{'sid': 'S7', 'name': 'Abhi', 'Marks': 88}]
print("Original DataFrames:")
print(stu_data1)
print("\nDictionary:")
print(s6)
combined_data= stu_data1.append(dicts, ignore_index=True, sort=False)
print("\nCombined Data:")
print(combined_data)
O/P
Original DataFrames:
sidname Marks
0 s1 Sunny 55
1 s2 Munny 80
2 s3 Buddy 95
3 s4 Akshu 82
4 s5 Nani 85
Dictionary:
sid S6
name Yogi
marks 96
dtype: object
Combined Data:
sidname Marks
0 s1 Sunny 55
1 s2 Munny 80
2 s3 Buddy 95
3 s4 Akshu 82
4 s5 Nani 85
5 S6 Yogi 96
6 S7 Abhi 88
3) Write a Pandas program to join the two dataframes with matching records from bothsides where available.
import pandas as pd
stu_data1=pd.DataFrame({'sid':['s1', 's2', 's3', 's4', 's5'], 'name': ['Sunny', 'Munny', 'Buddy', 'Akshu', 'Nani'], 'Marks': [55, 80, 95, 82, 85]})
stu_data2=pd.DataFrame({'sid':['s6', 's7', 's8', 's9', 's10'], 'name': ['Sunny', 'Munny', 'Buddy', 'Akshu', 'Nani'], 'Marks': [65, 70, 98, 72, 65]})
print("Original DataFrames:")
print(stu_data1)
print(stu_data2)
merged_data = pd.merge(stu_data1, stu_data2, on='sid', how='outer')
print("Merged data (outer join):")
print(merged_data)
O/P
Original DataFrames:
sidname Marks
0 s1 Sunny 55
1 s2 Munny 80
2 s3 Buddy 95
3 s4 Akshu 82
4 s5 Nani 85
sidname Marks
0 s6 Sunny 65
1 s7 Munny 70
2 s8 Buddy 98
3 s9 Akshu 72
4 s10 Nani 65
Merged data (outer join):
sidname_xMarks_xname_yMarks_y
0 s1 Sunny 55.0 NaNNaN
1 s2 Munny 80.0 NaNNaN
2 s3 Buddy 95.0 NaNNaN
3 s4 Akshu 82.0 NaNNaN
4 s5 Nani 85.0 NaNNaN
5 s6 NaNNaN Sunny 65.0
6 s7 NaNNaNMunny 70.0
7 s8 NaNNaN Buddy 98.0
8 s9 NaNNaNAkshu 72.0
9 s10 NaNNaN Nani 65.0
F) Pandas Time Series:
1) Write a Pandas program to create
a) Datetime object for Jan 15 2012.
b) Specific date and time of 9:20 pm.
c) Local date and time.
d) A date without time.
e) Current date.
f) Time from a datetime.
g) Current local time.
import datetime
from datetime import datetime
print("Datetime object for Jan 11 2012:")
print(datetime(2012, 1, 11))
print("\nSpecific date and time of 9:20 pm")
print(datetime(2011, 1, 11, 21, 20))
print("\nLocal date and time:")
print(datetime.now())
print("\nA date without time: ")
print(datetime.date(datetime(2012, 5, 22)))
print("\nCurrent date:")
print(datetime.now().date())
print("\nTime from a datetime:")
print(datetime.time(datetime(2012, 12, 15, 18, 12)))
print("\nCurrent local time:")
print(datetime.now().time())
O/P
Datetime object for May 20 1985:
1985-05-20 00:00:00
Specific date and time of 9:20 pm
2011-01-11 21:20:00
Local date and time:
2022-03-18 14:12:11.440313
A date without time:
2012-05-22
Current date:
2022-03-18
Time from a datetime:
18:12:00
Current local time:
14:12:11.440313
2) Write a Pandas program to create a date from a given year, month, day and anotherdate from a given string formats.
from datetime import datetime
date1 = datetime(year=2017, month=09, day=10)
print("Date from a given year, month, day:")
print(date1)
from dateutil import parser
date2 = parser.parse("8th of January, 2010")
print("\nDate from a given string formats:")
print(date2)
O/P
Date from a given year, month, day:
2017-09-10 00:00:00
Date from a given string formats:
2010-01-08 00:00:00
3) Write a Pandas program to create a time-series with two index labels and randomvalues. Also print the type of the index.
import pandas as pd
import numpy as np
import datetime
from datetime import datetime, date
dates = [datetime(2011, 9, 1), datetime(2011, 9, 2)]
print("Time-series with two index labels:")
time_series = pd.Series(np.random.randn(2), dates)
print(time_series)
print("\nType of the index:")
print(type(time_series.index))
O/P
Time-series with two index labels:
2011-09-01 0.408125
2011-09-02 0.884415
dtype: float64
Type of the index:
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>
G) Pandas Grouping Aggregate:
Consider dataset:
school class name date_Of_Birth age height weight address
S1 s001 V Alberto Franco 15/05/2002 12 173 35 street1
S2 s002 V Gino Mcneill17/05/2002 12 192 32street2
S3 s003 VI Ryan Parkes 16/02/1999 13 186 33street3
S4 s001 VI Eesha Hinton 25/09/1998 13 167 30 street1
S5 s002 V Gino Mcneill11/05/2002 14 151 31street2
S6 s004 VI David Parkes 15/09/1997 12 159 32street4
1) Write a Pandas program to split the following dataframe into groups based on school
code. Also check the type of GroupBy object.
import pandas as pd
pd.set_option('display.max_rows', None)
#pd.set_option('display.max_columns', None)
stu_data = pd.DataFrame({'school_code': ['s001','s002','s003','s001','s002','s004'],'class': ['V', 'V', 'VI', 'VI', 'V', 'VI'], 'name': ['Alberto Franco','GinoMcneill','Ryan Parkes', 'Eesha Hinton', 'Gino Mcneill', 'David Parkes'], 'date_Of_Birth ': ['15/05/2002','17/05/2002','16/02/1999','25/09/1998','11/05/2002','15/09/1997'],'age': [12, 12, 13, 13, 14, 12], 'height': [173, 192, 186, 167, 151, 159], 'weight': [35, 32, 33, 30, 31, 32], 'address': ['street1', 'street2', 'street3', 'street1', 'street2', 'street4']}, index=['S1', 'S2', 'S3', 'S4', 'S5', 'S6'])
print("Original DataFrame:")
print(stu_data)
print('\nSplit the said data on school_code wise:')
result = stu_data.groupby(['school_code'])
for name,group in result:
print("\nGroup:")
print(name)
print(group0
print("\nType of the object:")
print(type(result))
O/P
Original DataFrame:
school_code class name date_Of_Birthage height weight \
S1 s001 V Alberto Franco 15/05/2002 12 173 35
S2 s002 V Gino Mcneill 17/05/2002 12 192 32
S3 s003 VI Ryan Parkes 16/02/1999 13 186 33
S4 s001 VI Eesha Hinton 25/09/1998 13 167 30
S5 s002 V Gino Mcneill 11/05/2002 14 151 31
S6 s004 VI David Parkes 15/09/1997 12 159 32
address
S1 street1
S2 street2
S3 street3
S4 street1
S5 street2
S6 street4
Split the said data on school_code wise:
Group:
s001
school_code class name date_Of_Birthage height weight \
S1 s001 V Alberto Franco 15/05/2002 12 173 35
S4 s001 VI Eesha Hinton 25/09/1998 13 167 30
address
S1 street1
S4 street1
Group:
s002
school_code class name date_Of_Birthage height weight \
S2 s002 V GinoMcneill 17/05/2002 12 192 32
S5 s002 V GinoMcneill 11/05/2002 14 151 31
address
S2 street2
S5 street2
Group:
s003
school_code class name date_Of_Birthage height weight address
S3 s003 VI Ryan Parkes 16/02/1999 13 186 33 street3
Group:
s004
school_code class name date_Of_Birthage height weight \
S6 s004 VI David Parkes 15/09/1997 12 159 32
address
S6 street4
Type of the object:
pandas.core.groupby.generic.DataFrameGroupBy
2) Write a Pandas program to split the following dataframe by school code and getmean, min, and max value of age for each school.
import pandas as pd
pd.set_option('display.max_rows', None)
#pd.set_option('display.max_columns', None)
stu_data = pd.DataFrame({'school_code': ['s001','s002','s003','s001','s002','s004'],'class': ['V', 'V', 'VI', 'VI', 'V', 'VI'], 'name': ['Alberto Franco','GinoMcneill','Ryan Parkes', 'Eesha Hinton', 'Gino Mcneill', 'David Parkes'], 'date_Of_Birth ': ['15/05/2002','17/05/2002','16/02/1999','25/09/1998','11/05/2002','15/09/1997'],'age': [12, 12, 13, 13, 14, 12], 'height': [173, 192, 186, 167, 151, 159], 'weight': [35, 32, 33, 30, 31, 32], 'address': ['street1', 'street2', 'street3', 'street1', 'street2', 'street4']}, index=['S1', 'S2', 'S3', 'S4', 'S5', 'S6'])
print("Original DataFrame:")
print(stu_data)
print('\nMean, min, and max value of age for each value of the school:')
grouped_single = stu_data.groupby('school_code').agg({'age': ['mean', 'min', 'max']})
print(grouped_single)
O/P
Original DataFrame:
school_code class name date_Of_Birthage height weight \
S1 s001 V Alberto Franco 15/05/2002 12 173 35
S2 s002 V Gino Mcneill 17/05/2002 12 192 32
S3 s003 VI Ryan Parkes 16/02/1999 13 186 33
S4 s001 VI Eesha Hinton 25/09/1998 13 167 30
S5 s002 V Gino Mcneill 11/05/2002 14 151 31
S6 s004 VI David Parkes 15/09/1997 12 159 32
address
S1 street1
S2 street2
S3 street3
S4 street1
S5 street2
S6 street4
Mean, min, and max value of age for each value of the school:
age
mean min max
school_code
s001 12.5 12 13
s002 13.0 12 14
s003 13.0 13 13
s004 12.0 12 12
H) Pandas Styling:
1) Create a dataframe of ten rows, four columns with random values. Write a Pandasprogram to highlight the negative numbers red and positive numbers black.
import pandas as pd
import numpy as np
np.random.seed(24)
df = pd.DataFrame({'A': np.linspace(1, 10, 10)})
df = pd.concat([df, pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))],
axis=1)
print("Original array:")
print(df)
def color_negative_red(val):
color = 'red' if val< 0 else 'black'
return 'color: %s' % color
print("\nNegative numbers red and positive numbers black:")
df.style.applymap(color_negative_red)
O/P
Original array:
A B C D E
0 1.0 1.329212 -0.770033 -0.316280 -0.990810
1 2.0 -1.070816 -1.438713 0.564417 0.295722
2 3.0 -1.626404 0.219565 0.678805 1.889273
3 4.0 0.961538 0.104011 -0.481165 0.850229
4 5.0 1.453425 1.057737 0.165562 0.515018
5 6.0 -1.336936 0.562861 1.392855 -0.063328
6 7.0 0.121668 1.207603 -0.002040 1.627796
7 8.0 0.354493 1.037528 -0.385684 0.519818
8 9.0 1.686583 -1.325963 1.428984 -2.089354
9 10.0 -0.129820 0.631523 -0.586538 0.290720
Negative numbers red and positive numbers black:
Out[10]:
A | B | C | D | E | |
0 | 1.000000 | 1.329212 | -0.770033 | -0.316280 | -0.990810 |
1 | 2.000000 | -1.070816 | -1.438713 | 0.564417 | 0.295722 |
2 | 3.000000 | -1.626404 | 0.219565 | 0.678805 | 1.889273 |
3 | 4.000000 | 0.961538 | 0.104011 | -0.481165 | 0.850229 |
4 | 5.000000 | 1.453425 | 1.057737 | 0.165562 | 0.515018 |
5 | 6.000000 | -1.336936 | 0.562861 | 1.392855 | -0.063328 |
6 | 7.000000 | 0.121668 | 1.207603 | -0.002040 | 1.627796 |
7 | 8.000000 | 0.354493 | 1.037528 | -0.385684 | 0.519818 |
8 | 9.000000 | 1.686583 | -1.325963 | 1.428984 | -2.089354 |
9 | 10.000000 | -0.129820 | 0.631523 | -0.586538 | 0.290720 |
2) Create a dataframe of ten rows, four columns with random values. Write a Pandasprogram to highlight the maximum value in each column.
import pandas as pd
import numpy as np
np.random.seed(24)
df = pd.DataFrame({'A': np.linspace(1, 10, 10)})
df = pd.concat([df, pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))],axis=1)
df.iloc[0, 2] = np.nan
df.iloc[3, 3] = np.nan
df.iloc[4, 1] = np.nan
df.iloc[9, 4] = np.nan
print("Original array:")
print(df)
def highlight_max(s):
'''
highlight the maximum in a Series green.
'''
is_max = s == s.max()
return ['background-color: green' if v else '' for v in is_max]
print("\nHighlight the maximum value in each column:")
df.style.apply(highlight_max,subset=pd.IndexSlice[:, ['B', 'C', 'D', 'E']])
O/P
Original array:
A B C D E
0 1.0 1.329212NaN -0.316280 -0.990810
1 2.0 -1.070816 -1.438713 0.564417 0.295722
2 3.0 -1.626404 0.219565 0.678805 1.889273
3 4.0 0.961538 0.104011 NaN 0.850229
4 5.0 NaN 1.057737 0.165562 0.515018
5 6.0 -1.336936 0.562861 1.392855 -0.063328
6 7.0 0.121668 1.207603 -0.002040 1.627796
7 8.0 0.354493 1.037528 -0.385684 0.519818
8 9.0 1.686583 -1.325963 1.428984 -2.089354
9 10.0 -0.129820 0.631523 -0.586538 NaN
Highlight the maximum value in each column:
Out[11]:
A | B | C | D | E | |
0 | 1.000000 | 1.329212 | nan | -0.316280 | -0.990810 |
1 | 2.000000 | -1.070816 | -1.438713 | 0.564417 | 0.295722 |
2 | 3.000000 | -1.626404 | 0.219565 | 0.678805 | 1.889273 |
3 | 4.000000 | 0.961538 | 0.104011 | nan | 0.850229 |
4 | 5.000000 | nan | 1.057737 | 0.165562 | 0.515018 |
5 | 6.000000 | -1.336936 | 0.562861 | 1.392855 | -0.063328 |
6 | 7.000000 | 0.121668 | 1.207603 | -0.002040 | 1.627796 |
7 | 8.000000 | 0.354493 | 1.037528 | -0.385684 | 0.519818 |
8 | 9.000000 | 1.686583 | -1.325963 | 1.428984 | -2.089354 |
9 | 10.000000 | -0.129820 | 0.631523 | -0.586538 | nan |
3) Create a dataframe of ten rows, four columns with random values. Write a Pandasprogram to highlight dataframe's specific columns.
import pandas as pd
import numpy as np
np.random.seed(24)
df = pd.DataFrame({'A': np.linspace(1, 10, 10)})
df = pd.concat([df, pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))],axis=1)
df.iloc[0, 2] = np.nan
df.iloc[3, 3] = np.nan
df.iloc[4, 1] = np.nan
df.iloc[9, 4] = np.nan
print("Original array:")
print(df)
def highlight_cols(s):
color = 'grey'
return 'background-color: %s' % color
print("\nHighlight specific columns:")
df.style.applymap(highlight_cols, subset=pd.IndexSlice[:, ['B', 'C']])
O/P
Original array:
A B C D E
0 1.0 1.329212NaN -0.316280 -0.990810
1 2.0 -1.070816 -1.438713 0.564417 0.295722
2 3.0 -1.626404 0.219565 0.678805 1.889273
3 4.0 0.961538 0.104011 NaN 0.850229
4 5.0 NaN 1.057737 0.165562 0.515018
5 6.0 -1.336936 0.562861 1.392855 -0.063328
6 7.0 0.121668 1.207603 -0.002040 1.627796
7 8.0 0.354493 1.037528 -0.385684 0.519818
8 9.0 1.686583 -1.325963 1.428984 -2.089354
9 10.0 -0.129820 0.631523 -0.586538 NaN
Highlight specific columns:
Out[12]:
A | B | C | D | E | |
0 | 1.000000 | 1.329212 | nan | -0.316280 | -0.990810 |
1 | 2.000000 | -1.070816 | -1.438713 | 0.564417 | 0.295722 |
2 | 3.000000 | -1.626404 | 0.219565 | 0.678805 | 1.889273 |
3 | 4.000000 | 0.961538 | 0.104011 | nan | 0.850229 |
4 | 5.000000 | nan | 1.057737 | 0.165562 | 0.515018 |
5 | 6.000000 | -1.336936 | 0.562861 | 1.392855 | -0.063328 |
6 | 7.000000 | 0.121668 | 1.207603 | -0.002040 | 1.627796 |
7 | 8.000000 | 0.354493 | 1.037528 | -0.385684 | 0.519818 |
8 | 9.000000 | 1.686583 | -1.325963 | 1.428984 | -2.089354 |
9 | 10.000000 | -0.129820 | 0.631523 | -0.586538 | nan |
I) Excel:
1) Write a Pandas program to import excel data into a Pandas dataframe.
import pandas as pd
df = pd.read_excel('D:\Student.xlsx')
print(df.head)
O/P
<bound method NDFrame.head of Unnamed: 0 Unnamed: 1 Unnamed: 2 Unnamed: 3
0 NaNsidsname address
1 NaN 1 ABC STREET1
2 NaN 2 BCD STREET2
3 NaN 3 CDE STREET3
4 NaN 4 DEF STREET4
5 NaN 5 EFG STREET5>
2) Write a Pandas program to find the sum, mean, max, min value of a column of file.
import numpy as np
df = pd.read_excel('D:\Student.xlsx')
print("Sum: ",df["Production"].sum())
print("Mean: ",df["Production"].mean())
print("Maximum: ",df["Production"].max())
print("Minimum: ",df["Production"].min())