|
# Single-letter aliases: each name is bound to its own one-character color
# symbol, which keeps the table rows below short.
r, g, b, y, m, c = 'r', 'g', 'b', 'y', 'm', 'c'

# Cayley table for the color symbols: cayley_colors[(left, right)] holds the
# result of combining the two symbols. Each update() call fills one row, with
# the right-hand operand running over r, g, b, y, c, m.
cayley_colors = {}
cayley_colors.update({(r, r): r, (r, g): y, (r, b): m, (r, y): g, (r, c): b, (r, m): c})
cayley_colors.update({(g, r): y, (g, g): g, (g, b): c, (g, y): r, (g, c): b, (g, m): m})
cayley_colors.update({(b, r): m, (b, g): c, (b, b): b, (b, y): g, (b, c): r, (b, m): y})
cayley_colors.update({(y, r): m, (y, g): r, (y, b): c, (y, y): y, (y, c): g, (y, m): b})
cayley_colors.update({(c, r): g, (c, g): y, (c, b): m, (c, y): b, (c, c): c, (c, m): r})
# The fifth entry of this row used to be keyed (m, b) a second time, which
# overwrote (m, b) and left (m, c) undefined. Following the column order of
# every other row, the fifth key is (m, c).
cayley_colors.update({(m, r): c, (m, g): y, (m, b): r, (m, y): g, (m, c): c, (m, m): m})
print("If if got this right, with so little coffee, it is a miracle.")
cayley_colors
class BaseCayleyGroup(object):
    """Base class for Cayley-table groups; it defines no behavior of its own here."""


class CayleyColorsGroup(BaseCayleyGroup):
    """One element of the six-symbol color group, backed by a shared Cayley table."""

    __symbols = {'r', 'g', 'b', 'y', 'c', 'm'}  # Attribute of class -- group symbols
    __table = None  # Attribute of class -- Cayley Table for class.

    def __init__(self, v):
        """Store the symbol ``v`` and build the class-wide table on first use.

        ``v`` is stored as given; it is not checked against the group symbols.
        """
        # Not strictly the correct approach. Should probably return an error, but OK.
        self.__value = v

        # The table is a property of the class, so it is built once (by the
        # first instance) and shared by every later instance.
        if CayleyColorsGroup.__table is None:
            columns = 'rgbycm'
            # One string per left operand; character i is the result for the
            # right operand columns[i]. Writing each row this way makes a
            # duplicated key impossible -- the dict-literal version listed
            # (m, b) twice and never defined (m, c).
            rows = {
                'r': 'rymgbc',
                'g': 'ygcrbm',
                'b': 'mcbgry',
                'y': 'mrcygb',
                'c': 'gymbcr',
                'm': 'cyrgcm',
            }
            CayleyColorsGroup.__table = {
                (left, right): result
                for left, results in rows.items()
                for right, result in zip(columns, results)
            }

    @classmethod  # Method on class. HINT: Base class calls.
    def get_cayley_table(cls):
        """Return the shared table, or None if no instance has been created yet."""
        return CayleyColorsGroup.__table

    # Method on instance. HINT: base class calls.
    def get_value(self):
        """Return the symbol this element was constructed with."""
        return self.__value
# Three elements: two constructed from 'r' and one from 'g'.
c1, c2, c3 = (CayleyColorsGroup(symbol) for symbol in ('r', 'r', 'g'))

# Show how == treats same-symbol and different-symbol elements.
print('(c1 == c2) =', c1 == c2)
print('(c1 == c3) =', c1 == c3)
The `==` operator works $\Rightarrow$ `__eq__()` is implemented somewhere.
`CayleyColorsGroup` does not implement `__eq__()` $\Rightarrow$ it must be inherited from `BaseCayleyGroup`.
`BaseCayleyGroup` calls `CayleyColorsGroup.get_value()`.
# Combine two elements with the * operator and print the result.
# NOTE(review): this relies on the class hierarchy providing __mul__ -- confirm
# that the base class in use defines it.
c4 = c1*c2
print(c4)
The `*` operator works $\Rightarrow$ `__mul__()` is implemented somewhere.
`CayleyColorsGroup` does not implement `__mul__()` $\Rightarrow$ it must be inherited from `BaseCayleyGroup`.
`BaseCayleyGroup` calls `CayleyColorsGroup.get_cayley_table()`.
# Fetch the table through c4 and read the entry keyed by ('r', 'g').
c5 = c4.get_cayley_table()['r', 'g']
c5
`__mul__()` must be something like:
c6 = CayleyColorsGroup('r')
c7 = CayleyColorsGroup('g')
print("If I have ", c6, 'and', c7, 'then', c6, '*', c7, '=')
# Do by hand what a multiplication method would do: look up the pair of
# stored symbols in the table and wrap the result in a new element.
c8 = CayleyColorsGroup(
    c6.get_cayley_table()[(c6.get_value(), c7.get_value())]
)
print(c8)
`BaseCayleyGroup` must implement `__eq__()` and `__mul__()` by calling `get_value()` and `get_cayley_table()` on itself, and getting the child implementation.
`__str__()` in `BaseCayleyGroup` to do the printing, but he did not ask us to do that.
`x = BaseCayleyGroup(anything)` must raise an exception.
All told, you write about 8 statements in the base class, and know you have to implement
`__init__()`
`__eq__()`
`__str__()` (Optional).
`__mul__()`
You have implemented simple classes and demonstrated Abstraction, Encapsulation and Inheritance.
# Create a 2 x 2 matrix of ints and assign three of its cells; (1, 1) is
# left unset.
tt = MySparseMatrix(int, 2, 2)
for _cell in ((0, 0, 11), (0, 1, 5), (1, 0, 2)):
    tt.set(*_cell)

# Read one cell back, then print the whole matrix.
print(tt.get(0, 1))
print("tt = ", tt)
5
tt = 2 X 2 matrix of type <class 'int'>:
11,5
2,0
# A 2 x 2 grid of zeros as a list of rows; each row is its own list object.
m = [[0] * 2 for _ in range(2)]
print(m)
__str__()
method to print slightly differently.
`set(0, 0, 11)` must map to something like `m[0][0]`
but that only works if I have already set up the lists with default values. Otherwise, I get undefined.
x = complex
# Calling a type with no arguments yields its default value (0j for complex).
print(x())
MyMatrix.
Equality and addition are obvious once you have figured out the matrix basic initialization, get and set.
(i,j)
and you have to return:
(i,j)
to a value.
import logging
# Thresholds for the two destinations: the file keeps everything from DEBUG
# up, the console only shows ERROR and above. (These two assignments were
# fused onto one line, which is a syntax error, and were never used.)
file_level = logging.DEBUG
console_level = logging.ERROR

# Create a named logger to show to the class.
# An application can have several loggers.
logger = logging.getLogger('e1006_application')

# The logger's own level is DEBUG, so it passes every record on to its
# handlers; each handler then applies its own threshold.
logger.setLevel(logging.DEBUG)

# File handler: logs even debug messages.
fh = logging.FileHandler('demo.log')
fh.setLevel(file_level)

# Console handler with a higher log level.
ch = logging.StreamHandler()
ch.setLevel(console_level)

# One formatter shared by both handlers.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh.setFormatter(formatter)
ch.setFormatter(formatter)

# Attach the handlers to the logger.
logger.addHandler(fh)
logger.addHandler(ch)

# Emit one message at several levels to show which destination receives which.
logger.debug("***** Starting a test run *******")
logger.info('Creating an info message.')
logger.debug("Creating a debug message")
logger.warning("Creating a warning message.")
logger.error("Creating an ERROR message.")
logger.error("Creating another ERROR message.!")
# Echo the log file. Each line keeps its own newline and print() adds
# another, so entries come out separated by blank lines.
with open("demo.log", "r") as log_file:
    for line in log_file.readlines():
        print(line)
# Lower the console handler's threshold from ERROR to WARNING; the logger's
# own level and the file handler are left untouched.
ch.setLevel(logging.WARNING)

# Replay the same messages so the effect of the new threshold is visible.
logger.log(logging.DEBUG, "***** Starting a test run *******")
logger.log(logging.INFO, 'Creating an info message.')
logger.log(logging.DEBUG, "Creating a debug message")
logger.log(logging.WARNING, "Creating a warning message.")
logger.log(logging.ERROR, "Creating an ERROR message.")
logger.log(logging.ERROR, "Creating another ERROR message.!")
Level | Numeric value |
---|---|
CRITICAL | 50 |
ERROR | 40 |
WARNING | 30 |
INFO | 20 |
DEBUG | 10 |
NOTSET | 0 |
# Echo the log file again to see what the second round of messages added.
with open("demo.log", "r") as log_file:
    for line in log_file.readlines():
        print(line)
Logging Conceptual Model |
Slack Critical Message Channel |
Full Log Stream |
Summary:
"The Case For Python in Scientific Computing"
Python and Scientific/Engineering |
Team Performance Data |
Player Positions |
Player Batting Info |
Player Pitching Info |
Player Fielding Info |
Player Salary Info |
import pandas as pd
# Load the team-level data set and display its last 20 rows.
df = pd.read_csv(filepath_or_buffer="../Data/teaminfo.csv")
df.tail(n=20)
import matplotlib.pyplot as plt
plt.ioff()

# Make a figure/subplot
plt.figure(figsize=(10, 8))

# Histogram wins into 20 buckets
n, bins, p = plt.hist(df['wins'], 20, label="Histogram of Wins")

# Compute the mean number of wins and plot it as a line spanning the observed
# range of wins. The bounds get their own names so the builtins min() and
# max() are not shadowed for the rest of the session.
# NOTE(review): the line is drawn at height mu on the histogram's count axis;
# confirm that a horizontal line is what is intended here.
mu = df['wins'].mean()
min_wins = df['wins'].min()
max_wins = df['wins'].max()
l = plt.plot([min_wins, max_wins], [mu, mu], label="Average Wins")
plt.legend()
plt.show()
# Keep just the two columns being compared, in a DataFrame of their own.
w_v_hr = pd.DataFrame(df[['home_runs', 'wins']])

# New figure, same size as before.
plt.figure(figsize=(10, 8))

# Scatter plot of home runs (x) against wins (y), drawn as green triangles.
plt.scatter(w_v_hr.home_runs, w_v_hr.wins, marker="^", color="g")

# Axis labels, each with its own color.
plt.xlabel("Homeruns", color="r", fontsize=20)
plt.ylabel("Wins", color="b", fontsize=20)

# Render the figure, then show the correlation matrix of the two columns.
plt.show()
w_v_hr.corr()
HOW TO INTERPRET A CORRELATION COEFFICIENT R
In statistics, the correlation coefficient r measures the strength and direction of a linear relationship between two variables on a scatterplot. The value of r is always between +1 and –1. To interpret its value, see which of the following values your correlation r is closest to:
# Correlate only the numeric columns. df also carries string columns such as
# 'teamid', and pandas 2.0+ no longer drops those silently inside corr() --
# it raises instead.
df.select_dtypes(include="number").corr()

# Same matrix without the 'year' column.
df2 = df.drop(['year'], axis=1)
df2.select_dtypes(include="number").corr()
# Dimensions of the team table loaded earlier.
print("(rows, columns) for teams is = ", df.shape)

# Load the four per-player data sets.
batting_df = pd.read_csv("../Data/playerbatting.csv")
pitching_df = pd.read_csv("../Data/playerpitching.csv")
fielding_df = pd.read_csv("../Data/playerfielding.csv")
salary_df = pd.read_csv("../Data/playersalary.csv")

# Report the dimensions of each.
print("(rows, columns) for batting is = ", batting_df.shape)
print("(rows, columns) for pitching is = ", pitching_df.shape)
print("(rows, columns) for fielding is = ", fielding_df.shape)
print("(rows, columns) for salary is = ", salary_df.shape)

# DataFrame.size is rows * columns, so summing it over all five tables
# counts every cell.
all_dfs = [df, batting_df, pitching_df, fielding_df, salary_df]
total_cells = sum(frame.size for frame in all_dfs)
print("total number of cells is = ", total_cells)
We have at least 3 tasks:
This will involve:
But first, let's learn a little about Pandas and PyPlot, which we need for all 5 tasks.
((https://en.wikipedia.org/wiki/Pandas_(software)))
"pandas is a software library written for the Python programming language for data manipulation and analysis. In particular, it offers data structures and operations for manipulating numerical tables and time series. It is free software released under the three-clause BSD license.[2] The name is derived from the term "panel data", an econometrics term for data sets that include both time-series and cross-sectional data"
Library features:
Data Structure | Dimensions | Description |
---|---|---|
Series | 1 | 1D labeled homogeneous array, size-immutable. |
Data Frames | 2 | General 2D labeled, size-mutable tabular structure with potentially heterogeneously typed columns. |
Panel | 3 | General 3D labeled, size-mutable array. |
Pandas Data Structures |
DataFrame |
# Peek at the end of the batting and team tables.
batting_df.tail(n=30)
df.tail(n=20)

# Ascending sort on wins alone: fewest wins first.
teams_sorted_by_wins = df.sort_values(by='wins')
teams_sorted_by_wins.head(n=10)

# Descending sort on wins, with teamid (also descending) as the tie-breaker.
teams_sorted_by_wins = df.sort_values(by=['wins', 'teamid'], ascending=[False, False])
teams_sorted_by_wins.head(n=10)
`teams_sorted_by_wins` is already sorted. I want ONLY National League Teams for years greater than or equal to 2000, and the team is either BOS or NYY (coded as 'NYA' in the filter below).
f1 =(df['year'] >= 2000)
# Second half of the filter: keep only the two team ids of interest.
f2 = (df['teamid'].isin(['BOS', 'NYA']))

# f1 and f2 were built on df, whose row order differs from the sorted frame.
# .loc aligns the boolean mask on the index labels, so the same rows are
# selected without the "Boolean Series key will be reindexed" warning that
# plain [] indexing emits for a mask in a different order.
interesting_teams = teams_sorted_by_wins.loc[f1 & f2]
interesting_teams.head(20)
# Teams that won more than 20 games over their losses yet finished with a
# rank greater than 1.
unhappy_teams = df[df['wins'].gt(df['loses'].add(20)) & df['rank'].gt(1)]
unhappy_teams.sort_values(by='wins', ascending=False).head(n=10)
# Projection: keep three columns of the team table.
just_team_year_and_wins = df.loc[:, ['teamid', 'year', 'wins']]
just_team_year_and_wins.head(n=10)

# Compare the two player tables before joining them.
batting_df.head(n=10)
salary_df.head(n=10)

# Re-read the salary data and lower-case its column names, so the key
# columns can be addressed as 'playerid' and 'yearid' like in batting_df.
salary_df2 = pd.read_csv('../Data/playersalary.csv')
salary_df2.columns = salary_df2.columns.str.lower()
salary_df2.head(n=10)

# Index both tables on (playerid, yearid), then join salary onto batting.
# join() defaults to a left join, spelled out here.
batting_df2 = batting_df.set_index(keys=['playerid', 'yearid'])
salary_df2 = salary_df2.set_index(keys=['playerid', 'yearid'])
batting_and_salary = batting_df2.join(salary_df2, how='left')
batting_and_salary.loc[:, ['home_runs', 'salary']].head(n=10)
Index
"A (...) index is a data structure that improves the speed of data retrieval operations on a (...) table at the cost of additional (...) storage space to maintain the index data structure. Indexes are used to quickly locate data without having to search every row in a (...) table every time a (...) table is accessed. Indexes can be created using one or more columns of a (...) table, providing the basis for both rapid random lookups and efficient access of ordered records." (https://en.wikipedia.org/wiki/Database_index)
Binary Tree Index |
JOIN
Natural JOIN Example |
Formal Definition:
\begin{equation} x_i = f_i(p_i), \forall p \in T. \end{equation}
Next Task
We will define and train a Multi-Layer Perceptron!
Neurons and Perceptrons |