Here I write up some functions to generate uniform and normal probability distributions, based on the book *Data Science from Scratch*.

Libraries

import math as m

import altair as alt
import numpy as np
import pandas as pd
# Lift Altair's default cap on the number of rows a chart's data may contain.
alt.data_transformers.disable_max_rows()
DataTransformerRegistry.enable('default')

Uniform distribution

def uniform_pdf(x: float) -> float:
    """Return the density of the uniform distribution on [0, 1) at ``x``.

    The result is 1 when ``0 <= x < 1`` and 0 for every other input.
    """
    inside_support = 0 <= x < 1
    if inside_support:
        return 1
    return 0
def uniform_cdf(x: float) -> float:
    """Return P(X <= x) for X uniformly distributed on [0, 1).

    The result is 0 to the left of the interval, ``x`` itself inside it,
    and 1 from ``x = 1`` onwards.
    """
    # Left of the support: no probability mass accumulated yet.
    if x < 0:
        return 0
    # Inside the support the CDF grows linearly with x.
    if x < 1:
        return x
    # At or beyond the right edge all mass has been accumulated.
    return 1

Example values

# Print a small tab-separated table of the uniform PDF and CDF at a few
# sample points on both sides of, and inside, the interval [0, 1).
print("x\tpdf\tcdf\n")

sample_points = [-2, 0, .2, .8, 1, 1.5]
for point in sample_points:
    density = uniform_pdf(point)
    cumulative = uniform_cdf(point)
    print(f"{point}\t{density}\t{cumulative}")
x	pdf	cdf

-2	0	0
0	1	0
0.2	1	0.2
0.8	1	0.8
1	0	1
1.5	0	1

For plotting we generate both cdf and pdf values in a tidy format.

# Evaluate both functions on an evenly spaced grid that extends beyond
# [0, 1] on either side.
x = pd.Series(np.linspace(-1, 2, 1000))

# Wide layout first: one column per function, sharing the x grid.
uniform_wide = pd.DataFrame({'x': x})
uniform_wide['pdf'] = x.map(uniform_pdf)
uniform_wide['cdf'] = x.map(uniform_cdf)

# Reshape to long ("tidy") layout: one row per (x, variable, value).
uniform = pd.melt(uniform_wide, id_vars='x')

uniform
x variable value
0 -1.000000 pdf 0.0
1 -0.996997 pdf 0.0
2 -0.993994 pdf 0.0
3 -0.990991 pdf 0.0
4 -0.987988 pdf 0.0
... ... ... ...
1995 1.987988 cdf 1.0
1996 1.990991 cdf 1.0
1997 1.993994 cdf 1.0
1998 1.996997 cdf 1.0
1999 2.000000 cdf 1.0

2000 rows × 3 columns

# Summary statistics of the 'value' column per function, transposed so that
# each function (cdf / pdf) becomes a column.
value_stats = pd.IndexSlice['value', :]
uniform.groupby('variable').describe().loc[:, value_stats].T
variable cdf pdf
value count 1000.000000 1000.000000
mean 0.500000 0.333000
std 0.441243 0.471522
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.500000 0.000000
75% 1.000000 1.000000
max 1.000000 1.000000
# Base layer: one line per function (pdf / cdf), coloured by the function name.
chart = alt.Chart().mark_line().encode(
    x='x:Q',
    y='value:Q',
    color='variable:N',
)

# Mouse-over selection on the x encoding that picks the nearest data point.
label = alt.selection_single(
    empty='none', nearest=True, on='mouseover', encodings=['x']
)

# Points that are fully opaque only where the selection is active.
hover_points = chart.mark_circle().encode(
    opacity=alt.condition(label, alt.value(1), alt.value(0))
).add_selection(label)

# Vertical guide line, restricted to the selected x position.
hover_rule = (
    alt.Chart()
    .mark_rule(color='darkgray')
    .encode(x='x:Q')
    .transform_filter(label)
)

# Numeric read-out of the selected values, formatted to four decimals.
hover_text = chart.mark_text(
    align='left', dx=5, dy=-5, strokeWidth=0.5
).encode(
    text=alt.Text('value:Q', format=',.4f')
).transform_filter(label)

alt.layer(
    chart, hover_points, hover_rule, hover_text, data=uniform
).properties(width=600, title='Uniform PDF and CDF')

Normal PDF

def calc_normal_pdf(x: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the density of the normal distribution N(mu, sigma**2) at ``x``.

    Args:
        x: Point at which the density is evaluated.
        mu: Mean of the distribution.
        sigma: Standard deviation of the distribution; must be positive.

    Returns:
        The value of the probability density function at ``x``.

    Raises:
        ValueError: If ``sigma`` is not strictly positive (including NaN).
    """
    # A non-positive sigma would otherwise yield a negative "density" or a
    # bare ZeroDivisionError, so reject it explicitly.
    if not sigma > 0:
        raise ValueError(f"sigma must be positive, got {sigma!r}")
    # Standardise first: z * z saturates to inf for huge |x - mu| (so the
    # density becomes 0.0), whereas squaring with ** raises OverflowError.
    z = (x - mu) / sigma
    return m.exp(-z * z / 2) / (m.sqrt(2 * m.pi) * sigma)
# Evaluation grid shared by the three normal densities.
x = pd.Series(np.linspace(-5, 5, 1000))

# One column per parameter setting; mean and standard deviation are passed
# to calc_normal_pdf positionally after each grid value.
pdf_columns = {
    'x': x,
    'mu=0, sigma=1': x.apply(calc_normal_pdf, args=(0, 1)),
    'mu=0, sigma=2': x.apply(calc_normal_pdf, args=(0, 2)),
    'mu=0, sigma=3': x.apply(calc_normal_pdf, args=(0, 3)),
}

# Reshape to long ("tidy") layout: one row per (x, variable, value).
normal_pdf = pd.melt(pd.DataFrame(pdf_columns), id_vars='x')

normal_pdf
x variable value
0 -5.00000 mu=0, sigma=1 0.000001
1 -4.98999 mu=0, sigma=1 0.000002
2 -4.97998 mu=0, sigma=1 0.000002
3 -4.96997 mu=0, sigma=1 0.000002
4 -4.95996 mu=0, sigma=1 0.000002
... ... ... ...
2995 4.95996 mu=0, sigma=3 0.033902
2996 4.96997 mu=0, sigma=3 0.033715
2997 4.97998 mu=0, sigma=3 0.033529
2998 4.98999 mu=0, sigma=3 0.033344
2999 5.00000 mu=0, sigma=3 0.033159

3000 rows × 3 columns

# Summary statistics of the density values per parameter setting, transposed
# so that each setting becomes a column.
normal_pdf.groupby('variable')['value'].describe().T
variable mu=0, sigma=1 mu=0, sigma=2 mu=0, sigma=3
count 1000.000000 1000.000000 1000.000000
mean 0.099900 0.098668 0.090385
std 0.134980 0.065984 0.032457
min 0.000001 0.008764 0.033159
25% 0.000351 0.034353 0.060851
50% 0.017420 0.091182 0.093905
75% 0.181794 0.163888 0.121860
max 0.398937 0.199471 0.132981
# Mouse-over selection on the x encoding that picks the nearest data point.
label = alt.selection_single(
    empty='none', nearest=True, on='mouseover', encodings=['x']
)

# Base layer: one line per parameter setting, coloured by its name.
chart = alt.Chart().mark_line().encode(
    x='x:Q',
    y='value:Q',
    color='variable:N',
)

# Points that are fully opaque only where the selection is active.
hover_points = chart.mark_circle().encode(
    opacity=alt.condition(label, alt.value(1), alt.value(0))
).add_selection(label)

# Vertical guide line, restricted to the selected x position.
hover_rule = (
    alt.Chart()
    .mark_rule(color='darkgray')
    .encode(x='x:Q')
    .transform_filter(label)
)

# Numeric read-out of the selected values, formatted to six decimals.
hover_text = chart.mark_text(align='left', dx=5, dy=-5).encode(
    text=alt.Text('value:Q', format=',.6f')
).transform_filter(label)

alt.layer(
    chart, hover_points, hover_rule, hover_text, data=normal_pdf
).properties(width=600, title="Normal PDF")

Normal CDF

Finally, we also calculate and plot the CDF of the normal distribution.

def calc_normal_cdf(x: float, mu: float = 0, sigma: float = 1) -> float:
    """Return P(X <= x) for X distributed as N(mu, sigma**2).

    Args:
        x: Point at which the cumulative distribution is evaluated.
        mu: Mean of the distribution.
        sigma: Standard deviation of the distribution; must be positive.

    Returns:
        The cumulative probability at ``x``, a value in [0, 1].

    Raises:
        ValueError: If ``sigma`` is not strictly positive (including NaN).
    """
    # A negative sigma would silently mirror the curve and sigma == 0 would
    # raise a bare ZeroDivisionError, so reject both explicitly.
    if not sigma > 0:
        raise ValueError(f"sigma must be positive, got {sigma!r}")
    z = (x - mu) / (sigma * m.sqrt(2))
    # erfc(-z) / 2 equals (1 + erf(z)) / 2 mathematically, but avoids the
    # cancellation in 1 + erf(z) that rounds far-left tail values to 0.0.
    return m.erfc(-z) / 2
# One column per parameter setting, evaluated on the existing grid ``x``;
# mean and standard deviation are passed to calc_normal_cdf positionally.
cdf_columns = {
    'x': x,
    'mu=0, sigma=1': x.apply(calc_normal_cdf, args=(0, 1)),
    'mu=0, sigma=2': x.apply(calc_normal_cdf, args=(0, 2)),
    'mu=0, sigma=3': x.apply(calc_normal_cdf, args=(0, 3)),
}

# Reshape to long ("tidy") layout: one row per (x, variable, value).
normal_cdf = pd.melt(pd.DataFrame(cdf_columns), id_vars='x')

normal_cdf
x variable value
0 -5.00000 mu=0, sigma=1 2.866516e-07
1 -4.98999 mu=0, sigma=1 3.019121e-07
2 -4.97998 mu=0, sigma=1 3.179543e-07
3 -4.96997 mu=0, sigma=1 3.348164e-07
4 -4.95996 mu=0, sigma=1 3.525386e-07
... ... ... ...
2995 4.95996 mu=0, sigma=3 9.508671e-01
2996 4.96997 mu=0, sigma=3 9.512055e-01
2997 4.97998 mu=0, sigma=3 9.515421e-01
2998 4.98999 mu=0, sigma=3 9.518768e-01
2999 5.00000 mu=0, sigma=3 9.522096e-01

3000 rows × 3 columns

# Summary statistics of all CDF values, pooled across the three settings.
normal_cdf['value'].describe()
count    3.000000e+03
mean     5.000000e-01
std      3.760737e-01
min      2.866516e-07
25%      1.055739e-01
50%      5.000000e-01
75%      8.944261e-01
max      9.999997e-01
Name: value, dtype: float64
# Mouse-over selection on the x encoding that picks the nearest data point.
label = alt.selection_single(
    empty='none', nearest=True, on='mouseover', encodings=['x']
)

# Base layer: one line per parameter setting, coloured by its name.
chart = alt.Chart().mark_line().encode(
    x='x:Q',
    y='value:Q',
    color='variable:N',
)

# Points that are fully opaque only where the selection is active.
hover_points = chart.mark_circle().encode(
    opacity=alt.condition(label, alt.value(1), alt.value(0))
).add_selection(label)

# Vertical guide line, restricted to the selected x position.
hover_rule = (
    alt.Chart()
    .mark_rule(color='darkgray')
    .encode(x='x:Q')
    .transform_filter(label)
)

# Numeric read-out of the selected values, formatted to six decimals.
hover_text = chart.mark_text(align='left', dx=5, dy=-5).encode(
    text=alt.Text('value:Q', format=',.6f')
).transform_filter(label)

alt.layer(
    chart, hover_points, hover_rule, hover_text, data=normal_cdf
).properties(width=600, title="Normal CDFs")