Skip to content

Instantly share code, notes, and snippets.

@brentp
Last active September 17, 2024 14:48
Show Gist options
  • Save brentp/089c7d6d69d78d26437f to your computer and use it in GitHub Desktop.
Save brentp/089c7d6d69d78d26437f to your computer and use it in GitHub Desktop.

Revisions

  1. brentp revised this gist Oct 4, 2014. 1 changed file with 10 additions and 7 deletions.
    17 changes: 10 additions & 7 deletions betareg.py
    Original file line number Diff line number Diff line change
    @@ -96,18 +96,21 @@ def __init__(self, endog, exog, Z=None, link=Logit(),
    :ref:`links`
    """.format(example=_init_example)

    assert np.all((0 < endog) & (endog < 1))
    if Z is None:
    extra_names = ['phi']
    Z = np.ones((len(endog), 1), dtype='f')
    else:
    extra_names = ['precision-%s' % zc for zc in \
    (Z.columns if hasattr(Z, 'columns') else range(1, Z.shape[1] + 1))]
    kwds['extra_params_names'] = extra_names

    super(Beta, self).__init__(endog, exog, **kwds)
    self.link = link
    self.link_phi = link_phi

    if Z is None:
    self.Z = np.ones((self.endog.shape[0], 1), dtype='f')
    else:
    self.Z = np.asarray(Z)
    assert len(self.Z) == len(self.endog)
    self.Z = Z
    assert len(self.Z) == len(self.endog)

    def nloglikeobs(self, params):
    """
    @@ -182,7 +185,7 @@ def _ll_br(self, y, X, Z, params):
    #print GLM.from_formula('iyield ~ C(batch) + temp', dat, family=Binomial()).fit().summary()

    dev = pd.read_csv('methylation-test.csv')
    Z = patsy.dmatrix('~ age', dev)
    Z = patsy.dmatrix('~ age', dev, return_type='dataframe')
    m = Beta.from_formula('methylation ~ gender + CpG', dev,
    Z=Z,
    link_phi=sm.families.links.identity())
  2. brentp revised this gist Oct 4, 2014. 2 changed files with 23 additions and 9 deletions.
    4 changes: 3 additions & 1 deletion betareg.py
    Original file line number Diff line number Diff line change
    @@ -182,6 +182,8 @@ def _ll_br(self, y, X, Z, params):
    #print GLM.from_formula('iyield ~ C(batch) + temp', dat, family=Binomial()).fit().summary()

    dev = pd.read_csv('methylation-test.csv')
    m = Beta.from_formula('methylation ~ age + gender', dev,
    Z = patsy.dmatrix('~ age', dev)
    m = Beta.from_formula('methylation ~ gender + CpG', dev,
    Z=Z,
    link_phi=sm.families.links.identity())
    print m.fit().summary()
    28 changes: 20 additions & 8 deletions test_beta.py
    Original file line number Diff line number Diff line change
    @@ -1,6 +1,7 @@

    # in R:
    import io
    import statsmodels.api as sm
    import pandas as pd
    import patsy
    from betareg import Beta
    @@ -20,14 +21,14 @@

    _methylation_estimates_mean = u"""\
    varname Estimate StdError zvalue Pr(>|z|)
    (Intercept) 1.44224 0.03401 42.404 <2e-16
    (Intercept) 1.44224 0.03401 42.404 2e-16
    genderM 0.06986 0.04359 1.603 0.109
    CpGCpG_1 0.60735 0.04834 12.563 <2e-16
    CpGCpG_2 0.97355 0.05311 18.331 <2e-16"""
    CpGCpG_1 0.60735 0.04834 12.563 2e-16
    CpGCpG_2 0.97355 0.05311 18.331 2e-16"""

    _methylation_estimates_precision = u"""\
    varname Estimate StdError zvalue Pr(>|z|)
    (Intercept) 8.22829 1.79098 4.594 4.34e-06 ***
    (Intercept) 8.22829 1.79098 4.594 4.34e-06
    age -0.03471 0.03276 -1.059 0.289"""


    @@ -41,7 +42,8 @@
    methylation = pd.read_csv('methylation-test.csv')

    def check_same(a, b, eps, name):
    assert np.allclose(a, b, atol=eps), ("different from expected", name, list(a), list(b))
    assert np.allclose(a, b, rtol=0.01, atol=eps), \
    ("different from expected", name, list(a), list(b))


    def test_income_coefficients():
    @@ -69,8 +71,18 @@ def test_methylation_coefficients():
    model = "methylation ~ gender + CpG"
    Z = patsy.dmatrix("~ age", methylation)

    mod = Beta.from_formula(model, methylation, Z=Z)
    mod = Beta.from_formula(model, methylation, Z=Z, link_phi=sm.families.links.identity())
    rslt = mod.fit()
    yield check_same, rslt.params[:-2], expected_methylation_mean['Estimate'], 1e-2, "estimates"
    #yield check_same, rslt.tvalues[:-2], expected_methylation_mean['zvalue'], 0.1, "z-scores"
    #yield check_same, rslt.pvalues[:-2], expected_methylation_mean['Pr(>|z|)'], 1e-3, "p-values"
    yield check_same, rslt.tvalues[:-2], expected_methylation_mean['zvalue'], 0.1, "z-scores"
    yield check_same, rslt.pvalues[:-2], expected_methylation_mean['Pr(>|z|)'], 1e-2, "p-values"

    def test_methylation_precision():
    model = "methylation ~ gender + CpG"
    Z = patsy.dmatrix("~ age", methylation)

    mod = Beta.from_formula(model, methylation, Z=Z, link_phi=sm.families.links.identity())
    rslt = mod.fit()
    #yield check_same, sm.families.links.logit()(rslt.params[-2:]), expected_methylation_precision['Estimate'], 1e-3, "estimate"
    #yield check_same, rslt.tvalues[-2:], expected_methylation_precision['zvalue'], 0.1, "z-score"

  3. brentp revised this gist Oct 4, 2014. 2 changed files with 30 additions and 5 deletions.
    1 change: 0 additions & 1 deletion ex.R
    Original file line number Diff line number Diff line change
    @@ -17,7 +17,6 @@ data("FoodExpenditure", package = "betareg")


    meth = read.csv('methylation-test.csv')
    meth$methylation = 1 / (1 + exp(-meth$methylation))
    m = betareg(methylation ~ gender + CpG | age, meth)
    print(summary(m))

    34 changes: 30 additions & 4 deletions test_beta.py
    Original file line number Diff line number Diff line change
    @@ -2,6 +2,7 @@
    # in R:
    import io
    import pandas as pd
    import patsy
    from betareg import Beta
    import numpy as np

    @@ -37,14 +38,39 @@
    expected_methylation_precision = pd.read_table(io.StringIO(_methylation_estimates_precision), sep="\s+")

    income = pd.read_csv('foodexpenditure.csv')
    methylation = pd.read_csv('methylation-test.csv')

    def check_same(a, b, eps, name):
    assert np.allclose(a, b, atol=eps), ("different from expected", name, list(a), list(b))


    def test_income_coefficients():
    model = "I(food/income) ~ income + persons"

    mod = Beta.from_formula(model, income)
    rslt = mod.fit()
    assert np.allclose(rslt.params[:-1], expected_income_mean['Estimate'])
    print rslt.tvalues
    print expected_income_mean['zvalue']
    assert np.allclose(rslt.tvalues[:-1], expected_income_mean['zvalue'], rtol=0.1)
    yield check_same, rslt.params[:-1], expected_income_mean['Estimate'], 1e-3, "estimates"
    yield check_same, rslt.tvalues[:-1], expected_income_mean['zvalue'], 0.1, "z-scores"
    yield check_same, rslt.pvalues[:-1], expected_income_mean['Pr(>|z|)'], 1e-3, "p-values"


    def test_income_precision():
    model = "I(food/income) ~ income + persons"

    mod = Beta.from_formula(model, income)
    rslt = mod.fit()
    # note that we have to exp the phi results for now.
    yield check_same, np.exp(rslt.params[-1:]), expected_income_precision['Estimate'], 1e-3, "estimate"
    #yield check_same, rslt.tvalues[-1:], expected_income_precision['zvalue'], 0.1, "z-score"
    yield check_same, rslt.pvalues[-1:], expected_income_precision['Pr(>|z|)'], 1e-3, "p-values"


    def test_methylation_coefficients():
    model = "methylation ~ gender + CpG"
    Z = patsy.dmatrix("~ age", methylation)

    mod = Beta.from_formula(model, methylation, Z=Z)
    rslt = mod.fit()
    yield check_same, rslt.params[:-2], expected_methylation_mean['Estimate'], 1e-2, "estimates"
    #yield check_same, rslt.tvalues[:-2], expected_methylation_mean['zvalue'], 0.1, "z-scores"
    #yield check_same, rslt.pvalues[:-2], expected_methylation_mean['Pr(>|z|)'], 1e-3, "p-values"
  4. brentp revised this gist Oct 4, 2014. 4 changed files with 96 additions and 42 deletions.
    1 change: 0 additions & 1 deletion betareg.py
    Original file line number Diff line number Diff line change
    @@ -182,7 +182,6 @@ def _ll_br(self, y, X, Z, params):
    #print GLM.from_formula('iyield ~ C(batch) + temp', dat, family=Binomial()).fit().summary()

    dev = pd.read_csv('methylation-test.csv')
    dev['methylation'] = sm.families.links.Logit().inverse(dev['methylation'])
    m = Beta.from_formula('methylation ~ age + gender', dev,
    link_phi=sm.families.links.identity())
    print m.fit().summary()
    13 changes: 9 additions & 4 deletions ex.R
    Original file line number Diff line number Diff line change
    @@ -3,16 +3,21 @@ library(betareg)
    data("GasolineYield", package = "betareg")
    data("FoodExpenditure", package = "betareg")

    fe_beta = betareg(I(food/income) ~ income + persons, data = FoodExpenditure)
    print(summary(fe_beta))
    #fe_beta = betareg(I(food/income) ~ income + persons, data = FoodExpenditure)
    #print(summary(fe_beta)$coefficients)


    m = betareg(yield ~ batch + temp, data = GasolineYield)
    #m = betareg(yield ~ batch + temp, data = GasolineYield)
    #print(summary(m))



    gy2 <- betareg(yield ~ batch + temp | temp, data = GasolineYield, link.phi="identity")
    #gy2 <- betareg(yield ~ batch + temp | temp, data = GasolineYield, link.phi="identity")
    #print(summary(gy2))


    meth = read.csv('methylation-test.csv')
    meth$methylation = 1 / (1 + exp(-meth$methylation))
    m = betareg(methylation ~ gender + CpG | age, meth)
    print(summary(m))

    74 changes: 37 additions & 37 deletions methylation-test.csv
    Original file line number Diff line number Diff line change
    @@ -1,37 +1,37 @@
    ,gender,age,Basename,ID,id,plate,CpG,methylation
    0,F,58.231,6042324088_R04C01,age58.231_F,id_0,6042324088,CpG_0,1.4828322881625375
    1,M,52.632,6042324088_R06C01,age52.632_M,id_1,6042324088,CpG_0,1.4051509852087734
    2,M,64.679,6042324088_R01C01,age64.679_M,id_2,6042324088,CpG_0,1.4051509852087734
    3,F,55.299,6042324088_R04C02,age55.299_F,id_3,6042324088,CpG_0,1.4370666864933141
    4,M,56.019,6042324088_R02C01,age56.019_M,id_4,6042324088,CpG_0,1.7743677265161857
    5,M,62.021,6042324088_R01C02,age62.021_M,id_5,6042324088,CpG_0,1.4696224926932235
    6,F,52.298,6042324088_R06C02,age52.298_F,id_6,6042324088,CpG_0,1.489478597355121
    7,F,39.71,6042324088_R03C01,age39.71_F,id_7,6042324088,CpG_0,1.564513100525912
    8,F,57.492,6042324088_R05C02,age57.492_F,id_8,6042324088,CpG_0,1.5785565986326349
    9,F,57.624,6042324088_R05C01,age57.624_F,id_9,6042324088,CpG_0,1.242506468328179
    10,F,40.487,6042324088_R03C02,age40.487_F,id_10,6042324088,CpG_0,1.3009807774073552
    11,M,53.662,6042324088_R02C02,age53.662_M,id_11,6042324088,CpG_0,1.5299568447640945
    12,F,58.231,6042324088_R04C01,age58.231_F,id_0,6042324088,CpG_1,2.100996545241666
    13,M,52.632,6042324088_R06C01,age52.632_M,id_1,6042324088,CpG_1,2.1322666810614472
    14,M,64.679,6042324088_R01C01,age64.679_M,id_2,6042324088,CpG_1,2.1322666810614472
    15,F,55.299,6042324088_R04C02,age55.299_F,id_3,6042324088,CpG_1,1.8921458020642403
    16,M,56.019,6042324088_R02C01,age56.019_M,id_4,6042324088,CpG_1,2.3634832752006427
    17,M,62.021,6042324088_R01C02,age62.021_M,id_5,6042324088,CpG_1,2.0805670342015703
    18,F,52.298,6042324088_R06C02,age52.298_F,id_6,6042324088,CpG_1,2.0406555166446796
    19,F,39.71,6042324088_R03C01,age39.71_F,id_7,6042324088,CpG_1,2.175197255017929
    20,F,57.492,6042324088_R05C02,age57.492_F,id_8,6042324088,CpG_1,2.153549513833558
    21,F,57.624,6042324088_R05C01,age57.624_F,id_9,6042324088,CpG_1,1.815289966638249
    22,F,40.487,6042324088_R03C02,age40.487_F,id_10,6042324088,CpG_1,2.060457163597239
    23,M,53.662,6042324088_R02C02,age53.662_M,id_11,6042324088,CpG_1,1.9924301646902063
    24,F,58.231,6042324088_R04C01,age58.231_F,id_0,6042324088,CpG_2,2.682732393117921
    25,M,52.632,6042324088_R06C01,age52.632_M,id_1,6042324088,CpG_2,2.3508277619403852
    26,M,64.679,6042324088_R01C01,age64.679_M,id_2,6042324088,CpG_2,2.1972245773362196
    27,F,55.299,6042324088_R04C02,age55.299_F,id_3,6042324088,CpG_2,2.3383031755961254
    28,M,56.019,6042324088_R02C01,age56.019_M,id_4,6042324088,CpG_2,2.666159259393051
    29,M,62.021,6042324088_R01C02,age62.021_M,id_5,6042324088,CpG_2,2.556365613770146
    30,F,52.298,6042324088_R06C02,age52.298_F,id_6,6042324088,CpG_2,2.376272808785205
    31,F,39.71,6042324088_R03C01,age39.71_F,id_7,6042324088,CpG_2,2.3891995658308174
    32,F,57.492,6042324088_R05C02,age57.492_F,id_8,6042324088,CpG_2,2.571428861772524
    33,F,57.624,6042324088_R05C01,age57.624_F,id_9,6042324088,CpG_2,2.442347035369205
    34,F,40.487,6042324088_R03C02,age40.487_F,id_10,6042324088,CpG_2,2.3891995658308174
    35,M,53.662,6042324088_R02C02,age53.662_M,id_11,6042324088,CpG_2,2.52680914144201
    ,Unnamed: 0,gender,age,Basename,ID,id,plate,CpG,methylation
    0,0,F,58.231,6042324088_R04C01,age58.231_F,id_0,6042324088,CpG_0,0.815
    1,1,M,52.632,6042324088_R06C01,age52.632_M,id_1,6042324088,CpG_0,0.803
    2,2,M,64.679,6042324088_R01C01,age64.679_M,id_2,6042324088,CpG_0,0.803
    3,3,F,55.299,6042324088_R04C02,age55.299_F,id_3,6042324088,CpG_0,0.808
    4,4,M,56.019,6042324088_R02C01,age56.019_M,id_4,6042324088,CpG_0,0.8550000000000001
    5,5,M,62.021,6042324088_R01C02,age62.021_M,id_5,6042324088,CpG_0,0.8129999999999998
    6,6,F,52.298,6042324088_R06C02,age52.298_F,id_6,6042324088,CpG_0,0.816
    7,7,F,39.71,6042324088_R03C01,age39.71_F,id_7,6042324088,CpG_0,0.827
    8,8,F,57.492,6042324088_R05C02,age57.492_F,id_8,6042324088,CpG_0,0.829
    9,9,F,57.623999999999995,6042324088_R05C01,age57.624_F,id_9,6042324088,CpG_0,0.7760000000000001
    10,10,F,40.486999999999995,6042324088_R03C02,age40.487_F,id_10,6042324088,CpG_0,0.7859999999999999
    11,11,M,53.662,6042324088_R02C02,age53.662_M,id_11,6042324088,CpG_0,0.822
    12,12,F,58.231,6042324088_R04C01,age58.231_F,id_0,6042324088,CpG_1,0.891
    13,13,M,52.632,6042324088_R06C01,age52.632_M,id_1,6042324088,CpG_1,0.894
    14,14,M,64.679,6042324088_R01C01,age64.679_M,id_2,6042324088,CpG_1,0.894
    15,15,F,55.299,6042324088_R04C02,age55.299_F,id_3,6042324088,CpG_1,0.869
    16,16,M,56.019,6042324088_R02C01,age56.019_M,id_4,6042324088,CpG_1,0.914
    17,17,M,62.021,6042324088_R01C02,age62.021_M,id_5,6042324088,CpG_1,0.889
    18,18,F,52.298,6042324088_R06C02,age52.298_F,id_6,6042324088,CpG_1,0.8850000000000001
    19,19,F,39.71,6042324088_R03C01,age39.71_F,id_7,6042324088,CpG_1,0.898
    20,20,F,57.492,6042324088_R05C02,age57.492_F,id_8,6042324088,CpG_1,0.896
    21,21,F,57.623999999999995,6042324088_R05C01,age57.624_F,id_9,6042324088,CpG_1,0.86
    22,22,F,40.486999999999995,6042324088_R03C02,age40.487_F,id_10,6042324088,CpG_1,0.887
    23,23,M,53.662,6042324088_R02C02,age53.662_M,id_11,6042324088,CpG_1,0.8800000000000001
    24,24,F,58.231,6042324088_R04C01,age58.231_F,id_0,6042324088,CpG_2,0.936
    25,25,M,52.632,6042324088_R06C01,age52.632_M,id_1,6042324088,CpG_2,0.9129999999999999
    26,26,M,64.679,6042324088_R01C01,age64.679_M,id_2,6042324088,CpG_2,0.9000000000000001
    27,27,F,55.299,6042324088_R04C02,age55.299_F,id_3,6042324088,CpG_2,0.9119999999999999
    28,28,M,56.019,6042324088_R02C01,age56.019_M,id_4,6042324088,CpG_2,0.9349999999999999
    29,29,M,62.021,6042324088_R01C02,age62.021_M,id_5,6042324088,CpG_2,0.9280000000000002
    30,30,F,52.298,6042324088_R06C02,age52.298_F,id_6,6042324088,CpG_2,0.9150000000000001
    31,31,F,39.71,6042324088_R03C01,age39.71_F,id_7,6042324088,CpG_2,0.9160000000000001
    32,32,F,57.492,6042324088_R05C02,age57.492_F,id_8,6042324088,CpG_2,0.929
    33,33,F,57.623999999999995,6042324088_R05C01,age57.624_F,id_9,6042324088,CpG_2,0.92
    34,34,F,40.486999999999995,6042324088_R03C02,age40.487_F,id_10,6042324088,CpG_2,0.9160000000000001
    35,35,M,53.662,6042324088_R02C02,age53.662_M,id_11,6042324088,CpG_2,0.926
    50 changes: 50 additions & 0 deletions test_beta.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,50 @@

    # in R:
    import io
    import pandas as pd
    from betareg import Beta
    import numpy as np

    # betareg(I(food/income) ~ income + persons, data = FoodExpenditure)
    _income_estimates_mean = u"""\
    varname Estimate StdError zvalue Pr(>|z|)
    (Intercept) -0.62254806 0.223853539 -2.781051 5.418326e-03
    income -0.01229884 0.003035585 -4.051556 5.087819e-05
    persons 0.11846210 0.035340667 3.352005 8.022853e-04"""

    _income_estimates_precision = u"""\
    varname Estimate StdError zvalue Pr(>|z|)
    (phi) 35.60975 8.079598 4.407366 1.046351e-05
    """

    _methylation_estimates_mean = u"""\
    varname Estimate StdError zvalue Pr(>|z|)
    (Intercept) 1.44224 0.03401 42.404 <2e-16
    genderM 0.06986 0.04359 1.603 0.109
    CpGCpG_1 0.60735 0.04834 12.563 <2e-16
    CpGCpG_2 0.97355 0.05311 18.331 <2e-16"""

    _methylation_estimates_precision = u"""\
    varname Estimate StdError zvalue Pr(>|z|)
    (Intercept) 8.22829 1.79098 4.594 4.34e-06 ***
    age -0.03471 0.03276 -1.059 0.289"""


    expected_income_mean = pd.read_table(io.StringIO(_income_estimates_mean), sep="\s+")
    expected_income_precision = pd.read_table(io.StringIO(_income_estimates_precision), sep="\s+")

    expected_methylation_mean = pd.read_table(io.StringIO(_methylation_estimates_mean), sep="\s+")
    expected_methylation_precision = pd.read_table(io.StringIO(_methylation_estimates_precision), sep="\s+")

    income = pd.read_csv('foodexpenditure.csv')

    def test_income_coefficients():
    model = "I(food/income) ~ income + persons"

    mod = Beta.from_formula(model, income)
    rslt = mod.fit()
    assert np.allclose(rslt.params[:-1], expected_income_mean['Estimate'])
    print rslt.tvalues
    print expected_income_mean['zvalue']
    assert np.allclose(rslt.tvalues[:-1], expected_income_mean['zvalue'], rtol=0.1)

  5. brentp revised this gist Oct 4, 2014. 1 changed file with 6 additions and 2 deletions.
    8 changes: 6 additions & 2 deletions betareg.py
    Original file line number Diff line number Diff line change
    @@ -5,9 +5,13 @@
    References
    ----------
    Grün, Bettina, Ioannis Kosmidis, and Achim Zeileis. Extended beta regression in R: Shaken, stirred, mixed, and partitioned. No. 2011-22. Working Papers in Economics and Statistics, 2011.
    Grün, Bettina, Ioannis Kosmidis, and Achim Zeileis. Extended beta regression
    in R: Shaken, stirred, mixed, and partitioned. No. 2011-22. Working Papers in
    Economics and Statistics, 2011.
    Smithson, Michael, and Jay Verkuilen. "A better lemon squeezer? Maximum-likelihood regression with beta-distributed dependent variables." Psychological methods 11.1 (2006): 54.
    Smithson, Michael, and Jay Verkuilen. "A better lemon squeezer?
    Maximum-likelihood regression with beta-distributed dependent variables."
    Psychological methods 11.1 (2006): 54.
    """
    import numpy as np
    import pandas as pd
  6. brentp revised this gist Oct 4, 2014. 1 changed file with 6 additions and 5 deletions.
    11 changes: 6 additions & 5 deletions betareg.py
    Original file line number Diff line number Diff line change
    @@ -10,12 +10,13 @@
    Smithson, Michael, and Jay Verkuilen. "A better lemon squeezer? Maximum-likelihood regression with beta-distributed dependent variables." Psychological methods 11.1 (2006): 54.
    """
    import numpy as np
    import pandas as pd
    import statsmodels.api as sm

    from scipy.special import gammaln as lgamma

    from statsmodels.base.model import GenericLikelihoodModel
    from statsmodels.api import GLM
    from statsmodels.genmod.families import Binomial
    import statsmodels.api as sm
    import pandas as pd

    # this is only need while #2024 is open.
    class Logit(sm.families.links.Logit):
    @@ -135,8 +136,8 @@ def fit(self, start_params=None, maxiter=100000, maxfun=5000, disp=False,
    """

    if start_params is None:
    start_params = GLM(self.endog, self.exog, family=Binomial()
    ).fit(disp=False).params
    start_params = sm.GLM(self.endog, self.exog, family=Binomial()
    ).fit(disp=False).params
    start_params = np.append(start_params, [0.5] * self.Z.shape[1])

    return super(Beta, self).fit(start_params=start_params,
  7. brentp revised this gist Oct 4, 2014. 1 changed file with 32 additions and 1 deletion.
    33 changes: 32 additions & 1 deletion betareg.py
    Original file line number Diff line number Diff line change
    @@ -19,19 +19,44 @@

    # this is only need while #2024 is open.
    class Logit(sm.families.links.Logit):

    """Logit tranform that won't overflow with large numbers."""

    def inverse(self, z):
    return 1 / (1. + np.exp(-z))

    _init_example = """
    Beta regression with default of logit-link for exog and log-link
    for precision.
    >>> mod = Beta(endog, exog)
    >>> rslt = mod.fit()
    >>> print rslt.summary()
    We can also specify a formula and a specific structure and use the
    identity-link for phi.
    >>> from sm.families.links import identity
    >>> Z = patsy.dmatrix('~ temp', dat, return_type='dataframe')
    >>> mod = Beta.from_formula('iyield ~ C(batch, Treatment(10)) + temp',
    ... dat, Z=Z, link_phi=identity())
    In the case of proportion-data, we may think that the precision depends on
    the number of measurements. E.g for sequence data, on the number of
    sequence reads covering a site:
    >>> Z = patsy.dmatrix('~ coverage', df)
    >>> mod = Beta.from_formula('methylation ~ disease + age + gender + coverage', df, Z)
    >>> rslt = mod.fit()
    """

    class Beta(GenericLikelihoodModel):

    """Beta Regression.
    This implementation uses $\phi\$ as a precision parameter equal to
    This implementation uses `phi` as a precision parameter equal to
    `a + b` from the Beta parameters.
    """

    @@ -60,6 +85,11 @@ def __init__(self, endog, exog, Z=None, link=Logit(),
    Examples
    --------
    {example}
    See Also
    --------
    :ref:`links`
    """.format(example=_init_example)

    assert np.all((0 < endog) & (endog < 1))
    @@ -121,6 +151,7 @@ def _ll_br(self, y, X, Z, params):

    mu = self.link.inverse(np.dot(X, Xparams))
    phi = self.link_phi.inverse(np.dot(Z, Zparams))
    # TODO: derive a and b and constrain to > 0?

    if np.any(phi <= np.finfo(float).eps): return np.array(-np.inf)

  8. brentp revised this gist Oct 4, 2014. 1 changed file with 75 additions and 13 deletions.
    88 changes: 75 additions & 13 deletions betareg.py
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,13 @@
    """
    Beta Regression.
    # -*- coding: utf-8 -*-

    u"""
    Beta regression for modeling rates and proportions.
    References
    ----------
    Grün, Bettina, Ioannis Kosmidis, and Achim Zeileis. Extended beta regression in R: Shaken, stirred, mixed, and partitioned. No. 2011-22. Working Papers in Economics and Statistics, 2011.
    Smithson, Michael, and Jay Verkuilen. "A better lemon squeezer? Maximum-likelihood regression with beta-distributed dependent variables." Psychological methods 11.1 (2006): 54.
    """
    import numpy as np
    from scipy.special import gammaln as lgamma
    @@ -9,24 +17,54 @@
    import statsmodels.api as sm
    import pandas as pd

    #see http://cran.r-project.org/web/packages/betareg/vignettes/betareg-ext.pdf
    # nice reference:
    # http://psychology3.anu.edu.au/people/smithson/details/Pubs/Smithson_Verkuilen06.pdf

    # this is only need while #2024 is open.
    class Logit(sm.families.links.Logit):
    """Logit tranform that won't overflow with large numbers."""
    def inverse(self, z):
    return 1 / (1. + np.exp(-z))

    class BetaReg(GenericLikelihoodModel):
    _init_example = """
    """

    class Beta(GenericLikelihoodModel):

    """Beta Regression.
    """Beta Regression."""
    This implementation uses $\phi\$ as a precision parameter equal to
    `a + b` from the Beta parameters.
    """

    def __init__(self, endog, exog, Z=None, link=Logit(),
    link_phi=sm.families.links.Log(), **kwds):
    """
    Parameters
    ----------
    endog : array-like
    1d array of endogenous values (i.e. responses, outcomes,
    dependent variables, or 'Y' values).
    exog : array-like
    2d array of exogeneous values (i.e. covariates, predictors,
    independent variables, regressors, or 'X' values). A nobs x k
    array where `nobs` is the number of observations and `k` is
    the number of regressors. An intercept is not included by
    default and should be added by the user. See
    `statsmodels.tools.add_constant`.
    Z : array-like
    2d array of variables for the precision phi.
    link : link
    Any link in sm.families.links for `exog`
    link_phi : link
    Any link in sm.families.links for `Z`
    Examples
    --------
    {example}
    """.format(example=_init_example)

    assert np.all((0 < endog) & (endog < 1))

    super(BetaReg, self).__init__(endog, exog, **kwds)
    super(Beta, self).__init__(endog, exog, **kwds)
    self.link = link
    self.link_phi = link_phi

    @@ -37,17 +75,41 @@ def __init__(self, endog, exog, Z=None, link=Logit(),
    assert len(self.Z) == len(self.endog)

    def nloglikeobs(self, params):
    """
    Negative log-likelihood.
    Parameters
    ----------
    params : np.ndarray
    Parameter estimates
    """
    return -self._ll_br(self.endog, self.exog, self.Z, params)

    def fit(self, start_params=None, maxiter=100000, maxfun=5000, disp=False,
    method='bfgs', **kwds):
    """
    Fit the model.
    Parameters
    ----------
    start_params : array-like
    A vector of starting values for the regression
    coefficients. If None, a default is chosen.
    maxiter : integer
    The maximum number of iterations
    disp : bool
    Show convergence stats.
    method : str
    The optimization method to use.
    """

    if start_params is None:
    start_params = GLM(self.endog, self.exog, family=Binomial()
    ).fit(disp=False).params
    start_params = np.append(start_params, [0.5] * self.Z.shape[1])

    return super(BetaReg, self).fit(start_params=start_params,
    return super(Beta, self).fit(start_params=start_params,
    maxiter=maxiter, maxfun=maxfun,
    method=method, disp=disp, **kwds)

    @@ -74,17 +136,17 @@ def _ll_br(self, y, X, Z, params):
    dat = pd.read_table('gasoline.txt')
    Z = patsy.dmatrix('~ temp', dat, return_type='dataframe')
    # using other precison params with
    m = BetaReg.from_formula('iyield ~ C(batch, Treatment(10)) + temp', dat,
    m = Beta.from_formula('iyield ~ C(batch, Treatment(10)) + temp', dat,
    Z=Z, link_phi=sm.families.links.identity())
    print m.fit().summary()

    fex = pd.read_csv('foodexpenditure.csv')
    m = BetaReg.from_formula(' I(food/income) ~ income + persons', fex)
    m = Beta.from_formula(' I(food/income) ~ income + persons', fex)
    print m.fit().summary()
    #print GLM.from_formula('iyield ~ C(batch) + temp', dat, family=Binomial()).fit().summary()

    dev = pd.read_csv('methylation-test.csv')
    dev['methylation'] = sm.families.links.Logit().inverse(dev['methylation'])
    m = BetaReg.from_formula('methylation ~ age + gender', dev,
    m = Beta.from_formula('methylation ~ age + gender', dev,
    link_phi=sm.families.links.identity())
    print m.fit().summary()
  9. brentp revised this gist Oct 4, 2014. 1 changed file with 4 additions and 7 deletions.
    11 changes: 4 additions & 7 deletions betareg.py
    Original file line number Diff line number Diff line change
    @@ -39,20 +39,17 @@ def __init__(self, endog, exog, Z=None, link=Logit(),
    def nloglikeobs(self, params):
    return -self._ll_br(self.endog, self.exog, self.Z, params)

    def fit(self, start_params=None, maxiter=1000000, maxfun=50000,
    disp=False,
    def fit(self, start_params=None, maxiter=100000, maxfun=5000, disp=False,
    method='bfgs', **kwds):

    if start_params is None:
    start_params = GLM(self.endog, self.exog, family=Binomial()
    ).fit(disp=False).params
    start_params = np.append(start_params, [0.5] * self.Z.shape[1])

    return super(BetaReg, self).fit(start_params=start_params,
    maxiter=maxiter,
    maxfun=maxfun,
    method=method,
    disp=disp,
    **kwds)
    maxiter=maxiter, maxfun=maxfun,
    method=method, disp=disp, **kwds)

    def _ll_br(self, y, X, Z, params):
    nz = self.Z.shape[1]
  10. brentp revised this gist Oct 4, 2014. 1 changed file with 19 additions and 11 deletions.
    30 changes: 19 additions & 11 deletions betareg.py
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,6 @@
    """
    Beta Regression.
    """
    import numpy as np
    from scipy.special import gammaln as lgamma
    from statsmodels.base.model import GenericLikelihoodModel
    @@ -11,23 +14,28 @@
    # http://psychology3.anu.edu.au/people/smithson/details/Pubs/Smithson_Verkuilen06.pdf

    class Logit(sm.families.links.Logit):
    """Logit tranform that won't overflow with large numbers."""
    def inverse(self, z):
    return 1 / (1. + np.exp(-z))

    class BetaReg(GenericLikelihoodModel):

    """Beta Regression."""

    def __init__(self, endog, exog, Z=None, link=Logit(),
    link_phi=sm.families.links.Log(), **kwds):
    assert np.all((0 < endog) & (endog < 1))

    super(BetaReg, self).__init__(endog, exog, **kwds)
    self.link = link
    self.link_phi = link_phi
    # how to set default Z?

    if Z is None:
    self.Z = np.ones((self.endog.shape[0], 1), dtype='f')
    else:
    self.Z = np.asarray(Z)
    assert len(self.Z) == len(self.endog)

    def nloglikeobs(self, params):
    return -self._ll_br(self.endog, self.exog, self.Z, params)

    @@ -38,15 +46,14 @@ def fit(self, start_params=None, maxiter=1000000, maxfun=50000,
    start_params = GLM(self.endog, self.exog, family=Binomial()
    ).fit(disp=False).params
    start_params = np.append(start_params, [0.5] * self.Z.shape[1])
    #start_params = np.append(np.zeros(self.exog.shape[1]), 0.5)
    #self.exog[0] = np.mean(self.endog)

    return super(BetaReg, self).fit(start_params=start_params,
    maxiter=maxiter,
    maxfun=maxfun,
    method=method,
    disp=disp,
    **kwds)
    maxiter=maxiter,
    maxfun=maxfun,
    method=method,
    disp=disp,
    **kwds)

    def _ll_br(self, y, X, Z, params):
    nz = self.Z.shape[1]

    @@ -56,15 +63,16 @@ def _ll_br(self, y, X, Z, params):
    mu = self.link.inverse(np.dot(X, Xparams))
    phi = self.link_phi.inverse(np.dot(Z, Zparams))

    if np.any(phi <= np.finfo(float).eps): return np.array(-inf)
    if np.any(phi <= np.finfo(float).eps): return np.array(-np.inf)

    ll = lgamma(phi) - lgamma(mu * phi) - lgamma((1 - mu) * phi) \
    + (mu * phi - 1) * np.log(y) + (((1 - mu) * phi) - 1) * np.log(1 - y)
    + (mu * phi - 1) * np.log(y) + (((1 - mu) * phi) - 1) \
    * np.log(1 - y)

    return ll

    if __name__ == "__main__":

    import pandas as pd
    import patsy
    dat = pd.read_table('gasoline.txt')
    Z = patsy.dmatrix('~ temp', dat, return_type='dataframe')
  11. brentp revised this gist Oct 3, 2014. 2 changed files with 50 additions and 3 deletions.
    16 changes: 13 additions & 3 deletions betareg.py
    Original file line number Diff line number Diff line change
    @@ -4,6 +4,7 @@
    from statsmodels.api import GLM
    from statsmodels.genmod.families import Binomial
    import statsmodels.api as sm
    import pandas as pd

    #see http://cran.r-project.org/web/packages/betareg/vignettes/betareg-ext.pdf
    # nice reference:
    @@ -16,6 +17,7 @@ def inverse(self, z):
    class BetaReg(GenericLikelihoodModel):
    def __init__(self, endog, exog, Z=None, link=Logit(),
    link_phi=sm.families.links.Log(), **kwds):
    assert np.all((0 < endog) & (endog < 1))

    super(BetaReg, self).__init__(endog, exog, **kwds)
    self.link = link
    @@ -26,22 +28,24 @@ def __init__(self, endog, exog, Z=None, link=Logit(),
    else:
    self.Z = np.asarray(Z)
    assert len(self.Z) == len(self.endog)

    def nloglikeobs(self, params):
    return -self._ll_br(self.endog, self.exog, self.Z, params)

    def fit(self, start_params=None, maxiter=1000000, maxfun=50000,
    disp=False,
    method='bfgs', **kwds):
    if start_params is None:
    start_params = GLM(self.endog, self.exog, family=Binomial()).fit().params
    start_params = np.append(start_params, [1] * self.Z.shape[1])
    start_params = GLM(self.endog, self.exog, family=Binomial()
    ).fit(disp=False).params
    start_params = np.append(start_params, [0.5] * self.Z.shape[1])
    #start_params = np.append(np.zeros(self.exog.shape[1]), 0.5)
    #self.exog[0] = np.mean(self.endog)

    return super(BetaReg, self).fit(start_params=start_params,
    maxiter=maxiter,
    maxfun=maxfun,
    method=method,
    disp=disp,
    **kwds)
    def _ll_br(self, y, X, Z, params):
    nz = self.Z.shape[1]
    @@ -73,3 +77,9 @@ def _ll_br(self, y, X, Z, params):
    m = BetaReg.from_formula(' I(food/income) ~ income + persons', fex)
    print m.fit().summary()
    #print GLM.from_formula('iyield ~ C(batch) + temp', dat, family=Binomial()).fit().summary()

    dev = pd.read_csv('methylation-test.csv')
    dev['methylation'] = sm.families.links.Logit().inverse(dev['methylation'])
    m = BetaReg.from_formula('methylation ~ age + gender', dev,
    link_phi=sm.families.links.identity())
    print m.fit().summary()
    37 changes: 37 additions & 0 deletions methylation-test.csv
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,37 @@
    ,gender,age,Basename,ID,id,plate,CpG,methylation
    0,F,58.231,6042324088_R04C01,age58.231_F,id_0,6042324088,CpG_0,1.4828322881625375
    1,M,52.632,6042324088_R06C01,age52.632_M,id_1,6042324088,CpG_0,1.4051509852087734
    2,M,64.679,6042324088_R01C01,age64.679_M,id_2,6042324088,CpG_0,1.4051509852087734
    3,F,55.299,6042324088_R04C02,age55.299_F,id_3,6042324088,CpG_0,1.4370666864933141
    4,M,56.019,6042324088_R02C01,age56.019_M,id_4,6042324088,CpG_0,1.7743677265161857
    5,M,62.021,6042324088_R01C02,age62.021_M,id_5,6042324088,CpG_0,1.4696224926932235
    6,F,52.298,6042324088_R06C02,age52.298_F,id_6,6042324088,CpG_0,1.489478597355121
    7,F,39.71,6042324088_R03C01,age39.71_F,id_7,6042324088,CpG_0,1.564513100525912
    8,F,57.492,6042324088_R05C02,age57.492_F,id_8,6042324088,CpG_0,1.5785565986326349
    9,F,57.624,6042324088_R05C01,age57.624_F,id_9,6042324088,CpG_0,1.242506468328179
    10,F,40.487,6042324088_R03C02,age40.487_F,id_10,6042324088,CpG_0,1.3009807774073552
    11,M,53.662,6042324088_R02C02,age53.662_M,id_11,6042324088,CpG_0,1.5299568447640945
    12,F,58.231,6042324088_R04C01,age58.231_F,id_0,6042324088,CpG_1,2.100996545241666
    13,M,52.632,6042324088_R06C01,age52.632_M,id_1,6042324088,CpG_1,2.1322666810614472
    14,M,64.679,6042324088_R01C01,age64.679_M,id_2,6042324088,CpG_1,2.1322666810614472
    15,F,55.299,6042324088_R04C02,age55.299_F,id_3,6042324088,CpG_1,1.8921458020642403
    16,M,56.019,6042324088_R02C01,age56.019_M,id_4,6042324088,CpG_1,2.3634832752006427
    17,M,62.021,6042324088_R01C02,age62.021_M,id_5,6042324088,CpG_1,2.0805670342015703
    18,F,52.298,6042324088_R06C02,age52.298_F,id_6,6042324088,CpG_1,2.0406555166446796
    19,F,39.71,6042324088_R03C01,age39.71_F,id_7,6042324088,CpG_1,2.175197255017929
    20,F,57.492,6042324088_R05C02,age57.492_F,id_8,6042324088,CpG_1,2.153549513833558
    21,F,57.624,6042324088_R05C01,age57.624_F,id_9,6042324088,CpG_1,1.815289966638249
    22,F,40.487,6042324088_R03C02,age40.487_F,id_10,6042324088,CpG_1,2.060457163597239
    23,M,53.662,6042324088_R02C02,age53.662_M,id_11,6042324088,CpG_1,1.9924301646902063
    24,F,58.231,6042324088_R04C01,age58.231_F,id_0,6042324088,CpG_2,2.682732393117921
    25,M,52.632,6042324088_R06C01,age52.632_M,id_1,6042324088,CpG_2,2.3508277619403852
    26,M,64.679,6042324088_R01C01,age64.679_M,id_2,6042324088,CpG_2,2.1972245773362196
    27,F,55.299,6042324088_R04C02,age55.299_F,id_3,6042324088,CpG_2,2.3383031755961254
    28,M,56.019,6042324088_R02C01,age56.019_M,id_4,6042324088,CpG_2,2.666159259393051
    29,M,62.021,6042324088_R01C02,age62.021_M,id_5,6042324088,CpG_2,2.556365613770146
    30,F,52.298,6042324088_R06C02,age52.298_F,id_6,6042324088,CpG_2,2.376272808785205
    31,F,39.71,6042324088_R03C01,age39.71_F,id_7,6042324088,CpG_2,2.3891995658308174
    32,F,57.492,6042324088_R05C02,age57.492_F,id_8,6042324088,CpG_2,2.571428861772524
    33,F,57.624,6042324088_R05C01,age57.624_F,id_9,6042324088,CpG_2,2.442347035369205
    34,F,40.487,6042324088_R03C02,age40.487_F,id_10,6042324088,CpG_2,2.3891995658308174
    35,M,53.662,6042324088_R02C02,age53.662_M,id_11,6042324088,CpG_2,2.52680914144201
  12. brentp revised this gist Oct 3, 2014. 1 changed file with 2 additions and 0 deletions.
    2 changes: 2 additions & 0 deletions betareg.py
    Original file line number Diff line number Diff line change
    @@ -6,6 +6,8 @@
    import statsmodels.api as sm

    #see http://cran.r-project.org/web/packages/betareg/vignettes/betareg-ext.pdf
    # nice reference:
    # http://psychology3.anu.edu.au/people/smithson/details/Pubs/Smithson_Verkuilen06.pdf

    class Logit(sm.families.links.Logit):
    def inverse(self, z):
  13. brentp revised this gist Oct 3, 2014. 2 changed files with 20 additions and 5 deletions.
    12 changes: 10 additions & 2 deletions betareg.py
    Original file line number Diff line number Diff line change
    @@ -6,6 +6,7 @@
    import statsmodels.api as sm

    #see http://cran.r-project.org/web/packages/betareg/vignettes/betareg-ext.pdf

    class Logit(sm.families.links.Logit):
    def inverse(self, z):
    return 1 / (1. + np.exp(-z))
    @@ -31,7 +32,7 @@ def fit(self, start_params=None, maxiter=1000000, maxfun=50000,
    method='bfgs', **kwds):
    if start_params is None:
    start_params = GLM(self.endog, self.exog, family=Binomial()).fit().params
    start_params = np.append(start_params, [0.5] * self.Z.shape[1])
    start_params = np.append(start_params, [1] * self.Z.shape[1])
    #start_params = np.append(np.zeros(self.exog.shape[1]), 0.5)
    #self.exog[0] = np.mean(self.endog)

    @@ -49,15 +50,22 @@ def _ll_br(self, y, X, Z, params):
    mu = self.link.inverse(np.dot(X, Xparams))
    phi = self.link_phi.inverse(np.dot(Z, Zparams))

    if np.any(phi <= np.finfo(float).eps): return np.array(-inf)

    ll = lgamma(phi) - lgamma(mu * phi) - lgamma((1 - mu) * phi) \
    + (mu * phi - 1) * np.log(y) + (((1 - mu) * phi) - 1) * np.log(1 - y)
    return ll

    if __name__ == "__main__":

    import pandas as pd
    import patsy
    dat = pd.read_table('gasoline.txt')
    m = BetaReg.from_formula('iyield ~ C(batch, Treatment(10)) + temp', dat)
    Z = patsy.dmatrix('~ temp', dat, return_type='dataframe')
    # using other precison params with
    m = BetaReg.from_formula('iyield ~ C(batch, Treatment(10)) + temp', dat,
    Z=Z, link_phi=sm.families.links.identity())
    print m.fit().summary()

    fex = pd.read_csv('foodexpenditure.csv')
    m = BetaReg.from_formula(' I(food/income) ~ income + persons', fex)
    13 changes: 10 additions & 3 deletions ex.R
    Original file line number Diff line number Diff line change
    @@ -3,9 +3,16 @@ library(betareg)
    data("GasolineYield", package = "betareg")
    data("FoodExpenditure", package = "betareg")

    m = betareg(yield ~ batch + temp, data = GasolineYield)
    print(summary(m))

    fe_beta = betareg(I(food/income) ~ income + persons, data = FoodExpenditure)
    print(summary(fe_beta))


    m = betareg(yield ~ batch + temp, data = GasolineYield)
    #print(summary(m))



    gy2 <- betareg(yield ~ batch + temp | temp, data = GasolineYield, link.phi="identity")
    #print(summary(gy2))


  14. brentp revised this gist Oct 3, 2014. 1 changed file with 14 additions and 10 deletions.
    24 changes: 14 additions & 10 deletions betareg.py
    Original file line number Diff line number Diff line change
    @@ -3,18 +3,20 @@
    from statsmodels.base.model import GenericLikelihoodModel
    from statsmodels.api import GLM
    from statsmodels.genmod.families import Binomial
    import statsmodels.api as sm

    #see http://cran.r-project.org/web/packages/betareg/vignettes/betareg-ext.pdf

    def ilogit(a):
    return 1 / (1. + np.exp(-a))

    def logit(a):
    return np.log(p / (1. - p))
    class Logit(sm.families.links.Logit):
    def inverse(self, z):
    return 1 / (1. + np.exp(-z))

    class BetaReg(GenericLikelihoodModel):
    def __init__(self, endog, exog, Z=None, **kwds):
    def __init__(self, endog, exog, Z=None, link=Logit(),
    link_phi=sm.families.links.Log(), **kwds):

    super(BetaReg, self).__init__(endog, exog, **kwds)
    self.link = link
    self.link_phi = link_phi
    # how to set default Z?
    if Z is None:
    self.Z = np.ones((self.endog.shape[0], 1), dtype='f')
    @@ -25,7 +27,8 @@ def __init__(self, endog, exog, Z=None, **kwds):
    def nloglikeobs(self, params):
    return -self._ll_br(self.endog, self.exog, self.Z, params)

    def fit(self, start_params=None, maxiter=1000000, maxfun=50000, **kwds):
    def fit(self, start_params=None, maxiter=1000000, maxfun=50000,
    method='bfgs', **kwds):
    if start_params is None:
    start_params = GLM(self.endog, self.exog, family=Binomial()).fit().params
    start_params = np.append(start_params, [0.5] * self.Z.shape[1])
    @@ -35,15 +38,16 @@ def fit(self, start_params=None, maxiter=1000000, maxfun=50000, **kwds):
    return super(BetaReg, self).fit(start_params=start_params,
    maxiter=maxiter,
    maxfun=maxfun,
    method=method,
    **kwds)
    def _ll_br(self, y, X, Z, params):
    nz = self.Z.shape[1]

    Xparams = params[:-nz]
    Zparams = params[-nz:]

    mu = ilogit(np.dot(X, Xparams))
    phi = np.exp(np.dot(Z, Zparams))
    mu = self.link.inverse(np.dot(X, Xparams))
    phi = self.link_phi.inverse(np.dot(Z, Zparams))

    ll = lgamma(phi) - lgamma(mu * phi) - lgamma((1 - mu) * phi) \
    + (mu * phi - 1) * np.log(y) + (((1 - mu) * phi) - 1) * np.log(1 - y)
  15. brentp revised this gist Oct 3, 2014. 3 changed files with 48 additions and 2 deletions.
    6 changes: 4 additions & 2 deletions betareg.py
    Original file line number Diff line number Diff line change
    @@ -47,13 +47,15 @@ def _ll_br(self, y, X, Z, params):

    ll = lgamma(phi) - lgamma(mu * phi) - lgamma((1 - mu) * phi) \
    + (mu * phi - 1) * np.log(y) + (((1 - mu) * phi) - 1) * np.log(1 - y)
    print ll.sum()
    return ll

    if __name__ == "__main__":

    import pandas as pd
    dat = pd.read_table('gasoline.txt')
    m = BetaReg.from_formula('iyield ~ C(batch) + temp', dat)
    m = BetaReg.from_formula('iyield ~ C(batch, Treatment(10)) + temp', dat)

    fex = pd.read_csv('foodexpenditure.csv')
    m = BetaReg.from_formula(' I(food/income) ~ income + persons', fex)
    print m.fit().summary()
    #print GLM.from_formula('iyield ~ C(batch) + temp', dat, family=Binomial()).fit().summary()
    5 changes: 5 additions & 0 deletions ex.R
    Original file line number Diff line number Diff line change
    @@ -1,6 +1,11 @@
    library(betareg)

    data("GasolineYield", package = "betareg")
    data("FoodExpenditure", package = "betareg")

    m = betareg(yield ~ batch + temp, data = GasolineYield)
    print(summary(m))

    fe_beta = betareg(I(food/income) ~ income + persons, data = FoodExpenditure)
    print(summary(fe_beta))

    39 changes: 39 additions & 0 deletions foodexpenditure.csv
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,39 @@
    food,income,persons
    15.998,62.476,1
    16.652,82.304,5
    21.741,74.679,3
    7.431,39.151,3
    10.481,64.724,5
    13.548,36.786,3
    23.256,83.052,4
    17.976,86.935,1
    14.161,88.233,2
    8.825,38.695,2
    14.184,73.831,7
    19.604,77.122,3
    13.728,45.519,2
    21.141,82.251,2
    17.446,59.862,3
    9.629,26.563,3
    14.005,61.818,2
    9.16,29.682,1
    18.831,50.825,5
    7.641,71.062,4
    13.882,41.99,4
    9.67,37.324,3
    21.604,86.352,5
    10.866,45.506,2
    28.98,69.929,6
    10.882,61.041,2
    18.561,82.469,1
    11.629,44.208,2
    18.067,49.467,5
    14.539,25.905,5
    19.192,79.178,5
    25.918,75.811,3
    28.833,82.718,6
    15.869,48.311,4
    14.91,42.494,5
    9.55,40.573,4
    23.066,44.872,6
    14.751,27.167,7
  16. brentp renamed this gist Oct 3, 2014. 1 changed file with 7 additions and 6 deletions.
    13 changes: 7 additions & 6 deletions betabinomial.py → betareg.py
    Original file line number Diff line number Diff line change
    @@ -12,9 +12,9 @@ def ilogit(a):
    def logit(a):
    return np.log(p / (1. - p))

    class BetaBinomial(GenericLikelihoodModel):
    class BetaReg(GenericLikelihoodModel):
    def __init__(self, endog, exog, Z=None, **kwds):
    super(BetaBinomial, self).__init__(endog, exog, **kwds)
    super(BetaReg, self).__init__(endog, exog, **kwds)
    # how to set default Z?
    if Z is None:
    self.Z = np.ones((self.endog.shape[0], 1), dtype='f')
    @@ -23,7 +23,7 @@ def __init__(self, endog, exog, Z=None, **kwds):
    assert len(self.Z) == len(self.endog)

    def nloglikeobs(self, params):
    return -self._ll_bb(self.endog, self.exog, self.Z, params)
    return -self._ll_br(self.endog, self.exog, self.Z, params)

    def fit(self, start_params=None, maxiter=1000000, maxfun=50000, **kwds):
    if start_params is None:
    @@ -32,11 +32,11 @@ def fit(self, start_params=None, maxiter=1000000, maxfun=50000, **kwds):
    #start_params = np.append(np.zeros(self.exog.shape[1]), 0.5)
    #self.exog[0] = np.mean(self.endog)

    return super(BetaBinomial, self).fit(start_params=start_params,
    return super(BetaReg, self).fit(start_params=start_params,
    maxiter=maxiter,
    maxfun=maxfun,
    **kwds)
    def _ll_bb(self, y, X, Z, params):
    def _ll_br(self, y, X, Z, params):
    nz = self.Z.shape[1]

    Xparams = params[:-nz]
    @@ -54,5 +54,6 @@ def _ll_bb(self, y, X, Z, params):

    import pandas as pd
    dat = pd.read_table('gasoline.txt')
    m = BetaBinomial.from_formula('iyield ~ C(batch) + temp', dat)
    m = BetaReg.from_formula('iyield ~ C(batch) + temp', dat)
    print m.fit().summary()
    #print GLM.from_formula('iyield ~ C(batch) + temp', dat, family=Binomial()).fit().summary()
  17. brentp revised this gist Oct 3, 2014. 1 changed file with 0 additions and 1 deletion.
    1 change: 0 additions & 1 deletion gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -1 +0,0 @@
    d
  18. brentp revised this gist Oct 3, 2014. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1 @@
    d
  19. brentp created this gist Oct 3, 2014.
    1 change: 1 addition & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1 @@
    d
  20. brentp created this gist Oct 3, 2014.
    58 changes: 58 additions & 0 deletions betabinomial.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,58 @@
    import numpy as np
    from scipy.special import gammaln as lgamma
    from statsmodels.base.model import GenericLikelihoodModel
    from statsmodels.api import GLM
    from statsmodels.genmod.families import Binomial

    #see http://cran.r-project.org/web/packages/betareg/vignettes/betareg-ext.pdf

    def ilogit(a):
    return 1 / (1. + np.exp(-a))

    def logit(a):
    return np.log(p / (1. - p))

    class BetaBinomial(GenericLikelihoodModel):
    def __init__(self, endog, exog, Z=None, **kwds):
    super(BetaBinomial, self).__init__(endog, exog, **kwds)
    # how to set default Z?
    if Z is None:
    self.Z = np.ones((self.endog.shape[0], 1), dtype='f')
    else:
    self.Z = np.asarray(Z)
    assert len(self.Z) == len(self.endog)

    def nloglikeobs(self, params):
    return -self._ll_bb(self.endog, self.exog, self.Z, params)

    def fit(self, start_params=None, maxiter=1000000, maxfun=50000, **kwds):
    if start_params is None:
    start_params = GLM(self.endog, self.exog, family=Binomial()).fit().params
    start_params = np.append(start_params, [0.5] * self.Z.shape[1])
    #start_params = np.append(np.zeros(self.exog.shape[1]), 0.5)
    #self.exog[0] = np.mean(self.endog)

    return super(BetaBinomial, self).fit(start_params=start_params,
    maxiter=maxiter,
    maxfun=maxfun,
    **kwds)
    def _ll_bb(self, y, X, Z, params):
    nz = self.Z.shape[1]

    Xparams = params[:-nz]
    Zparams = params[-nz:]

    mu = ilogit(np.dot(X, Xparams))
    phi = np.exp(np.dot(Z, Zparams))

    ll = lgamma(phi) - lgamma(mu * phi) - lgamma((1 - mu) * phi) \
    + (mu * phi - 1) * np.log(y) + (((1 - mu) * phi) - 1) * np.log(1 - y)
    print ll.sum()
    return ll

    if __name__ == "__main__":

    import pandas as pd
    dat = pd.read_table('gasoline.txt')
    m = BetaBinomial.from_formula('iyield ~ C(batch) + temp', dat)
    print m.fit().summary()
    6 changes: 6 additions & 0 deletions ex.R
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,6 @@
    library(betareg)

    data("GasolineYield", package = "betareg")
    m = betareg(yield ~ batch + temp, data = GasolineYield)
    print(summary(m))

    33 changes: 33 additions & 0 deletions gasoline.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,33 @@
    iyield gravity pressure temp10 temp batch
    0.122 50.8 8.6 190 205 1
    0.223 50.8 8.6 190 275 1
    0.347 50.8 8.6 190 345 1
    0.457 50.8 8.6 190 407 1
    0.08 40.8 3.5 210 218 2
    0.131 40.8 3.5 210 273 2
    0.266 40.8 3.5 210 347 2
    0.074 40 6.1 217 212 3
    0.182 40 6.1 217 272 3
    0.304 40 6.1 217 340 3
    0.069 38.4 6.1 220 235 4
    0.152 38.4 6.1 220 300 4
    0.26 38.4 6.1 220 365 4
    0.336 38.4 6.1 220 410 4
    0.144 40.3 4.8 231 307 5
    0.268 40.3 4.8 231 367 5
    0.349 40.3 4.8 231 395 5
    0.1 32.2 5.2 236 267 6
    0.248 32.2 5.2 236 360 6
    0.317 32.2 5.2 236 402 6
    0.028 41.3 1.8 267 235 7
    0.064 41.3 1.8 267 275 7
    0.161 41.3 1.8 267 358 7
    0.278 41.3 1.8 267 416 7
    0.05 38.1 1.2 274 285 8
    0.176 38.1 1.2 274 365 8
    0.321 38.1 1.2 274 444 8
    0.14 32.2 2.4 284 351 9
    0.232 32.2 2.4 284 424 9
    0.085 31.8 0.2 316 365 10
    0.147 31.8 0.2 316 379 10
    0.18 31.8 0.2 316 428 10