brentp · September 17, 2024 14:48 · Oct 4, 2014 · Oct 4, 2014 · Oct 4, 2014 · Oct 4, 2014
diff --git a/betareg.py b/betareg.py
@@ -96,18 +96,21 @@ def __init__(self, endog, exog, Z=None, link=Logit(),
         :ref:`links`
 
         """.format(example=_init_example)
-
         assert np.all((0 < endog) & (endog < 1))
+        if Z is None:
+            extra_names = ['phi']
+            Z = np.ones((len(endog), 1), dtype='f')
+        else:
+            extra_names = ['precision-%s' % zc for zc in \
+                        (Z.columns if hasattr(Z, 'columns') else range(1, Z.shape[1] + 1))]
+        kwds['extra_params_names'] = extra_names
 
         super(Beta, self).__init__(endog, exog, **kwds)
         self.link = link
         self.link_phi = link_phi
 
-        if Z is None:
-            self.Z = np.ones((self.endog.shape[0], 1), dtype='f')
-        else:
-            self.Z = np.asarray(Z)
-            assert len(self.Z) == len(self.endog)
+        self.Z = Z
+        assert len(self.Z) == len(self.endog)
 
     def nloglikeobs(self, params):
         """
@@ -182,7 +185,7 @@ def _ll_br(self, y, X, Z, params):
     #print GLM.from_formula('iyield ~ C(batch) + temp', dat, family=Binomial()).fit().summary()
 
     dev = pd.read_csv('methylation-test.csv')
-    Z = patsy.dmatrix('~ age', dev)
+    Z = patsy.dmatrix('~ age', dev, return_type='dataframe')
     m = Beta.from_formula('methylation ~ gender + CpG', dev,
             Z=Z,
             link_phi=sm.families.links.identity())

diff --git a/betareg.py b/betareg.py
@@ -182,6 +182,8 @@ def _ll_br(self, y, X, Z, params):
     #print GLM.from_formula('iyield ~ C(batch) + temp', dat, family=Binomial()).fit().summary()
 
     dev = pd.read_csv('methylation-test.csv')
-    m = Beta.from_formula('methylation ~ age + gender', dev,
+    Z = patsy.dmatrix('~ age', dev)
+    m = Beta.from_formula('methylation ~ gender + CpG', dev,
+            Z=Z,
             link_phi=sm.families.links.identity())
     print m.fit().summary()
diff --git a/test_beta.py b/test_beta.py
@@ -1,6 +1,7 @@
 
 # in R:
 import io
+import statsmodels.api as sm
 import pandas as pd
 import patsy
 from betareg import Beta
@@ -20,14 +21,14 @@
 
 _methylation_estimates_mean = u"""\
 varname      Estimate StdError zvalue Pr(>|z|)    
-(Intercept)  1.44224    0.03401  42.404   <2e-16
+(Intercept)  1.44224    0.03401  42.404   2e-16
 genderM      0.06986    0.04359   1.603    0.109    
-CpGCpG_1     0.60735    0.04834  12.563   <2e-16
-CpGCpG_2     0.97355    0.05311  18.331   <2e-16"""
+CpGCpG_1     0.60735    0.04834  12.563   2e-16
+CpGCpG_2     0.97355    0.05311  18.331   2e-16"""
 
 _methylation_estimates_precision = u"""\
 varname Estimate StdError zvalue Pr(>|z|)    
-(Intercept)  8.22829    1.79098   4.594 4.34e-06 ***
+(Intercept)  8.22829    1.79098   4.594 4.34e-06
 age         -0.03471    0.03276  -1.059    0.289"""
 
 
@@ -41,7 +42,8 @@
 methylation = pd.read_csv('methylation-test.csv')
 
 def check_same(a, b, eps, name):
-    assert np.allclose(a, b, atol=eps), ("different from expected", name, list(a), list(b))
+    assert np.allclose(a, b, rtol=0.01, atol=eps), \
+            ("different from expected", name, list(a), list(b))
 
 
 def test_income_coefficients():
@@ -69,8 +71,18 @@ def test_methylation_coefficients():
     model = "methylation ~ gender + CpG"
     Z = patsy.dmatrix("~ age", methylation)
 
-    mod = Beta.from_formula(model, methylation, Z=Z)
+    mod = Beta.from_formula(model, methylation, Z=Z, link_phi=sm.families.links.identity())
     rslt = mod.fit()
     yield check_same, rslt.params[:-2], expected_methylation_mean['Estimate'], 1e-2, "estimates"
-    #yield check_same, rslt.tvalues[:-2], expected_methylation_mean['zvalue'], 0.1, "z-scores"
-    #yield check_same, rslt.pvalues[:-2], expected_methylation_mean['Pr(>|z|)'], 1e-3, "p-values"
+    yield check_same, rslt.tvalues[:-2], expected_methylation_mean['zvalue'], 0.1, "z-scores"
+    yield check_same, rslt.pvalues[:-2], expected_methylation_mean['Pr(>|z|)'], 1e-2, "p-values"
+
+def test_methylation_precision():
+    model = "methylation ~ gender + CpG"
+    Z = patsy.dmatrix("~ age", methylation)
+
+    mod = Beta.from_formula(model, methylation, Z=Z, link_phi=sm.families.links.identity())
+    rslt = mod.fit()
+    #yield check_same, sm.families.links.logit()(rslt.params[-2:]), expected_methylation_precision['Estimate'], 1e-3, "estimate"
+    #yield check_same, rslt.tvalues[-2:], expected_methylation_precision['zvalue'], 0.1, "z-score"
+
diff --git a/ex.R b/ex.R
@@ -17,7 +17,6 @@ data("FoodExpenditure", package = "betareg")
 
 
 meth = read.csv('methylation-test.csv')
-meth$methylation = 1 / (1 + exp(-meth$methylation))
 m =  betareg(methylation ~ gender + CpG | age, meth)
 print(summary(m))
 
diff --git a/test_beta.py b/test_beta.py
@@ -2,6 +2,7 @@
 # in R:
 import io
 import pandas as pd
+import patsy
 from betareg import Beta
 import numpy as np
 
@@ -37,14 +38,39 @@
 expected_methylation_precision = pd.read_table(io.StringIO(_methylation_estimates_precision), sep="\s+")
 
 income = pd.read_csv('foodexpenditure.csv')
+methylation = pd.read_csv('methylation-test.csv')
+
+def check_same(a, b, eps, name):
+    assert np.allclose(a, b, atol=eps), ("different from expected", name, list(a), list(b))
+
 
 def test_income_coefficients():
     model = "I(food/income) ~ income + persons"
 
     mod = Beta.from_formula(model, income)
     rslt = mod.fit()
-    assert np.allclose(rslt.params[:-1], expected_income_mean['Estimate'])
-    print rslt.tvalues
-    print expected_income_mean['zvalue']
-    assert np.allclose(rslt.tvalues[:-1], expected_income_mean['zvalue'], rtol=0.1)
+    yield check_same, rslt.params[:-1], expected_income_mean['Estimate'], 1e-3, "estimates"
+    yield check_same, rslt.tvalues[:-1], expected_income_mean['zvalue'], 0.1, "z-scores"
+    yield check_same, rslt.pvalues[:-1], expected_income_mean['Pr(>|z|)'], 1e-3, "p-values"
+
 
+def test_income_precision():
+    model = "I(food/income) ~ income + persons"
+
+    mod = Beta.from_formula(model, income)
+    rslt = mod.fit()
+    # note that we have to exp the phi results for now.
+    yield check_same, np.exp(rslt.params[-1:]), expected_income_precision['Estimate'], 1e-3, "estimate"
+    #yield check_same, rslt.tvalues[-1:], expected_income_precision['zvalue'], 0.1, "z-score"
+    yield check_same, rslt.pvalues[-1:], expected_income_precision['Pr(>|z|)'], 1e-3, "p-values"
+
+
+def test_methylation_coefficients():
+    model = "methylation ~ gender + CpG"
+    Z = patsy.dmatrix("~ age", methylation)
+
+    mod = Beta.from_formula(model, methylation, Z=Z)
+    rslt = mod.fit()
+    yield check_same, rslt.params[:-2], expected_methylation_mean['Estimate'], 1e-2, "estimates"
+    #yield check_same, rslt.tvalues[:-2], expected_methylation_mean['zvalue'], 0.1, "z-scores"
+    #yield check_same, rslt.pvalues[:-2], expected_methylation_mean['Pr(>|z|)'], 1e-3, "p-values"
diff --git a/betareg.py b/betareg.py
@@ -182,7 +182,6 @@ def _ll_br(self, y, X, Z, params):
     #print GLM.from_formula('iyield ~ C(batch) + temp', dat, family=Binomial()).fit().summary()
 
     dev = pd.read_csv('methylation-test.csv')
-    dev['methylation'] = sm.families.links.Logit().inverse(dev['methylation'])
     m = Beta.from_formula('methylation ~ age + gender', dev,
             link_phi=sm.families.links.identity())
     print m.fit().summary()
diff --git a/ex.R b/ex.R
@@ -3,16 +3,21 @@ library(betareg)
 data("GasolineYield", package = "betareg")
 data("FoodExpenditure", package = "betareg")
 
-fe_beta = betareg(I(food/income) ~ income + persons, data = FoodExpenditure)
-print(summary(fe_beta))
+#fe_beta = betareg(I(food/income) ~ income + persons, data = FoodExpenditure)
+#print(summary(fe_beta)$coefficients)
 
 
-m = betareg(yield ~ batch + temp, data = GasolineYield)
+#m = betareg(yield ~ batch + temp, data = GasolineYield)
 #print(summary(m))
 
 
 
-gy2 <- betareg(yield ~ batch + temp | temp, data = GasolineYield, link.phi="identity")
+#gy2 <- betareg(yield ~ batch + temp | temp, data = GasolineYield, link.phi="identity")
 #print(summary(gy2))
 
 
+meth = read.csv('methylation-test.csv')
+meth$methylation = 1 / (1 + exp(-meth$methylation))
+m =  betareg(methylation ~ gender + CpG | age, meth)
+print(summary(m))
+
diff --git a/methylation-test.csv b/methylation-test.csv
@@ -1,37 +1,37 @@
-,gender,age,Basename,ID,id,plate,CpG,methylation
-0,F,58.231,6042324088_R04C01,age58.231_F,id_0,6042324088,CpG_0,1.4828322881625375
-1,M,52.632,6042324088_R06C01,age52.632_M,id_1,6042324088,CpG_0,1.4051509852087734
-2,M,64.679,6042324088_R01C01,age64.679_M,id_2,6042324088,CpG_0,1.4051509852087734
-3,F,55.299,6042324088_R04C02,age55.299_F,id_3,6042324088,CpG_0,1.4370666864933141
-4,M,56.019,6042324088_R02C01,age56.019_M,id_4,6042324088,CpG_0,1.7743677265161857
-5,M,62.021,6042324088_R01C02,age62.021_M,id_5,6042324088,CpG_0,1.4696224926932235
-6,F,52.298,6042324088_R06C02,age52.298_F,id_6,6042324088,CpG_0,1.489478597355121
-7,F,39.71,6042324088_R03C01,age39.71_F,id_7,6042324088,CpG_0,1.564513100525912
-8,F,57.492,6042324088_R05C02,age57.492_F,id_8,6042324088,CpG_0,1.5785565986326349
-9,F,57.624,6042324088_R05C01,age57.624_F,id_9,6042324088,CpG_0,1.242506468328179
-10,F,40.487,6042324088_R03C02,age40.487_F,id_10,6042324088,CpG_0,1.3009807774073552
-11,M,53.662,6042324088_R02C02,age53.662_M,id_11,6042324088,CpG_0,1.5299568447640945
-12,F,58.231,6042324088_R04C01,age58.231_F,id_0,6042324088,CpG_1,2.100996545241666
-13,M,52.632,6042324088_R06C01,age52.632_M,id_1,6042324088,CpG_1,2.1322666810614472
-14,M,64.679,6042324088_R01C01,age64.679_M,id_2,6042324088,CpG_1,2.1322666810614472
-15,F,55.299,6042324088_R04C02,age55.299_F,id_3,6042324088,CpG_1,1.8921458020642403
-16,M,56.019,6042324088_R02C01,age56.019_M,id_4,6042324088,CpG_1,2.3634832752006427
-17,M,62.021,6042324088_R01C02,age62.021_M,id_5,6042324088,CpG_1,2.0805670342015703
-18,F,52.298,6042324088_R06C02,age52.298_F,id_6,6042324088,CpG_1,2.0406555166446796
-19,F,39.71,6042324088_R03C01,age39.71_F,id_7,6042324088,CpG_1,2.175197255017929
-20,F,57.492,6042324088_R05C02,age57.492_F,id_8,6042324088,CpG_1,2.153549513833558
-21,F,57.624,6042324088_R05C01,age57.624_F,id_9,6042324088,CpG_1,1.815289966638249
-22,F,40.487,6042324088_R03C02,age40.487_F,id_10,6042324088,CpG_1,2.060457163597239
-23,M,53.662,6042324088_R02C02,age53.662_M,id_11,6042324088,CpG_1,1.9924301646902063
-24,F,58.231,6042324088_R04C01,age58.231_F,id_0,6042324088,CpG_2,2.682732393117921
-25,M,52.632,6042324088_R06C01,age52.632_M,id_1,6042324088,CpG_2,2.3508277619403852
-26,M,64.679,6042324088_R01C01,age64.679_M,id_2,6042324088,CpG_2,2.1972245773362196
-27,F,55.299,6042324088_R04C02,age55.299_F,id_3,6042324088,CpG_2,2.3383031755961254
-28,M,56.019,6042324088_R02C01,age56.019_M,id_4,6042324088,CpG_2,2.666159259393051
-29,M,62.021,6042324088_R01C02,age62.021_M,id_5,6042324088,CpG_2,2.556365613770146
-30,F,52.298,6042324088_R06C02,age52.298_F,id_6,6042324088,CpG_2,2.376272808785205
-31,F,39.71,6042324088_R03C01,age39.71_F,id_7,6042324088,CpG_2,2.3891995658308174
-32,F,57.492,6042324088_R05C02,age57.492_F,id_8,6042324088,CpG_2,2.571428861772524
-33,F,57.624,6042324088_R05C01,age57.624_F,id_9,6042324088,CpG_2,2.442347035369205
-34,F,40.487,6042324088_R03C02,age40.487_F,id_10,6042324088,CpG_2,2.3891995658308174
-35,M,53.662,6042324088_R02C02,age53.662_M,id_11,6042324088,CpG_2,2.52680914144201
+,Unnamed: 0,gender,age,Basename,ID,id,plate,CpG,methylation
+0,0,F,58.231,6042324088_R04C01,age58.231_F,id_0,6042324088,CpG_0,0.815
+1,1,M,52.632,6042324088_R06C01,age52.632_M,id_1,6042324088,CpG_0,0.803
+2,2,M,64.679,6042324088_R01C01,age64.679_M,id_2,6042324088,CpG_0,0.803
+3,3,F,55.299,6042324088_R04C02,age55.299_F,id_3,6042324088,CpG_0,0.808
+4,4,M,56.019,6042324088_R02C01,age56.019_M,id_4,6042324088,CpG_0,0.8550000000000001
+5,5,M,62.021,6042324088_R01C02,age62.021_M,id_5,6042324088,CpG_0,0.8129999999999998
+6,6,F,52.298,6042324088_R06C02,age52.298_F,id_6,6042324088,CpG_0,0.816
+7,7,F,39.71,6042324088_R03C01,age39.71_F,id_7,6042324088,CpG_0,0.827
+8,8,F,57.492,6042324088_R05C02,age57.492_F,id_8,6042324088,CpG_0,0.829
+9,9,F,57.623999999999995,6042324088_R05C01,age57.624_F,id_9,6042324088,CpG_0,0.7760000000000001
+10,10,F,40.486999999999995,6042324088_R03C02,age40.487_F,id_10,6042324088,CpG_0,0.7859999999999999
+11,11,M,53.662,6042324088_R02C02,age53.662_M,id_11,6042324088,CpG_0,0.822
+12,12,F,58.231,6042324088_R04C01,age58.231_F,id_0,6042324088,CpG_1,0.891
+13,13,M,52.632,6042324088_R06C01,age52.632_M,id_1,6042324088,CpG_1,0.894
+14,14,M,64.679,6042324088_R01C01,age64.679_M,id_2,6042324088,CpG_1,0.894
+15,15,F,55.299,6042324088_R04C02,age55.299_F,id_3,6042324088,CpG_1,0.869
+16,16,M,56.019,6042324088_R02C01,age56.019_M,id_4,6042324088,CpG_1,0.914
+17,17,M,62.021,6042324088_R01C02,age62.021_M,id_5,6042324088,CpG_1,0.889
+18,18,F,52.298,6042324088_R06C02,age52.298_F,id_6,6042324088,CpG_1,0.8850000000000001
+19,19,F,39.71,6042324088_R03C01,age39.71_F,id_7,6042324088,CpG_1,0.898
+20,20,F,57.492,6042324088_R05C02,age57.492_F,id_8,6042324088,CpG_1,0.896
+21,21,F,57.623999999999995,6042324088_R05C01,age57.624_F,id_9,6042324088,CpG_1,0.86
+22,22,F,40.486999999999995,6042324088_R03C02,age40.487_F,id_10,6042324088,CpG_1,0.887
+23,23,M,53.662,6042324088_R02C02,age53.662_M,id_11,6042324088,CpG_1,0.8800000000000001
+24,24,F,58.231,6042324088_R04C01,age58.231_F,id_0,6042324088,CpG_2,0.936
+25,25,M,52.632,6042324088_R06C01,age52.632_M,id_1,6042324088,CpG_2,0.9129999999999999
+26,26,M,64.679,6042324088_R01C01,age64.679_M,id_2,6042324088,CpG_2,0.9000000000000001
+27,27,F,55.299,6042324088_R04C02,age55.299_F,id_3,6042324088,CpG_2,0.9119999999999999
+28,28,M,56.019,6042324088_R02C01,age56.019_M,id_4,6042324088,CpG_2,0.9349999999999999
+29,29,M,62.021,6042324088_R01C02,age62.021_M,id_5,6042324088,CpG_2,0.9280000000000002
+30,30,F,52.298,6042324088_R06C02,age52.298_F,id_6,6042324088,CpG_2,0.9150000000000001
+31,31,F,39.71,6042324088_R03C01,age39.71_F,id_7,6042324088,CpG_2,0.9160000000000001
+32,32,F,57.492,6042324088_R05C02,age57.492_F,id_8,6042324088,CpG_2,0.929
+33,33,F,57.623999999999995,6042324088_R05C01,age57.624_F,id_9,6042324088,CpG_2,0.92
+34,34,F,40.486999999999995,6042324088_R03C02,age40.487_F,id_10,6042324088,CpG_2,0.9160000000000001
+35,35,M,53.662,6042324088_R02C02,age53.662_M,id_11,6042324088,CpG_2,0.926
diff --git a/test_beta.py b/test_beta.py
@@ -0,0 +1,50 @@
+
+# in R:
+import io
+import pandas as pd
+from betareg import Beta
+import numpy as np
+
+# betareg(I(food/income) ~ income + persons, data = FoodExpenditure)
+_income_estimates_mean = u"""\
+varname        Estimate  StdError   zvalue     Pr(>|z|)
+(Intercept) -0.62254806 0.223853539 -2.781051 5.418326e-03
+income      -0.01229884 0.003035585 -4.051556 5.087819e-05
+persons      0.11846210 0.035340667  3.352005 8.022853e-04"""
+
+_income_estimates_precision = u"""\
+varname  Estimate StdError  zvalue     Pr(>|z|)
+(phi) 35.60975   8.079598 4.407366 1.046351e-05
+"""
+
+_methylation_estimates_mean = u"""\
+varname      Estimate StdError zvalue Pr(>|z|)    
+(Intercept)  1.44224    0.03401  42.404   <2e-16
+genderM      0.06986    0.04359   1.603    0.109    
+CpGCpG_1     0.60735    0.04834  12.563   <2e-16
+CpGCpG_2     0.97355    0.05311  18.331   <2e-16"""
+
+_methylation_estimates_precision = u"""\
+varname Estimate StdError zvalue Pr(>|z|)    
+(Intercept)  8.22829    1.79098   4.594 4.34e-06 ***
+age         -0.03471    0.03276  -1.059    0.289"""
+
+
+expected_income_mean = pd.read_table(io.StringIO(_income_estimates_mean), sep="\s+")
+expected_income_precision = pd.read_table(io.StringIO(_income_estimates_precision), sep="\s+")
+
+expected_methylation_mean = pd.read_table(io.StringIO(_methylation_estimates_mean), sep="\s+")
+expected_methylation_precision = pd.read_table(io.StringIO(_methylation_estimates_precision), sep="\s+")
+
+income = pd.read_csv('foodexpenditure.csv')
+
+def test_income_coefficients():
+    model = "I(food/income) ~ income + persons"
+
+    mod = Beta.from_formula(model, income)
+    rslt = mod.fit()
+    assert np.allclose(rslt.params[:-1], expected_income_mean['Estimate'])
+    print rslt.tvalues
+    print expected_income_mean['zvalue']
+    assert np.allclose(rslt.tvalues[:-1], expected_income_mean['zvalue'], rtol=0.1)
+
diff --git a/betareg.py b/betareg.py
@@ -5,9 +5,13 @@
 
 References
 ----------
-Grün, Bettina, Ioannis Kosmidis, and Achim Zeileis. Extended beta regression in R: Shaken, stirred, mixed, and partitioned. No. 2011-22. Working Papers in Economics and Statistics, 2011.
+Grün, Bettina, Ioannis Kosmidis, and Achim Zeileis. Extended beta regression
+in R: Shaken, stirred, mixed, and partitioned. No. 2011-22. Working Papers in
+Economics and Statistics, 2011.
 
-Smithson, Michael, and Jay Verkuilen. "A better lemon squeezer? Maximum-likelihood regression with beta-distributed dependent variables." Psychological methods 11.1 (2006): 54.
+Smithson, Michael, and Jay Verkuilen. "A better lemon squeezer?
+Maximum-likelihood regression with beta-distributed dependent variables."
+Psychological methods 11.1 (2006): 54.
 """
 import numpy as np
 import pandas as pd

diff --git a/betareg.py b/betareg.py
@@ -10,12 +10,13 @@
 Smithson, Michael, and Jay Verkuilen. "A better lemon squeezer? Maximum-likelihood regression with beta-distributed dependent variables." Psychological methods 11.1 (2006): 54.
 """
 import numpy as np
+import pandas as pd
+import statsmodels.api as sm
+
 from scipy.special import gammaln as lgamma
+
 from statsmodels.base.model import GenericLikelihoodModel
-from statsmodels.api import GLM
 from statsmodels.genmod.families import Binomial
-import statsmodels.api as sm
-import pandas as pd
 
 # this is only need while #2024 is open.
 class Logit(sm.families.links.Logit):
@@ -135,8 +136,8 @@ def fit(self, start_params=None, maxiter=100000, maxfun=5000, disp=False,
         """
 
         if start_params is None:
-            start_params = GLM(self.endog, self.exog, family=Binomial()
-                              ).fit(disp=False).params
+            start_params = sm.GLM(self.endog, self.exog, family=Binomial()
+                                 ).fit(disp=False).params
             start_params = np.append(start_params, [0.5] * self.Z.shape[1])
 
         return super(Beta, self).fit(start_params=start_params,

diff --git a/betareg.py b/betareg.py
@@ -19,19 +19,44 @@
 
 # this is only need while #2024 is open.
 class Logit(sm.families.links.Logit):
+
     """Logit tranform that won't overflow with large numbers."""
+
     def inverse(self, z):
         return 1 / (1. + np.exp(-z))
 
 _init_example = """
 
+    Beta regression with default of logit-link for exog and log-link
+    for precision.
+
+    >>> mod = Beta(endog, exog)
+    >>> rslt = mod.fit()
+    >>> print rslt.summary()
+
+    We can also specify a formula and a specific structure and use the
+    identity-link for phi.
+
+    >>> from sm.families.links import identity
+    >>> Z = patsy.dmatrix('~ temp', dat, return_type='dataframe')
+    >>> mod = Beta.from_formula('iyield ~ C(batch, Treatment(10)) + temp',
+    ...                         dat, Z=Z, link_phi=identity())
+
+    In the case of proportion-data, we may think that the precision depends on
+    the number of measurements. E.g for sequence data, on the number of
+    sequence reads covering a site:
+
+    >>> Z = patsy.dmatrix('~ coverage', df)
+    >>> mod = Beta.from_formula('methylation ~ disease + age + gender + coverage', df, Z)
+    >>> rslt = mod.fit()
+
 """
 
 class Beta(GenericLikelihoodModel):
 
     """Beta Regression.
 
-    This implementation uses $\phi\$ as a precision parameter equal to
+    This implementation uses `phi` as a precision parameter equal to
     `a + b` from the Beta parameters.
     """
 
@@ -60,6 +85,11 @@ def __init__(self, endog, exog, Z=None, link=Logit(),
         Examples
         --------
         {example}
+
+        See Also
+        --------
+        :ref:`links`
+
         """.format(example=_init_example)
 
         assert np.all((0 < endog) & (endog < 1))
@@ -121,6 +151,7 @@ def _ll_br(self, y, X, Z, params):
 
         mu = self.link.inverse(np.dot(X, Xparams))
         phi = self.link_phi.inverse(np.dot(Z, Zparams))
+        # TODO: derive a and b and constrain to > 0?
 
         if np.any(phi <= np.finfo(float).eps): return np.array(-np.inf)
 

diff --git a/betareg.py b/betareg.py
@@ -1,5 +1,13 @@
-"""
-Beta Regression.
+# -*- coding: utf-8 -*-
+
+u"""
+Beta regression for modeling rates and proportions.
+
+References
+----------
+Grün, Bettina, Ioannis Kosmidis, and Achim Zeileis. Extended beta regression in R: Shaken, stirred, mixed, and partitioned. No. 2011-22. Working Papers in Economics and Statistics, 2011.
+
+Smithson, Michael, and Jay Verkuilen. "A better lemon squeezer? Maximum-likelihood regression with beta-distributed dependent variables." Psychological methods 11.1 (2006): 54.
 """
 import numpy as np
 from scipy.special import gammaln as lgamma
@@ -9,24 +17,54 @@
 import statsmodels.api as sm
 import pandas as pd
 
-#see http://cran.r-project.org/web/packages/betareg/vignettes/betareg-ext.pdf
-# nice reference:
-# http://psychology3.anu.edu.au/people/smithson/details/Pubs/Smithson_Verkuilen06.pdf
-
+# this is only need while #2024 is open.
 class Logit(sm.families.links.Logit):
     """Logit tranform that won't overflow with large numbers."""
     def inverse(self, z):
         return 1 / (1. + np.exp(-z))
 
-class BetaReg(GenericLikelihoodModel):
+_init_example = """
+
+"""
+
+class Beta(GenericLikelihoodModel):
+
+    """Beta Regression.
 
-    """Beta Regression."""
+    This implementation uses $\phi\$ as a precision parameter equal to
+    `a + b` from the Beta parameters.
+    """
 
     def __init__(self, endog, exog, Z=None, link=Logit(),
             link_phi=sm.families.links.Log(), **kwds):
+        """
+        Parameters
+        ----------
+        endog : array-like
+            1d array of endogenous values (i.e. responses, outcomes,
+            dependent variables, or 'Y' values).
+        exog : array-like
+            2d array of exogeneous values (i.e. covariates, predictors,
+            independent variables, regressors, or 'X' values). A nobs x k
+            array where `nobs` is the number of observations and `k` is
+            the number of regressors. An intercept is not included by
+            default and should be added by the user. See
+            `statsmodels.tools.add_constant`.
+        Z : array-like
+            2d array of variables for the precision phi.
+        link : link
+            Any link in sm.families.links for `exog`
+        link_phi : link
+            Any link in sm.families.links for `Z`
+
+        Examples
+        --------
+        {example}
+        """.format(example=_init_example)
+
         assert np.all((0 < endog) & (endog < 1))
 
-        super(BetaReg, self).__init__(endog, exog, **kwds)
+        super(Beta, self).__init__(endog, exog, **kwds)
         self.link = link
         self.link_phi = link_phi
 
@@ -37,17 +75,41 @@ def __init__(self, endog, exog, Z=None, link=Logit(),
             assert len(self.Z) == len(self.endog)
 
     def nloglikeobs(self, params):
+        """
+        Negative log-likelihood.
+
+        Parameters
+        ----------
+
+        params : np.ndarray
+            Parameter estimates
+        """
         return -self._ll_br(self.endog, self.exog, self.Z, params)
 
     def fit(self, start_params=None, maxiter=100000, maxfun=5000, disp=False,
             method='bfgs', **kwds):
+        """
+        Fit the model.
+
+        Parameters
+        ----------
+        start_params : array-like
+            A vector of starting values for the regression
+            coefficients.  If None, a default is chosen.
+        maxiter : integer
+            The maximum number of iterations
+        disp : bool
+            Show convergence stats.
+        method : str
+            The optimization method to use.
+        """
 
         if start_params is None:
             start_params = GLM(self.endog, self.exog, family=Binomial()
                               ).fit(disp=False).params
             start_params = np.append(start_params, [0.5] * self.Z.shape[1])
 
-        return super(BetaReg, self).fit(start_params=start_params,
+        return super(Beta, self).fit(start_params=start_params,
                                         maxiter=maxiter, maxfun=maxfun,
                                         method=method, disp=disp, **kwds)
 
@@ -74,17 +136,17 @@ def _ll_br(self, y, X, Z, params):
     dat = pd.read_table('gasoline.txt')
     Z = patsy.dmatrix('~ temp', dat, return_type='dataframe')
     # using other precison params with
-    m = BetaReg.from_formula('iyield ~ C(batch, Treatment(10)) + temp', dat,
+    m = Beta.from_formula('iyield ~ C(batch, Treatment(10)) + temp', dat,
             Z=Z, link_phi=sm.families.links.identity())
     print m.fit().summary()
 
     fex = pd.read_csv('foodexpenditure.csv')
-    m = BetaReg.from_formula(' I(food/income) ~ income + persons', fex)
+    m = Beta.from_formula(' I(food/income) ~ income + persons', fex)
     print m.fit().summary()
     #print GLM.from_formula('iyield ~ C(batch) + temp', dat, family=Binomial()).fit().summary()
 
     dev = pd.read_csv('methylation-test.csv')
     dev['methylation'] = sm.families.links.Logit().inverse(dev['methylation'])
-    m = BetaReg.from_formula('methylation ~ age + gender', dev,
+    m = Beta.from_formula('methylation ~ age + gender', dev,
             link_phi=sm.families.links.identity())
     print m.fit().summary()
diff --git a/betareg.py b/betareg.py
@@ -39,20 +39,17 @@ def __init__(self, endog, exog, Z=None, link=Logit(),
     def nloglikeobs(self, params):
         return -self._ll_br(self.endog, self.exog, self.Z, params)
 
-    def fit(self, start_params=None, maxiter=1000000, maxfun=50000,
-            disp=False,
+    def fit(self, start_params=None, maxiter=100000, maxfun=5000, disp=False,
             method='bfgs', **kwds):
+
         if start_params is None:
             start_params = GLM(self.endog, self.exog, family=Binomial()
                               ).fit(disp=False).params
             start_params = np.append(start_params, [0.5] * self.Z.shape[1])
 
         return super(BetaReg, self).fit(start_params=start_params,
-                                        maxiter=maxiter,
-                                        maxfun=maxfun,
-                                        method=method,
-                                        disp=disp,
-                                        **kwds)
+                                        maxiter=maxiter, maxfun=maxfun,
+                                        method=method, disp=disp, **kwds)
 
     def _ll_br(self, y, X, Z, params):
         nz = self.Z.shape[1]

diff --git a/betareg.py b/betareg.py
@@ -1,3 +1,6 @@
+"""
+Beta Regression.
+"""
 import numpy as np
 from scipy.special import gammaln as lgamma
 from statsmodels.base.model import GenericLikelihoodModel
@@ -11,23 +14,28 @@
 # http://psychology3.anu.edu.au/people/smithson/details/Pubs/Smithson_Verkuilen06.pdf
 
 class Logit(sm.families.links.Logit):
+    """Logit tranform that won't overflow with large numbers."""
     def inverse(self, z):
         return 1 / (1. + np.exp(-z))
 
 class BetaReg(GenericLikelihoodModel):
+
+    """Beta Regression."""
+
     def __init__(self, endog, exog, Z=None, link=Logit(),
             link_phi=sm.families.links.Log(), **kwds):
         assert np.all((0 < endog) & (endog < 1))
 
         super(BetaReg, self).__init__(endog, exog, **kwds)
         self.link = link
         self.link_phi = link_phi
-        # how to set default Z?
+
         if Z is None:
             self.Z = np.ones((self.endog.shape[0], 1), dtype='f')
         else:
             self.Z = np.asarray(Z)
             assert len(self.Z) == len(self.endog)
+
     def nloglikeobs(self, params):
         return -self._ll_br(self.endog, self.exog, self.Z, params)
 
@@ -38,15 +46,14 @@ def fit(self, start_params=None, maxiter=1000000, maxfun=50000,
             start_params = GLM(self.endog, self.exog, family=Binomial()
                               ).fit(disp=False).params
             start_params = np.append(start_params, [0.5] * self.Z.shape[1])
-            #start_params = np.append(np.zeros(self.exog.shape[1]), 0.5)
-        #self.exog[0] = np.mean(self.endog)
 
         return super(BetaReg, self).fit(start_params=start_params,
-                                             maxiter=maxiter,
-                                             maxfun=maxfun,
-                                             method=method,
-                                             disp=disp,
-                                             **kwds)
+                                        maxiter=maxiter,
+                                        maxfun=maxfun,
+                                        method=method,
+                                        disp=disp,
+                                        **kwds)
+
     def _ll_br(self, y, X, Z, params):
         nz = self.Z.shape[1]
 
@@ -56,15 +63,16 @@ def _ll_br(self, y, X, Z, params):
         mu = self.link.inverse(np.dot(X, Xparams))
         phi = self.link_phi.inverse(np.dot(Z, Zparams))
 
-        if np.any(phi <= np.finfo(float).eps): return np.array(-inf)
+        if np.any(phi <= np.finfo(float).eps): return np.array(-np.inf)
 
         ll = lgamma(phi) - lgamma(mu * phi) - lgamma((1 - mu) * phi) \
-                + (mu * phi - 1) * np.log(y) + (((1 - mu) * phi) - 1) * np.log(1 - y)
+                + (mu * phi - 1) * np.log(y) + (((1 - mu) * phi) - 1) \
+                * np.log(1 - y)
+
         return ll
 
 if __name__ == "__main__":
 
-    import pandas as pd
     import patsy
     dat = pd.read_table('gasoline.txt')
     Z = patsy.dmatrix('~ temp', dat, return_type='dataframe')

diff --git a/betareg.py b/betareg.py
@@ -4,6 +4,7 @@
 from statsmodels.api import GLM
 from statsmodels.genmod.families import Binomial
 import statsmodels.api as sm
+import pandas as pd
 
 #see http://cran.r-project.org/web/packages/betareg/vignettes/betareg-ext.pdf
 # nice reference:
@@ -16,6 +17,7 @@ def inverse(self, z):
 class BetaReg(GenericLikelihoodModel):
     def __init__(self, endog, exog, Z=None, link=Logit(),
             link_phi=sm.families.links.Log(), **kwds):
+        assert np.all((0 < endog) & (endog < 1))
 
         super(BetaReg, self).__init__(endog, exog, **kwds)
         self.link = link
@@ -26,22 +28,24 @@ def __init__(self, endog, exog, Z=None, link=Logit(),
         else:
             self.Z = np.asarray(Z)
             assert len(self.Z) == len(self.endog)
-
     def nloglikeobs(self, params):
         return -self._ll_br(self.endog, self.exog, self.Z, params)
 
     def fit(self, start_params=None, maxiter=1000000, maxfun=50000,
+            disp=False,
             method='bfgs', **kwds):
         if start_params is None:
-            start_params = GLM(self.endog, self.exog, family=Binomial()).fit().params
-            start_params = np.append(start_params, [1] * self.Z.shape[1])
+            start_params = GLM(self.endog, self.exog, family=Binomial()
+                              ).fit(disp=False).params
+            start_params = np.append(start_params, [0.5] * self.Z.shape[1])
             #start_params = np.append(np.zeros(self.exog.shape[1]), 0.5)
         #self.exog[0] = np.mean(self.endog)
 
         return super(BetaReg, self).fit(start_params=start_params,
                                              maxiter=maxiter,
                                              maxfun=maxfun,
                                              method=method,
+                                             disp=disp,
                                              **kwds)
     def _ll_br(self, y, X, Z, params):
         nz = self.Z.shape[1]
@@ -73,3 +77,9 @@ def _ll_br(self, y, X, Z, params):
     m = BetaReg.from_formula(' I(food/income) ~ income + persons', fex)
     print m.fit().summary()
     #print GLM.from_formula('iyield ~ C(batch) + temp', dat, family=Binomial()).fit().summary()
+
+    dev = pd.read_csv('methylation-test.csv')
+    dev['methylation'] = sm.families.links.Logit().inverse(dev['methylation'])
+    m = BetaReg.from_formula('methylation ~ age + gender', dev,
+            link_phi=sm.families.links.identity())
+    print m.fit().summary()
diff --git a/methylation-test.csv b/methylation-test.csv
@@ -0,0 +1,37 @@
+,gender,age,Basename,ID,id,plate,CpG,methylation
+0,F,58.231,6042324088_R04C01,age58.231_F,id_0,6042324088,CpG_0,1.4828322881625375
+1,M,52.632,6042324088_R06C01,age52.632_M,id_1,6042324088,CpG_0,1.4051509852087734
+2,M,64.679,6042324088_R01C01,age64.679_M,id_2,6042324088,CpG_0,1.4051509852087734
+3,F,55.299,6042324088_R04C02,age55.299_F,id_3,6042324088,CpG_0,1.4370666864933141
+4,M,56.019,6042324088_R02C01,age56.019_M,id_4,6042324088,CpG_0,1.7743677265161857
+5,M,62.021,6042324088_R01C02,age62.021_M,id_5,6042324088,CpG_0,1.4696224926932235
+6,F,52.298,6042324088_R06C02,age52.298_F,id_6,6042324088,CpG_0,1.489478597355121
+7,F,39.71,6042324088_R03C01,age39.71_F,id_7,6042324088,CpG_0,1.564513100525912
+8,F,57.492,6042324088_R05C02,age57.492_F,id_8,6042324088,CpG_0,1.5785565986326349
+9,F,57.624,6042324088_R05C01,age57.624_F,id_9,6042324088,CpG_0,1.242506468328179
+10,F,40.487,6042324088_R03C02,age40.487_F,id_10,6042324088,CpG_0,1.3009807774073552
+11,M,53.662,6042324088_R02C02,age53.662_M,id_11,6042324088,CpG_0,1.5299568447640945
+12,F,58.231,6042324088_R04C01,age58.231_F,id_0,6042324088,CpG_1,2.100996545241666
+13,M,52.632,6042324088_R06C01,age52.632_M,id_1,6042324088,CpG_1,2.1322666810614472
+14,M,64.679,6042324088_R01C01,age64.679_M,id_2,6042324088,CpG_1,2.1322666810614472
+15,F,55.299,6042324088_R04C02,age55.299_F,id_3,6042324088,CpG_1,1.8921458020642403
+16,M,56.019,6042324088_R02C01,age56.019_M,id_4,6042324088,CpG_1,2.3634832752006427
+17,M,62.021,6042324088_R01C02,age62.021_M,id_5,6042324088,CpG_1,2.0805670342015703
+18,F,52.298,6042324088_R06C02,age52.298_F,id_6,6042324088,CpG_1,2.0406555166446796
+19,F,39.71,6042324088_R03C01,age39.71_F,id_7,6042324088,CpG_1,2.175197255017929
+20,F,57.492,6042324088_R05C02,age57.492_F,id_8,6042324088,CpG_1,2.153549513833558
+21,F,57.624,6042324088_R05C01,age57.624_F,id_9,6042324088,CpG_1,1.815289966638249
+22,F,40.487,6042324088_R03C02,age40.487_F,id_10,6042324088,CpG_1,2.060457163597239
+23,M,53.662,6042324088_R02C02,age53.662_M,id_11,6042324088,CpG_1,1.9924301646902063
+24,F,58.231,6042324088_R04C01,age58.231_F,id_0,6042324088,CpG_2,2.682732393117921
+25,M,52.632,6042324088_R06C01,age52.632_M,id_1,6042324088,CpG_2,2.3508277619403852
+26,M,64.679,6042324088_R01C01,age64.679_M,id_2,6042324088,CpG_2,2.1972245773362196
+27,F,55.299,6042324088_R04C02,age55.299_F,id_3,6042324088,CpG_2,2.3383031755961254
+28,M,56.019,6042324088_R02C01,age56.019_M,id_4,6042324088,CpG_2,2.666159259393051
+29,M,62.021,6042324088_R01C02,age62.021_M,id_5,6042324088,CpG_2,2.556365613770146
+30,F,52.298,6042324088_R06C02,age52.298_F,id_6,6042324088,CpG_2,2.376272808785205
+31,F,39.71,6042324088_R03C01,age39.71_F,id_7,6042324088,CpG_2,2.3891995658308174
+32,F,57.492,6042324088_R05C02,age57.492_F,id_8,6042324088,CpG_2,2.571428861772524
+33,F,57.624,6042324088_R05C01,age57.624_F,id_9,6042324088,CpG_2,2.442347035369205
+34,F,40.487,6042324088_R03C02,age40.487_F,id_10,6042324088,CpG_2,2.3891995658308174
+35,M,53.662,6042324088_R02C02,age53.662_M,id_11,6042324088,CpG_2,2.52680914144201
diff --git a/betareg.py b/betareg.py
@@ -6,6 +6,8 @@
 import statsmodels.api as sm
 
 #see http://cran.r-project.org/web/packages/betareg/vignettes/betareg-ext.pdf
+# nice reference:
+# http://psychology3.anu.edu.au/people/smithson/details/Pubs/Smithson_Verkuilen06.pdf
 
 class Logit(sm.families.links.Logit):
     def inverse(self, z):

diff --git a/betareg.py b/betareg.py
@@ -6,6 +6,7 @@
 import statsmodels.api as sm
 
 #see http://cran.r-project.org/web/packages/betareg/vignettes/betareg-ext.pdf
+
 class Logit(sm.families.links.Logit):
     def inverse(self, z):
         return 1 / (1. + np.exp(-z))
@@ -31,7 +32,7 @@ def fit(self, start_params=None, maxiter=1000000, maxfun=50000,
             method='bfgs', **kwds):
         if start_params is None:
             start_params = GLM(self.endog, self.exog, family=Binomial()).fit().params
-            start_params = np.append(start_params, [0.5] * self.Z.shape[1])
+            start_params = np.append(start_params, [1] * self.Z.shape[1])
             #start_params = np.append(np.zeros(self.exog.shape[1]), 0.5)
         #self.exog[0] = np.mean(self.endog)
 
@@ -49,15 +50,22 @@ def _ll_br(self, y, X, Z, params):
         mu = self.link.inverse(np.dot(X, Xparams))
         phi = self.link_phi.inverse(np.dot(Z, Zparams))
 
+        if np.any(phi <= np.finfo(float).eps): return np.array(-inf)
+
         ll = lgamma(phi) - lgamma(mu * phi) - lgamma((1 - mu) * phi) \
                 + (mu * phi - 1) * np.log(y) + (((1 - mu) * phi) - 1) * np.log(1 - y)
         return ll
 
 if __name__ == "__main__":
 
     import pandas as pd
+    import patsy
     dat = pd.read_table('gasoline.txt')
-    m = BetaReg.from_formula('iyield ~ C(batch, Treatment(10)) + temp', dat)
+    Z = patsy.dmatrix('~ temp', dat, return_type='dataframe')
+    # using other precison params with
+    m = BetaReg.from_formula('iyield ~ C(batch, Treatment(10)) + temp', dat,
+            Z=Z, link_phi=sm.families.links.identity())
+    print m.fit().summary()
 
     fex = pd.read_csv('foodexpenditure.csv')
     m = BetaReg.from_formula(' I(food/income) ~ income + persons', fex)

diff --git a/ex.R b/ex.R
@@ -3,9 +3,16 @@ library(betareg)
 data("GasolineYield", package = "betareg")
 data("FoodExpenditure", package = "betareg")
 
-m = betareg(yield ~ batch + temp, data = GasolineYield)
-print(summary(m))
-
 fe_beta = betareg(I(food/income) ~ income + persons, data = FoodExpenditure)
 print(summary(fe_beta))
 
+
+m = betareg(yield ~ batch + temp, data = GasolineYield)
+#print(summary(m))
+
+
+
+gy2 <- betareg(yield ~ batch + temp | temp, data = GasolineYield, link.phi="identity")
+#print(summary(gy2))
+
+
diff --git a/betareg.py b/betareg.py
@@ -3,18 +3,20 @@
 from statsmodels.base.model import GenericLikelihoodModel
 from statsmodels.api import GLM
 from statsmodels.genmod.families import Binomial
+import statsmodels.api as sm
 
 #see http://cran.r-project.org/web/packages/betareg/vignettes/betareg-ext.pdf
-
-def ilogit(a):
-    return 1 / (1. + np.exp(-a))
-
-def logit(a):
-    return np.log(p / (1. - p))
+class Logit(sm.families.links.Logit):
+    def inverse(self, z):
+        return 1 / (1. + np.exp(-z))
 
 class BetaReg(GenericLikelihoodModel):
-    def __init__(self, endog, exog, Z=None, **kwds):
+    def __init__(self, endog, exog, Z=None, link=Logit(),
+            link_phi=sm.families.links.Log(), **kwds):
+
         super(BetaReg, self).__init__(endog, exog, **kwds)
+        self.link = link
+        self.link_phi = link_phi
         # how to set default Z?
         if Z is None:
             self.Z = np.ones((self.endog.shape[0], 1), dtype='f')
@@ -25,7 +27,8 @@ def __init__(self, endog, exog, Z=None, **kwds):
     def nloglikeobs(self, params):
         return -self._ll_br(self.endog, self.exog, self.Z, params)
 
-    def fit(self, start_params=None, maxiter=1000000, maxfun=50000, **kwds):
+    def fit(self, start_params=None, maxiter=1000000, maxfun=50000,
+            method='bfgs', **kwds):
         if start_params is None:
             start_params = GLM(self.endog, self.exog, family=Binomial()).fit().params
             start_params = np.append(start_params, [0.5] * self.Z.shape[1])
@@ -35,15 +38,16 @@ def fit(self, start_params=None, maxiter=1000000, maxfun=50000, **kwds):
         return super(BetaReg, self).fit(start_params=start_params,
                                              maxiter=maxiter,
                                              maxfun=maxfun,
+                                             method=method,
                                              **kwds)
     def _ll_br(self, y, X, Z, params):
         nz = self.Z.shape[1]
 
         Xparams = params[:-nz]
         Zparams = params[-nz:]
 
-        mu = ilogit(np.dot(X, Xparams))
-        phi = np.exp(np.dot(Z, Zparams))
+        mu = self.link.inverse(np.dot(X, Xparams))
+        phi = self.link_phi.inverse(np.dot(Z, Zparams))
 
         ll = lgamma(phi) - lgamma(mu * phi) - lgamma((1 - mu) * phi) \
                 + (mu * phi - 1) * np.log(y) + (((1 - mu) * phi) - 1) * np.log(1 - y)

diff --git a/betareg.py b/betareg.py
@@ -47,13 +47,15 @@ def _ll_br(self, y, X, Z, params):
 
         ll = lgamma(phi) - lgamma(mu * phi) - lgamma((1 - mu) * phi) \
                 + (mu * phi - 1) * np.log(y) + (((1 - mu) * phi) - 1) * np.log(1 - y)
-        print ll.sum()
         return ll
 
 if __name__ == "__main__":
 
     import pandas as pd
     dat = pd.read_table('gasoline.txt')
-    m = BetaReg.from_formula('iyield ~ C(batch) + temp', dat)
+    m = BetaReg.from_formula('iyield ~ C(batch, Treatment(10)) + temp', dat)
+
+    fex = pd.read_csv('foodexpenditure.csv')
+    m = BetaReg.from_formula(' I(food/income) ~ income + persons', fex)
     print m.fit().summary()
     #print GLM.from_formula('iyield ~ C(batch) + temp', dat, family=Binomial()).fit().summary()
diff --git a/ex.R b/ex.R
@@ -1,6 +1,11 @@
 library(betareg)
 
 data("GasolineYield", package = "betareg")
+data("FoodExpenditure", package = "betareg")
+
 m = betareg(yield ~ batch + temp, data = GasolineYield)
 print(summary(m))
 
+fe_beta = betareg(I(food/income) ~ income + persons, data = FoodExpenditure)
+print(summary(fe_beta))
+
diff --git a/foodexpenditure.csv b/foodexpenditure.csv
@@ -0,0 +1,39 @@
+food,income,persons
+15.998,62.476,1
+16.652,82.304,5
+21.741,74.679,3
+7.431,39.151,3
+10.481,64.724,5
+13.548,36.786,3
+23.256,83.052,4
+17.976,86.935,1
+14.161,88.233,2
+8.825,38.695,2
+14.184,73.831,7
+19.604,77.122,3
+13.728,45.519,2
+21.141,82.251,2
+17.446,59.862,3
+9.629,26.563,3
+14.005,61.818,2
+9.16,29.682,1
+18.831,50.825,5
+7.641,71.062,4
+13.882,41.99,4
+9.67,37.324,3
+21.604,86.352,5
+10.866,45.506,2
+28.98,69.929,6
+10.882,61.041,2
+18.561,82.469,1
+11.629,44.208,2
+18.067,49.467,5
+14.539,25.905,5
+19.192,79.178,5
+25.918,75.811,3
+28.833,82.718,6
+15.869,48.311,4
+14.91,42.494,5
+9.55,40.573,4
+23.066,44.872,6
+14.751,27.167,7
diff --git a/betabinomial.py → betareg.py b/betabinomial.py → betareg.py
@@ -12,9 +12,9 @@ def ilogit(a):
 def logit(a):
     return np.log(p / (1. - p))
 
-class BetaBinomial(GenericLikelihoodModel):
+class BetaReg(GenericLikelihoodModel):
     def __init__(self, endog, exog, Z=None, **kwds):
-        super(BetaBinomial, self).__init__(endog, exog, **kwds)
+        super(BetaReg, self).__init__(endog, exog, **kwds)
         # how to set default Z?
         if Z is None:
             self.Z = np.ones((self.endog.shape[0], 1), dtype='f')
@@ -23,7 +23,7 @@ def __init__(self, endog, exog, Z=None, **kwds):
             assert len(self.Z) == len(self.endog)
 
     def nloglikeobs(self, params):
-        return -self._ll_bb(self.endog, self.exog, self.Z, params)
+        return -self._ll_br(self.endog, self.exog, self.Z, params)
 
     def fit(self, start_params=None, maxiter=1000000, maxfun=50000, **kwds):
         if start_params is None:
@@ -32,11 +32,11 @@ def fit(self, start_params=None, maxiter=1000000, maxfun=50000, **kwds):
             #start_params = np.append(np.zeros(self.exog.shape[1]), 0.5)
         #self.exog[0] = np.mean(self.endog)
 
-        return super(BetaBinomial, self).fit(start_params=start_params,
+        return super(BetaReg, self).fit(start_params=start_params,
                                              maxiter=maxiter,
                                              maxfun=maxfun,
                                              **kwds)
-    def _ll_bb(self, y, X, Z, params):
+    def _ll_br(self, y, X, Z, params):
         nz = self.Z.shape[1]
 
         Xparams = params[:-nz]
@@ -54,5 +54,6 @@ def _ll_bb(self, y, X, Z, params):
 
     import pandas as pd
     dat = pd.read_table('gasoline.txt')
-    m = BetaBinomial.from_formula('iyield ~ C(batch) + temp', dat)
+    m = BetaReg.from_formula('iyield ~ C(batch) + temp', dat)
     print m.fit().summary()
+    #print GLM.from_formula('iyield ~ C(batch) + temp', dat, family=Binomial()).fit().summary()
diff --git a/gistfile1.txt b/gistfile1.txt
@@ -1 +0,0 @@
-d

diff --git a/gistfile1.txt b/gistfile1.txt
@@ -0,0 +1 @@
+d
diff --git a/gistfile1.txt b/gistfile1.txt
@@ -0,0 +1 @@
+d
diff --git a/betabinomial.py b/betabinomial.py
@@ -0,0 +1,58 @@
+import numpy as np
+from scipy.special import gammaln as lgamma
+from statsmodels.base.model import GenericLikelihoodModel
+from statsmodels.api import GLM
+from statsmodels.genmod.families import Binomial
+
+#see http://cran.r-project.org/web/packages/betareg/vignettes/betareg-ext.pdf
+
+def ilogit(a):
+    return 1 / (1. + np.exp(-a))
+
+def logit(a):
+    return np.log(p / (1. - p))
+
+class BetaBinomial(GenericLikelihoodModel):
+    def __init__(self, endog, exog, Z=None, **kwds):
+        super(BetaBinomial, self).__init__(endog, exog, **kwds)
+        # how to set default Z?
+        if Z is None:
+            self.Z = np.ones((self.endog.shape[0], 1), dtype='f')
+        else:
+            self.Z = np.asarray(Z)
+            assert len(self.Z) == len(self.endog)
+
+    def nloglikeobs(self, params):
+        return -self._ll_bb(self.endog, self.exog, self.Z, params)
+
+    def fit(self, start_params=None, maxiter=1000000, maxfun=50000, **kwds):
+        if start_params is None:
+            start_params = GLM(self.endog, self.exog, family=Binomial()).fit().params
+            start_params = np.append(start_params, [0.5] * self.Z.shape[1])
+            #start_params = np.append(np.zeros(self.exog.shape[1]), 0.5)
+        #self.exog[0] = np.mean(self.endog)
+
+        return super(BetaBinomial, self).fit(start_params=start_params,
+                                             maxiter=maxiter,
+                                             maxfun=maxfun,
+                                             **kwds)
+    def _ll_bb(self, y, X, Z, params):
+        nz = self.Z.shape[1]
+
+        Xparams = params[:-nz]
+        Zparams = params[-nz:]
+
+        mu = ilogit(np.dot(X, Xparams))
+        phi = np.exp(np.dot(Z, Zparams))
+
+        ll = lgamma(phi) - lgamma(mu * phi) - lgamma((1 - mu) * phi) \
+                + (mu * phi - 1) * np.log(y) + (((1 - mu) * phi) - 1) * np.log(1 - y)
+        print ll.sum()
+        return ll
+
+if __name__ == "__main__":
+
+    import pandas as pd
+    dat = pd.read_table('gasoline.txt')
+    m = BetaBinomial.from_formula('iyield ~ C(batch) + temp', dat)
+    print m.fit().summary()
diff --git a/ex.R b/ex.R
@@ -0,0 +1,6 @@
+library(betareg)
+
+data("GasolineYield", package = "betareg")
+m = betareg(yield ~ batch + temp, data = GasolineYield)
+print(summary(m))
+
diff --git a/gasoline.txt b/gasoline.txt
@@ -0,0 +1,33 @@
+iyield	gravity	pressure	temp10	temp	batch
+0.122	50.8	8.6	190	205	1
+0.223	50.8	8.6	190	275	1
+0.347	50.8	8.6	190	345	1
+0.457	50.8	8.6	190	407	1
+0.08	40.8	3.5	210	218	2
+0.131	40.8	3.5	210	273	2
+0.266	40.8	3.5	210	347	2
+0.074	40	6.1	217	212	3
+0.182	40	6.1	217	272	3
+0.304	40	6.1	217	340	3
+0.069	38.4	6.1	220	235	4
+0.152	38.4	6.1	220	300	4
+0.26	38.4	6.1	220	365	4
+0.336	38.4	6.1	220	410	4
+0.144	40.3	4.8	231	307	5
+0.268	40.3	4.8	231	367	5
+0.349	40.3	4.8	231	395	5
+0.1	32.2	5.2	236	267	6
+0.248	32.2	5.2	236	360	6
+0.317	32.2	5.2	236	402	6
+0.028	41.3	1.8	267	235	7
+0.064	41.3	1.8	267	275	7
+0.161	41.3	1.8	267	358	7
+0.278	41.3	1.8	267	416	7
+0.05	38.1	1.2	274	285	8
+0.176	38.1	1.2	274	365	8
+0.321	38.1	1.2	274	444	8
+0.14	32.2	2.4	284	351	9
+0.232	32.2	2.4	284	424	9
+0.085	31.8	0.2	316	365	10
+0.147	31.8	0.2	316	379	10
+0.18	31.8	0.2	316	428	10