spacepaste

  1.  
  2. import pandas as pd #version 0.23.0
  3. import numpy as np #version 1.14.3
  4. def dummy_prep(data, method=None):
  5. varlist = data.columns[(data.dtypes == 'category').values]
  6. if not method:
  7. return pd.get_dummies(data.loc[:,data.dtypes == 'category'])
  8. if method == 'drop_first':
  9. return pd.get_dummies(data.loc[:,data.dtypes == 'category'], drop_first=True)
  10. if method == 'deviation':
  11. dummies = pd.get_dummies(data.loc[:,data.dtypes == 'category'])
  12. dummylist = {i:[x for x in dummies.columns if i in x] for i in varlist}
  13. for var in dummylist:
  14. dropout = dummylist[var][0]
  15. keepers = dummylist[var][1:]
  16. dummies.loc[dummies[dropout]==1, keepers] = -1
  17. del dummies[dropout]
  18. return dummies
  19. test1 = pd.DataFrame()
  20. test1['cat2'] = pd.Categorical(np.random.randint(low=0, high=2, size=100))
  21. test1['cat3'] = pd.Categorical(np.random.randint(low=0, high=3, size=100))
  22. test1['cat4'] = pd.Categorical(np.random.randint(low=0, high=4, size=100))
  23. print(test1.groupby('cat4').cat3.count())
  24. print(test1.head())
  25. dummy_prep(test1[['cat4','cat3','cat2']], method='deviation').head()
  26. ####################### OUTPUT #########################
  27. # https://imgur.com/a/o8mSqDC
  28.