LabelEncoder
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder


def loaddata(path='data/iris.data'):
    """Load the iris CSV and split it into features and raw labels.

    Args:
        path: Location of the header-less CSV file. Defaults to the
            path that was originally hard-coded.

    Returns:
        A tuple ``(X, y)`` where ``X`` holds every column except the
        last one and ``y`` holds the last column (the unencoded label).
    """
    columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'type']
    data = pd.read_csv(path, header=None, names=columns)
    # NOTE(review): the frame presumably mixes numeric columns with a string
    # label column, in which case `.values` yields an object array -- confirm
    # before feeding X to a numeric estimator.
    data = data.values
    X = data[:, :-1]
    y = data[:, -1]
    return X, y


if __name__ == '__main__':
    X, y = loaddata()
    # Fit the encoder on the distinct labels, then map every label to its id.
    label = np.unique(y)
    le = LabelEncoder()
    le.fit(label)
    y = le.transform(y)
    print(y)
pd.Categorical(series).codes
import numpy as np
import pandas as pd


def loaddata(path='data/iris.data'):
    """Load the iris CSV and encode the label column via ``pd.Categorical``.

    Args:
        path: Location of the header-less CSV file. Defaults to the
            path that was originally hard-coded.

    Returns:
        A tuple ``(X, y)`` where ``X`` holds every column except the
        last one and ``y`` holds the last column, i.e. the categorical
        codes assigned to the labels.
    """
    columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'type']
    data = pd.read_csv(path, header=None, names=columns)
    # Replace each label with the integer code of its category.
    data['type'] = pd.Categorical(data['type']).codes
    # The codes are sliced out of the same array as the features, so they
    # share that array's dtype.
    data = data.values
    X = data[:, :-1]
    y = data[:, -1]
    return X, y


if __name__ == '__main__':
    X, y = loaddata()
    print(y)
字典映射
import numpy as np
import pandas as pd


def loaddata(path='iris.data'):
    """Load the iris CSV and encode the label column with a fixed mapping.

    Args:
        path: Location of the header-less CSV file. Defaults to the
            path that was originally hard-coded.

    Returns:
        A tuple ``(X, y)`` where ``X`` holds every column except the
        last one and ``y`` holds the last column, i.e. the mapped
        label ids (0, 1 or 2).
    """
    columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'type']
    label_ids = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
    data = pd.read_csv(path, header=None, names=columns)
    # Labels missing from the mapping become NaN, so the integer cast also
    # acts as a check that every label was recognised.
    data['type'] = data['type'].map(label_ids).astype(int)
    # The ids are sliced out of the same array as the features, so they
    # share that array's dtype.
    data = data.values
    X = data[:, :-1]
    y = data[:, -1]
    return X, y


if __name__ == '__main__':
    X, y = loaddata()
    print(y)
[0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.2.]