代码如下
from pyhdfs import HdfsClient
# Copy a training file from HDFS to a local CSV, then load it with pandas.
client = HdfsClient(hosts='192.168.1.1:50070')
# The HDFS handle yields bytes, not text. Presumably the error below is what
# appears when those raw bytes are used where text is expected:
# TypeError: cannot use a string pattern on a bytes-like object
# Read the file from HDFS, releasing the handle even if the read fails.
file = client.open("/data/movielens/train/ra.train")
try:
    # Fetch the whole payload (bytes).
    content = file.read()
finally:
    file.close()
# Decode the bytes into text.
s = str(content, "utf-8")
# Write the text to a local .csv file. The `with` block closes (and therefore
# flushes) the file before pandas reads it; without that, read_csv could see
# a truncated or empty file. The explicit encoding keeps the written file
# consistent with the UTF-8 decode above regardless of the system locale.
with open("/home/data/data.csv", "w", encoding="utf-8") as file:
    file.write(s)
# Load the first three columns of the local CSV with pandas.
# NOTE(review): `self.sep` is not defined at module level and `pd` is not
# imported in the visible lines -- this snippet presumably comes from inside
# a method of a larger file; confirm against the surrounding code.
train_data = pd.read_csv("/home/data/data.csv", sep=self.sep, header=None, usecols=[0, 1, 2],
                         names=['user', 'item', 'rating'])