环境
pip 22.1.2,python 3.6.4
安装依赖
yum install cyrus-sasl-devel -y
yum install gcc-c++ -y
pip3 install sasl
pip3 install thrift
pip3 install thrift-sasl
pip3 install pyhive
Python使用pyhive连接Hive(含Kerberos)
from pyhive import sqlalchemy_hive,hive
from krbcontext import krbcontext
config = {
"kerberos_principal": "dgfl",
"keytab_file": '/root/dgfl.keytab',
"kerberos_ccache_file": '/tmp/krb5cc_0',
"AUTH_MECHANISM": "GSSAPI"
}
with krbcontext(using_keytab=True,
principal=config['kerberos_principal'],
keytab_file=config['keytab_file'],
ccache_file=config['kerberos_ccache_file']):
con = hive.connect(host='172.23.6.47',port=10000,auth='KERBEROS',kerberos_service_name="hive")
cursor = con.cursor()
cursor.execute('select * from tmp.tmp0616 limit 5')
datas = cursor.fetchall()
print(datas)
cursor.close()
con.close()
Python使用impala连接Hive(含Kerberos)
from krbcontext import krbcontext
from impala.dbapi import connect
with krbcontext(using_keytab=True,
principal='dgfl@DGFL.ORG',
keytab_file='/root/dgfl.keytab'):
conn = connect(host='172.23.3.52',
port=21050,
database='tmp',
auth_mechanism= 'GSSAPI',
kerberos_service_name='impala')
query="select * from tmp.tmp0616 limit 5"
cur = conn.cursor()
cur.execute(query)
res = cur.fetchall()
print(res)
Python使用JayDeBeApi连接Hive
# -*- coding: utf8 -*-
import jaydebeapi
import os
class Jdbc:
base_sql = [
"set hive.mapred.mode=nonstrict",
"set hive.strict.checks.cartesian.product=false",
"set hive.execution.engine=tez"
]
def query(self, sql, db = 'tap_ods'):
url = 'jdbc:hive2://xx.xx:10000/' + db
dirver = 'org.apache.hive.jdbc.HiveDriver'
DIR = os.getcwd() + '/lib/'
jarFile = [
DIR + 'hive-jdbc-3.1.1.jar',
DIR + 'commons-logging-1.2.jar',
DIR + 'hive-service-3.1.1.jar',
DIR + 'hive-service-rpc-3.1.1.jar',
DIR + 'libthrift-0.12.0.jar',
DIR + 'httpclient-4.5.9.jar',
DIR + 'httpcore-4.4.11.jar',
DIR + 'slf4j-api-1.7.26.jar',
DIR + 'curator-framework-4.2.0.jar',
DIR + 'curator-recipes-4.2.0.jar',
DIR + 'curator-client-4.2.0.jar',
DIR + 'commons-lang-2.6.jar',
DIR + 'hadoop-common-3.2.0.jar',
DIR + 'httpcore-4.4.11.jar',
DIR + 'hive-common-3.1.1.jar',
DIR + 'hive-serde-3.1.1.jar',
DIR + 'guava-28.0-jre.jar'
]
conn = jaydebeapi.connect(dirver, url, ['user', 'password'], jarFile)
curs = conn.cursor()
for _sql in self.base_sql:
curs.execute(_sql)
curs.execute(sql)
result = curs.fetchall()
curs.close()
conn.close()
return result
可能遇到的问题:
一、使用impala连接hive失败,报错如下:ModuleNotFoundError: No module named ‘impala.dbapi’
解决:安装以下几个包:pip install …
six
bit_array
thrift
thrift_sasl
pandas(不装应该也可以)
sqlalchemy
pytest(不装应该也可以)
impyla
二、缺少依赖包
解决:安装响应依赖包