import os
import sys

# Put the parent of this file's directory on sys.path so the project-local
# ``server`` package below can be imported when this file is run directly.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, BASE_DIR)

from server.utils import HBaseUtils  # noqa: E402
from server import pool, redis_client  # noqa: E402
from datetime import datetime  # noqa: E402
import logging  # noqa: E402

logger = logging.getLogger('recommend')


def _timestamp():
    """Return the current local time formatted as ``YYYY-mm-dd HH:MM:SS``."""
    return datetime.now().strftime('%Y-%m-%d %H:%M:%S')


def _as_score(value):
    """Convert a stored similarity value to ``float``.

    Bytes values are decoded first; anything ``float()`` cannot parse raises
    ``ValueError`` or ``TypeError``.
    """
    if isinstance(value, (bytes, bytearray)):
        value = value.decode()
    return float(value)


class ReadRecall(object):
    """Service for reading recall results from HBase and Redis."""

    def __init__(self):
        self.client = redis_client
        self.hbu = HBaseUtils(pool)

    def read_hbase_recall_data(self, table_name, key_format, column_format):
        """Read and merge a user's recall results for a given channel.

        :param table_name: HBase table to read from.
        :param key_format: row key passed through to ``get_table_cells``.
        :param column_format: column passed through to ``get_table_cells``.
        :return: list of the distinct items found across all returned cells
            (order is unspecified); an empty list if the read fails.
        """
        try:
            data = self.hbu.get_table_cells(table_name, key_format, column_format)
            # NOTE: the stored recall results are not cleared here; this
            # method only reads them.
        except Exception as e:
            logger.warning("{} WARN read {} recall exception:{}".format(_timestamp(), table_name, e))
            data = []

        # ``data`` holds several recall results; merge them without duplicates.
        merged = set()
        for cell in data:
            # SECURITY: eval() executes whatever text is stored in the cell, so
            # this is only safe while the HBase content is fully trusted.
            # Consider ast.literal_eval on the decoded text instead.
            merged.update(eval(cell))
        return list(merged)

    def read_redis_new_article(self, channel_id):
        """Read the new-article recall results of a channel from Redis.

        :param channel_id: channel whose ``ch:<channel_id>:new`` sorted set is read.
        :return: list of member values converted to ``int``, in the order
            returned by ``zrevrange``; an empty list if the Redis read fails.
        """
        logger.warning("{} WARN read channel {} redis new article".format(_timestamp(), channel_id))
        key = "ch:{}:new".format(channel_id)
        try:
            members = self.client.zrevrange(key, 0, -1)
        except Exception as e:
            logger.warning("{} WARN read new article exception:{}".format(_timestamp(), e))
            members = []
        return [int(member) for member in members]

    def read_redis_hot_article(self, channel_id, hot_num):
        """Read the hot-article recall results of a channel from Redis.

        :param channel_id: channel whose ``ch:<channel_id>:hot`` sorted set is read.
        :param hot_num: maximum number of articles to return.
        :return: list of at most ``hot_num`` member values converted to
            ``int``, in the order returned by ``zrevrange``; an empty list if
            the Redis read fails.
        """
        logger.warning("{} WARN read channel {} redis hot article".format(_timestamp(), channel_id))
        key = "ch:{}:hot".format(channel_id)
        try:
            members = self.client.zrevrange(key, 0, -1)
        except Exception as e:
            # Message fixed: this handler previously reported "new article".
            logger.warning("{} WARN read hot article exception:{}".format(_timestamp(), e))
            members = []

        # A channel can hold many hot articles (presumably scored by click
        # count, per the original comment), so keep only the leading hot_num.
        return [int(member) for member in members][:hot_num]

    @staticmethod
    def _rank_by_similarity(row):
        """Return the ``(column, value)`` pairs of ``row``, highest value first.

        Values are compared numerically so that byte-encoded scores such as
        ``b'1e-05'`` and ``b'0.5'`` rank by magnitude rather than byte order.
        If any value is not numeric, fall back to comparing the raw values.
        """
        pairs = list(row.items())
        try:
            return sorted(pairs, key=lambda pair: _as_score(pair[1]), reverse=True)
        except (TypeError, ValueError):
            return sorted(pairs, key=lambda pair: pair[1], reverse=True)

    def read_hbase_article_similar(self, table_name, key_format, article_num):
        """Read the articles most similar to a given article from HBase.

        Table layout this was tested against (from the original notes):
            create 'article_similar', 'similar'
            put 'article_similar', '1', 'similar:1', 0.2
            put 'article_similar', '1', 'similar:2', 0.34

        :param table_name: HBase table to read from.
        :param key_format: row key passed through to ``get_table_row``.
        :param article_num: maximum number of similar articles to return.
        :return: list of article ids (the integer after ``:`` in each column
            name), highest similarity first; an empty list on any failure.
        """
        try:
            row = self.hbu.get_table_row(table_name, key_format)
            ranked = self._rank_by_similarity(row)[:article_num]
            # Column names are bytes such as b'similar:2'; the id follows ':'.
            res = [int(column.decode().split(':')[1]) for column, _score in ranked]
        except Exception as e:
            logger.error(
                "{} ERROR read similar article exception: {}".format(_timestamp(), e))
            res = []
        return res


if __name__ == '__main__':
    rr = ReadRecall()
    # Other manual checks:
    # print(rr.read_hbase_recall_data('cb_recall', b'recall:user:1114864874141253632', b'als:18'))
    # print(rr.read_redis_new_article(18))
    print(rr.read_redis_hot_article(18, 50))