1、引入相关库
import numpy as np
import json
import csv
import pandas as pd
2、从json数据集里获取数据
# Load the review dump (JSON Lines: one JSON object per line) and collect
# the interactions it contains.
#
# Results left at module level:
#   json_data  - list of the parsed JSON objects
#   click_v    - rows of [uid, iid, rating, review]
#   click_list - rows of [uid, iid, rating]
#   user_count - number of distinct reviewerID values
#   item_count - number of distinct asin values
#   rate_count - largest 'overall' value seen (0 if there are no rows)

# The encoding is given explicitly: without it open() falls back to the
# locale's preferred encoding, which is not UTF-8 on many Windows setups
# and makes decoding the review text fail.
with open('E:/database/Amson/Toys/Toys_and_Games_5.json', 'r', encoding='utf-8') as f:
    # Parse while streaming so the raw lines are never all held in memory;
    # blank lines (e.g. a trailing newline) are skipped instead of crashing
    # json.loads.
    json_data = [json.loads(line) for line in f if line.strip()]

click_v = []
click_list = []
user_ids = set()
item_ids = set()
rate_count = 0

for obj in json_data:
    uid = obj['reviewerID']
    iid = obj['asin']
    rating = obj['overall']
    # Some records carry no review text; fall back to an empty string.
    review = obj.get('reviewText', '')

    user_ids.add(uid)
    item_ids.add(iid)
    if rating > rate_count:
        rate_count = rating

    click_v.append([uid, iid, rating, review])
    click_list.append([uid, iid, rating])

# Print the extracted statistics and a sample of the interactions.
user_count = len(user_ids)
item_count = len(item_ids)
print(user_count,item_count,rate_count)
#print(click_v)
print(click_list[:10])
输出结果:

3、创建用户ID和物品ID的映射字典,将字符串ID替换为数字索引
# Build the user-ID and item-ID mapping dictionaries and, in the same pass,
# replace the string IDs in click_v with their integer indices.
#
# Indices are handed out in order of first appearance in click_v, so the
# mapping is identical on every run. Enumerating a set of strings instead
# would give an order that depends on string hashing, which Python
# randomizes per process by default.
# Only click_v is rewritten here; click_list is not touched.
user_id_map = {}
item_id_map = {}
for row in click_v:
    # setdefault returns the existing index, or stores and returns the next
    # free one (the current dict size) the first time an ID is seen.
    row[0] = user_id_map.setdefault(row[0], len(user_id_map))
    row[1] = item_id_map.setdefault(row[1], len(item_id_map))
print(click_v[:10])
结果:

4、将数据存入csv文件里
# Export click_v to a CSV file: one header line followed by one line per row.
header = ['uid','iid','rating','review']
file_name = 'E:/database/Amson/Toys/Toys_and_Games_5.csv'

# newline='' lets the csv module control line endings itself.
with open(file_name, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    # Header first, then every data row, in a single call.
    csv_writer.writerows([header, *click_v])
5、查看该csv文件里的内容
# Read the exported CSV back into a DataFrame and preview its first rows.
csv_path = 'E:/database/Amson/Toys/Toys_and_Games_5.csv'
data = pd.read_csv(csv_path)
data.head()
结果:

文章讲述了如何使用Python的json、csv和pandas库(numpy虽被引入,但文中并未实际用到),从一个JSON数据集中提取用户对物品的评分和评论,创建将字符串ID映射为数字索引的字典,然后将转换后的数据存储到CSV文件中。
1602

被折叠的 条评论
为什么被折叠?



