while True:
    print("Getting page {}...".format(page))
    params["page"] = page
    data = requests.post(url, json=params).json()
    # uncomment to see all data:
    # print(json.dumps(data, indent=4))
    if not data["data"]:
        break
    # print some data:
    for d in data["data"]:
        print("{:<60} {} {}".format(d["mobile_title"], d["assets"], d["price"]))
    page += 1
I'm writing code to organize the data after crawling.
With the code above, the results are printed page by page as shown below. I want to collect these results into a DataFrame, but how do I do that? My plan is to gather everything into a DataFrame and then save it with to_csv so the file can be opened in Excel.
<Result value>
Getting page 1...
1 2 3
1 2 3
1 2 3
Getting page 2...
1 2 3
1 2 3
1 2 3
Turn each crawled record into a dictionary and append it to a list.
Then build the DataFrame by passing the collected list of dictionaries to the pd.DataFrame constructor.
import requests
import pandas as pd

# url, params and page (starting from 1) are assumed to be set up
# the same way as in the question's code above
data_dict_list = []
while True:
    print("Getting page {}...".format(page))
    params["page"] = page
    data = requests.post(url, json=params).json()
    # uncomment to see all data:
    # print(json.dumps(data, indent=4))
    if not data["data"]:
        break
    # collect each record as a dictionary instead of printing it
    for d in data["data"]:
        data_dict_list.append({"mobile_title": d["mobile_title"], "assets": d["assets"], "price": d["price"]})
        # print("{:<60} {} {}".format(d["mobile_title"], d["assets"], d["price"]))
    page += 1

df = pd.DataFrame(data_dict_list)
df.to_csv("Crawling Collection Results.csv", encoding="utf-8", index=False)
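If the goal is an actual Excel (.xlsx) file rather than a CSV, pandas also offers to_excel. This is a minimal sketch assuming an Excel writer engine such as openpyxl is installed; the file name is just an example:

# write the same DataFrame to a real Excel workbook instead of a CSV
df.to_excel("Crawling Collection Results.xlsx", index=False)

If you stick with to_csv and the file will be opened in Excel, using encoding="utf-8-sig" instead of "utf-8" usually keeps non-ASCII text (such as Korean) from being garbled.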