Jun-02-2020, 05:49 PM
I'm trying to scrape data from a game's market and use the data to track it.
So I have the scraper log in, it goes and pulls some items on the market and stores them in a table.
Now I'm trying to have it use those stored items in the table, to compare it to similar items being sold on the market. I have it so after it stores each item in the table, it goes and checks for the item id on each item, then looks them up.
When it goes to look them up again, it is making me log into the game again. Is this because it isn't passing cookies? Also, am I doing all this correctly? I want it to be optimized as best I can.
Here is my code:
So I have the scraper log in, it goes and pulls some items on the market and stores them in a table.
Now I'm trying to have it use those stored items in the table, to compare it to similar items being sold on the market. I have it so after it stores each item in the table, it goes and checks for the item id on each item, then looks them up.
When it goes to look them up again, it is making me log into the game again. Is this because it isn't passing cookies? Also, am I doing all this correctly? I want it to be optimized as best I can.
Here is my code:
from bs4 import BeautifulSoup
from lxml import html
import requests
import re
from collections import defaultdict
# Start the session
session = requests.Session()
market_items = defaultdict(dict)
compare_prices = defaultdict(dict)
# Create the payload
username = "EMAIL"
password = "PASSWORD"
authenticity_token = 0
LOGIN_URL = "https://web.simple-mmo.com/login"
URL = "https://web.simple-mmo.com/market/collectables/all"
PriceURL = "https://web.simple-mmo.com/market/all/all"
def findAveragePrice():
session_requests = requests.session()
key_items = market_items.items()
t = len(key_items)
for key in market_items:
lookID = market_items[key]["ID"]
print(lookID)
payload = {
"itemid": lookID,
"new_page": "true",
"_token": authenticity_token
}
result = session_requests.get(PriceURL, data = payload, headers = dict(referer = PriceURL))
soup = BeautifulSoup(result.content, 'html.parser')
print(result.text)
#print(market_items[key]["PRICE"])
pricematch = soup.find_all('div', class_='individual-item')
print(pricematch)
for match in pricematch:
x = match.find('a')['onclick']
title = x.split("retrieveMarketItem(")[1].strip().split(')')[0]
ITEMID = title.split(",")[0].lstrip()
RdmNum = title.split(",'")[1].lstrip().split("'")[0]
price = title.split(" '")[1].lstrip().split("',")[0]
player = title.split(" '")[2].lstrip().split("'")[0]
time = title.split(" '")[3].lstrip().split("'")[0]
compare_prices[key]["PRICE"]
print(compare_prices)
def main():
session_requests = requests.session()
result = session_requests.get(LOGIN_URL)
tree = html.fromstring(result.text)
authenticity_token = list(set(tree.xpath("//input[@name='_token']/@value")))[0]
payload = {
"email": username,
"password": password,
"_token": authenticity_token
}
result = session_requests.post(LOGIN_URL, data = payload, headers = dict(referer = LOGIN_URL))
result = session_requests.get(URL, headers = dict(referer = URL))
tree = html.fromstring(result.content)
bucket_names = tree.xpath("//div[@class='individual-item']/span/text()")
soup = BeautifulSoup(result.content, 'html.parser')
#print(soup)
collectables = soup.find_all('div', class_='individual-item')
for collectable in collectables:
x = collectable.find('a')['onclick']
#print(x)
title = x.split("retrieveMarketItem(")[1].strip().split(')')[0]
ITEMID = title.split(",")[0].lstrip()
RdmNum = title.split(",'")[1].lstrip().split("'")[0]
price = title.split(" '")[1].lstrip().split("',")[0]
player = title.split(" '")[2].lstrip().split("'")[0]
time = title.split(" '")[3].lstrip().split("'")[0]
#print(title)
#print("ID: " + ITEMID)
#print("UniqueID: " + RdmNum)
#print("PRICE: " + price)
#print("PLAYER: " + player)
#print("TIME: " + time)
#market_items["UniqueID"] = RdmNum
market_items[RdmNum]["ID"] = ITEMID
market_items[RdmNum]["PRICE"] = price
market_items[RdmNum]["SELLER"] = player
market_items[RdmNum]["TIME"] = time
#print(market_items)
findAveragePrice()
if __name__ == '__main__':
main()
So right now, all I'm getting returned is the login page. I think its because cookies aren't being passed through? But Im not sure how to fix that.
