# 2022-10-09 11:48:31 -04:00
|
|
|
import requests,time,json
|
|
|
|
ideal_delta = 60*5 #5 minutes
|
|
|
|
|
|
|
|
crawler_header = {'User-agent': 'interverse-crawler','info':'https://libresolutions.network/videos/interverse-demo-1/'}
|
|
|
|
schemes = ['http://','https://']
|
|
|
|
locations = [
|
|
|
|
'/.well-known/discover.json',
|
|
|
|
'/.well-known/interverse',
|
|
|
|
'/interverse.json',
|
|
|
|
'/discover.json'
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Cache:
|
|
|
|
def __init__(self,delta=None):
|
|
|
|
if delta==None:
|
|
|
|
self.delta = ideal_delta
|
|
|
|
else:
|
|
|
|
self.delta = delta
|
|
|
|
self.links={}
|
|
|
|
# link = key:{data,time}
|
|
|
|
|
|
|
|
|
|
|
|
def load_data(self,url):
|
|
|
|
data = None
|
|
|
|
t = time.time()
|
|
|
|
if url in self.links:
|
|
|
|
if t - self.links[url]['time'] <= self.delta:
|
|
|
|
print(f"Using cached result for {url}")
|
|
|
|
return self.links[url]['data']
|
|
|
|
for s in schemes:
|
|
|
|
for l in locations:
|
|
|
|
try:
|
|
|
|
data = requests.get(s+url+l,headers=crawler_header,timeout=3).json()
|
|
|
|
if l.find('discover'):
|
|
|
|
#translate discover to interverse
|
|
|
|
data = json.loads(json.dumps(data).replace("preview_connections","connection_groups"))
|
|
|
|
print(f"Interverse connection found at {l}")
|
|
|
|
t = time.time()
|
|
|
|
self.links[url] = {
|
|
|
|
'time':t,
|
|
|
|
'data':data,
|
|
|
|
}
|
|
|
|
|
|
|
|
return data
|
|
|
|
except:
|
|
|
|
pass
|
|
|
|
if data != None:
|
|
|
|
t = time.time()
|
|
|
|
self.links[url] = {
|
|
|
|
'time':t,
|
|
|
|
'data':data,
|
|
|
|
}
|
|
|
|
|
|
|
|
if data == None:
|
|
|
|
#If no data is returned, wait longer before attempting again
|
|
|
|
self.links[url] = {
|
|
|
|
'data':None,
|
|
|
|
'time':t+ideal_delta
|
|
|
|
}
|
|
|
|
return data
|
2022-10-10 04:22:04 -04:00
|
|
|
def get_interverse_data(self,url):
|
|
|
|
origin = self.load_data(url)
|
|
|
|
connections = {}
|
|
|
|
for con in origin['connections']:
|
|
|
|
dat = self.load_data(con.replace('https://','').replace('http://','').replace("/",''))
|
|
|
|
if dat != None:
|
|
|
|
connections[con] = dat
|
|
|
|
for g in origin['connection_groups']:
|
|
|
|
for con in origin['connection_groups'][g]:
|
|
|
|
dat = self.load_data(con.replace('https://','').replace('http://','').replace("/",''))
|
|
|
|
if dat != None:
|
|
|
|
connections[con] = dat
|
|
|
|
return {
|
|
|
|
'main':origin,
|
|
|
|
'connections':connections
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
c = Cache()
|
|
|
|
|
|
|
|
|