You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

120 lines
3.5 KiB
Python

import requests
import time,json,os,random
from joblib import Parallel, delayed
ideal_delta = 60 * 15 # 15 minutes — default freshness window for cached lookups
# NOTE(review): appears unused — the requests.get call below hardcodes timeout=1;
# confirm before removing. Also note the trailing-dot float literal (0.0).
request_timeout = 0.
# Seed site whose interverse document is used to warm the cache at startup.
cache_origin = 'https://libresolutions.network'
# Identifying headers sent with every crawl request so site operators can
# recognise (and read about) this crawler.
crawler_header = {'User-agent': 'interverse-crawler',
                  'info': 'https://libresolutions.network/videos/interverse-demo-1/'}
# Only HTTPS endpoints are probed.
schemes = ['https://']
# Candidate well-known paths where an interverse/discover document may live,
# tried in order until one responds with valid JSON.
locations = [
    '/.well-known/discover.json',
    '/.well-known/interverse',
    '/interverse.json'
]
def dictify(lst):
    """Index interverse documents by their 'location' field.

    Parameters
    ----------
    lst : iterable of dict or None
        Fetch results; ``None`` entries (failed lookups) are skipped.

    Returns
    -------
    dict
        Mapping of each document's ``'location'`` value to the document.
    """
    # `is not None` (identity) rather than `!= None`; dict comprehension
    # replaces the manual accumulator loop.
    return {item['location']: item for item in lst if item is not None}
class Cache:
    """In-memory cache of interverse/discover documents keyed by site URL.

    ``self.links`` maps a URL to ``{'time': <unix timestamp>, 'data': <parsed
    JSON dict or None>}``.  Entries are fresh for ``self.delta`` seconds.
    Failed lookups are recorded with a far-future timestamp so the host is not
    re-probed for many hours.
    """

    def __init__(self, delta=None):
        """Create the cache and immediately warm it from ``cache_origin``.

        Parameters
        ----------
        delta : float or None
            Freshness window in seconds; defaults to ``ideal_delta``.
        """
        self.delta = ideal_delta if delta is None else delta
        self.links = {}  # url -> {'time': float, 'data': dict | None}
        self.build_cache()

    @staticmethod
    def _normalize_host(url):
        """Strip scheme and slashes so the host can be re-joined with scheme+path."""
        return url.replace('https://', '').replace('http://', '').replace('/', '')

    @staticmethod
    def _collect_connections(origin):
        """Gather every connection URL from an interverse document.

        Best-effort: tolerates ``origin`` being None or missing either key,
        mirroring the original try/except-pass behaviour.
        """
        connections = []
        if not isinstance(origin, dict):
            return connections
        conns = origin.get('connections')
        if isinstance(conns, list):
            connections.extend(conns)
        groups = origin.get('connection_groups')
        if isinstance(groups, dict):
            for group in groups.values():
                connections.extend(group)
        return connections

    def load_data(self, url):
        """Fetch (or serve from cache) the interverse document for *url*.

        Tries every scheme/location combination until one returns valid JSON.
        Returns the parsed document, or None when no endpoint responded.
        """
        print(f"Loading interverse data for :{url}")
        now = time.time()
        if url in self.links and now - self.links[url]['time'] <= self.delta:
            print(f"Using cached result for {url}")
            return self.links[url]['data']
        host = self._normalize_host(url)
        for scheme in schemes:
            for path in locations:
                try:
                    data = requests.get(scheme + host + path,
                                        headers=crawler_header, timeout=1).json()
                except (requests.RequestException, ValueError):
                    # Connection/timeout failures and non-JSON bodies: try the
                    # next candidate location. (ValueError covers JSON decode
                    # errors raised by .json().)
                    continue
                # BUG FIX: the original used `if l.find('discover'):`, which is
                # truthy for index 13 AND for -1 (not found), so the rewrite
                # ran for every location. Only translate actual discover docs.
                if 'discover' in path:
                    # Translate the legacy discover format to interverse.
                    data = json.loads(json.dumps(data).replace(
                        "preview_connections", "connection_groups"))
                print(f"Interverse connection found at {path}")
                self.links[url] = {'time': time.time(), 'data': data}
                return data
        # Nothing answered: remember the failure with a timestamp 8-16 hours
        # in the future so we do not hammer the host again soon.
        self.links[url] = {
            'data': None,
            'time': time.time() + (60 * 60) * random.randint(8, 16),
        }
        return None

    def get_interverse_data(self, url):
        """Return *url*'s document plus the documents of all its connections.

        Returns ``{'main': <origin doc or None>, 'connections': {location: doc}}``.
        """
        origin = self.load_data(url)
        connections = self._collect_connections(origin)
        # Parallel() with default n_jobs runs sequentially in-process, so
        # self.links is updated as a side effect of each load_data call.
        fetched = Parallel()(delayed(self.load_data)(c) for c in connections)
        return {
            'main': origin,
            'connections': dictify(fetched),
        }

    def build_cache(self):
        """Warm the cache from ``cache_origin`` and one hop of its connections."""
        print("Building cache..\nThis may take some time")
        origin = self.load_data(cache_origin)
        connections = self._collect_connections(origin)
        # Results are discarded; the calls populate self.links as a side effect.
        Parallel()(delayed(self.get_interverse_data)(c) for c in connections)
if __name__ == '__main__':
    # Running as a script builds the cache immediately (Cache.__init__ calls
    # build_cache, which crawls cache_origin and its connections).
    cache = Cache()