# Interverse crawler cache module.
import requests
|
|
import time,json,os,random
|
|
from joblib import Parallel, delayed
|
|
|
|
ideal_delta = 60 * 15 # 15 minutes
|
|
request_timeout = 0.
|
|
cache_origin = 'https://libresolutions.network'
|
|
|
|
|
|
|
|
crawler_header = {'User-agent': 'interverse-crawler',
|
|
'info': 'https://libresolutions.network/videos/interverse-demo-1/'}
|
|
schemes = ['https://']
|
|
locations = [
|
|
'/.well-known/discover.json',
|
|
'/.well-known/interverse',
|
|
'/interverse.json'
|
|
]
|
|
|
|
|
|
def dictify(lst):
    """Index a list of interverse documents by their 'location' field.

    None entries (failed lookups from load_data) are skipped.

    :param lst: iterable of dicts each carrying a 'location' key, or None
    :return: {location: document} mapping
    """
    dat = {}
    for item in lst:
        # `is not None` (identity), not `!= None`: None is a singleton.
        if item is not None:
            dat[item['location']] = item
    return dat
class Cache:
    """In-memory cache of interverse/discover documents, keyed by site URL.

    Each entry in ``self.links`` maps url -> {'time': timestamp,
    'data': parsed JSON dict or None}.  A None data entry is a negative
    cache: the fetch failed and the stored timestamp lies 8-16 hours in
    the future so the dead site is not re-polled for a long while.
    """

    def __init__(self, delta=None):
        """
        :param delta: seconds a cached entry stays fresh;
                      defaults to the module-level ideal_delta (15 min).
        """
        self.delta = ideal_delta if delta is None else delta
        self.links = {}
        # Pre-warm the cache immediately (network-heavy).
        self.build_cache()

    def load_data(self, url):
        """Fetch the interverse document for *url*, using the cache when fresh.

        Tries every scheme x location candidate until one returns valid JSON.
        Documents served from a 'discover' location are translated to the
        interverse schema (preview_connections -> connection_groups).

        :param url: site URL; any scheme prefix and slashes are stripped
        :return: parsed document dict, or None when nothing was found
        """
        print(f"Loading interverse data for :{url}")
        data = None
        t = time.time()
        cached = self.links.get(url)
        if cached is not None and t - cached['time'] <= self.delta:
            # Negative-cache entries carry a future timestamp, so the
            # difference is negative and they also hit this branch.
            print(f"Using cached result for {url}")
            return cached['data']
        # Reduce url to a bare host so each scheme can be re-applied.
        host = url.replace('https://', '').replace('http://', '').replace("/", '')
        for s in schemes:
            for l in locations:
                try:
                    # NOTE(review): module constant request_timeout exists but
                    # was never used; keeping the original hard-coded 1s.
                    data = requests.get(
                        s + host + l,
                        headers=crawler_header,
                        timeout=1,
                    ).json()
                    # BUG FIX: the original used `if l.find('discover'):`,
                    # and str.find returns -1 (truthy) when absent — so the
                    # rename ran for every location.  Use membership instead.
                    if 'discover' in l:
                        # translate discover to interverse
                        data = json.loads(json.dumps(data).replace(
                            "preview_connections", "connection_groups"))
                    print(f"Interverse connection found at {l}")
                    self.links[url] = {
                        'time': time.time(),
                        'data': data,
                    }
                    return data
                except Exception:
                    # Best-effort: try the next scheme/location candidate.
                    pass
        if data is not None:
            # Defensive: a fetch bound data but the success path above did
            # not return (should not normally happen); keep it around.
            self.links[url] = {
                'time': time.time() + ideal_delta,
                'data': data,
            }
        if data is None:
            # If no data is returned, wait longer before attempting again:
            # timestamp the failure 8-16 hours into the future.
            self.links[url] = {
                'data': None,
                'time': t + (60 * 60) * random.randint(8, 16),
            }
        return data

    @staticmethod
    def _connection_urls(origin):
        """Collect every connection URL from a loaded document.

        Tolerates None documents and missing keys (a failed load_data or a
        site that publishes only one of the two key styles).

        :param origin: document dict from load_data, or None
        :return: flat list of connection URLs
        """
        urls = []
        if origin is None:
            return urls
        try:
            urls.extend(origin.get('connections', []))
        except Exception:
            pass
        try:
            for group in origin.get('connection_groups', {}).values():
                urls.extend(group)
        except Exception:
            pass
        return urls

    def get_interverse_data(self, url):
        """Load *url*'s document plus the documents of every site it links to.

        Connected sites are fetched via joblib Parallel (sequential by
        default since no n_jobs is given, but cache-aware either way).

        :return: {'main': document-or-None,
                  'connections': {connection_url: document, ...}}
        """
        origin = self.load_data(url)
        connections = self._connection_urls(origin)
        results = Parallel()(delayed(self.load_data)(c) for c in connections)
        return {
            'main': origin,
            'connections': dictify(results),
        }

    def build_cache(self):
        """Pre-warm the cache one hop out from cache_origin."""
        print("Building cache..\nThis may take some time")
        origin = self.load_data(cache_origin)
        connections = self._connection_urls(origin)
        # Result discarded on purpose: only the cache side effect matters.
        Parallel()(delayed(self.get_interverse_data)(c) for c in connections)
if __name__ == '__main__':
    # Running as a script builds the full cache immediately (network-heavy:
    # Cache.__init__ calls build_cache, which crawls out from cache_origin).
    cache = Cache()