interverse/server/simple_cache.py

120 lines
3.5 KiB
Python
Raw Normal View History

2022-10-10 07:04:14 -04:00
import requests
import time,json,os
from joblib import Parallel, delayed
2022-10-10 07:04:14 -04:00
# Default maximum age, in seconds, that Cache uses when no delta is given.
ideal_delta = 60 * 15  # 15 minutes
# NOTE(review): nothing in the visible code reads this value; load_data
# passes a literal timeout=1 to requests.get -- confirm before reusing it.
request_timeout = 0.
# Site whose interverse document seeds Cache.build_cache.
cache_origin = 'https://libresolutions.network'
# Headers sent with every crawler request.
crawler_header = {
    'User-agent': 'interverse-crawler',
    'info': 'https://libresolutions.network/videos/interverse-demo-1/',
}
# URL schemes tried for each host, in order.
schemes = ['https://']
# Paths probed on each host, in order; the first one that answers with
# JSON is used.
locations = [
    '/.well-known/discover.json',
    '/.well-known/interverse',
    '/interverse.json',
]
2022-10-10 07:04:14 -04:00
def dictify(lst):
    """Index a list of interverse documents by their ``'location'`` value.

    Args:
        lst: Iterable of documents (mappings with a ``'location'`` key);
            ``None`` entries are skipped.

    Returns:
        A dict mapping each document's ``'location'`` to the document
        itself. When two documents share a location, the later one wins.

    Raises:
        KeyError: If a non-``None`` entry has no ``'location'`` key.
    """
    # Identity test: only real None placeholders are dropped, regardless of
    # how an entry implements comparison.
    return {item['location']: item for item in lst if item is not None}
class Cache:
    """In-memory cache of interverse documents, keyed by the requested URL.

    ``self.links`` maps each requested URL to ``{'time': ..., 'data': ...}``.
    An entry is served from memory while
    ``now - entry['time'] <= self.delta``. A failed lookup is stored with
    ``data`` set to ``None`` and a timestamp one hour in the future, which
    postpones the next attempt for that URL.
    """

    def __init__(self, delta=None):
        """Create the cache and immediately crawl out from ``cache_origin``.

        Args:
            delta: Maximum age in seconds for which a cached entry is
                reused. Defaults to the module-level ``ideal_delta``.
        """
        self.delta = ideal_delta if delta is None else delta
        # link = key:{data,time}
        self.links = {}
        self.build_cache()

    @staticmethod
    def _translate_discover(location, data):
        """Rename discover-style keys to interverse names for discover paths."""
        # Substring test on purpose: str.find() returns -1 (truthy) on a
        # miss, so using it as a condition would translate every location.
        if 'discover' not in location:
            return data
        # translate discover to interverse
        return json.loads(json.dumps(data).replace(
            "preview_connections", "connection_groups"))

    @staticmethod
    def _collect_connections(origin):
        """List every connection named in ``origin``, direct ones first."""
        connections = []
        # origin may be None (failed lookup) or lack either key; both cases
        # simply contribute nothing.
        try:
            connections.extend(origin['connections'])
        except (KeyError, TypeError):
            pass
        try:
            groups = origin['connection_groups']
            for group in groups:
                connections.extend(groups[group])
        except (KeyError, TypeError, IndexError):
            pass
        return connections

    def load_data(self, url):
        """Return the interverse document for ``url``, using the cache if fresh.

        Each scheme/location combination is tried in order and the first
        response that parses as JSON is cached and returned.

        Args:
            url: Site address. Any ``http://`` / ``https://`` prefix and all
                ``/`` characters are stripped before the request URL is
                built.

        Returns:
            The parsed document, or ``None`` when ``url`` is not a string or
            no location produced JSON.
        """
        print(f"Loading interverse data for :{url}")
        # Connection lists come from remote JSON, so an entry may be any
        # JSON value; only strings can be turned into a request URL.
        if not isinstance(url, str):
            return None

        now = time.time()
        entry = self.links.get(url)
        if entry is not None and now - entry['time'] <= self.delta:
            print(f"Using cached result for {url}")
            return entry['data']

        host = url.replace('https://', '').replace('http://', '').replace("/", '')
        for scheme in schemes:
            for location in locations:
                try:
                    data = requests.get(
                        scheme + host + location,
                        headers=crawler_header, timeout=1).json()
                except (requests.RequestException, ValueError):
                    # Unreachable host or a non-JSON body: try the next one.
                    continue
                data = self._translate_discover(location, data)
                print(f"Interverse connection found at {location}")
                self.links[url] = {
                    'time': time.time(),
                    'data': data,
                }
                return data

        # If no data is returned, wait longer before attempting again
        self.links[url] = {
            'data': None,
            'time': now + 60 * 60,  # 1 hour
        }
        return None

    def get_interverse_data(self, url):
        """Load ``url`` together with every site it lists as a connection.

        Args:
            url: Site address, as accepted by ``load_data``.

        Returns:
            ``{'main': <document or None>, 'connections': {...}}`` where the
            inner dict is ``dictify`` applied to the documents loaded for
            each connection (failed lookups are left out).
        """
        origin = self.load_data(url)
        connections = self._collect_connections(origin)
        # NOTE(review): load_data records results on self.links; that only
        # reaches this instance if joblib runs the calls in this process --
        # confirm against the joblib configuration in use.
        loaded = Parallel()(delayed(self.load_data)(con) for con in connections)
        return {
            'main': origin,
            'connections': dictify(loaded),
        }

    def build_cache(self):
        """Warm the cache from ``cache_origin`` and each of its connections."""
        print("Building cache..\nThis may take some time")
        origin = self.load_data(cache_origin)
        connections = self._collect_connections(origin)
        # Called for the caching side effect only; the results are discarded.
        Parallel()(delayed(self.get_interverse_data)(con) for con in connections)
2022-10-10 04:22:04 -04:00
2022-10-10 07:04:14 -04:00
if __name__ == '__main__':
    # Running the module directly builds the cache once: constructing Cache
    # triggers build_cache() from its initializer.
    cache = Cache()