import requests
import time
import json

ideal_delta = 60 * 5  # cache lifetime: 5 minutes

crawler_header = {
    'User-agent': 'interverse-crawler',
    'info': 'https://libresolutions.network/videos/interverse-demo-1/',
}

schemes = ['http://', 'https://']
locations = [
    '/.well-known/discover.json',
    '/.well-known/interverse',
    '/interverse.json',
    '/discover.json',
]


class Cache:
    def __init__(self, delta=None):
        # How long (in seconds) a cached result stays fresh.
        self.delta = ideal_delta if delta is None else delta
        self.links = {}  # url -> {'data': ..., 'time': ...}

    def load_data(self, url):
        data = None
        t = time.time()
        if url in self.links:
            if t - self.links[url]['time'] <= self.delta:
                print(f"Using cached result for {url}")
                return self.links[url]['data']
        for s in schemes:
            for l in locations:
                try:
                    data = requests.get(s + url + l, headers=crawler_header, timeout=3).json()
                    if 'discover' in l:
                        # Translate discover to interverse.
                        # Note: the original test `if l.find('discover'):` was buggy,
                        # since find() returns -1 (truthy) on a miss.
                        data = json.loads(
                            json.dumps(data).replace("preview_connections", "connection_groups")
                        )
                    print(f"Interverse connection found at {l}")
                    self.links[url] = {
                        'time': time.time(),
                        'data': data,
                    }
                    return data
                except (requests.RequestException, ValueError):
                    pass
        # If no data is returned, record the miss with a timestamp in the
        # future so the crawler waits longer before attempting again.
        self.links[url] = {
            'data': None,
            'time': t + ideal_delta,
        }
        return data

    def get_interverse_data(self, url):
        origin = self.load_data(url)
        if origin is None:
            return None
        connections = {}
        # Strip the scheme and any trailing slash before re-crawling each
        # connection. (The original stripped every "/", which mangles URLs
        # that include a path.)
        for con in origin.get('connections', []):
            dat = self.load_data(con.replace('https://', '').replace('http://', '').rstrip('/'))
            if dat is not None:
                connections[con] = dat
        for g in origin.get('connection_groups', {}):
            for con in origin['connection_groups'][g]:
                dat = self.load_data(con.replace('https://', '').replace('http://', '').rstrip('/'))
                if dat is not None:
                    connections[con] = dat
        return {
            'main': origin,
            'connections': connections,
        }


if __name__ == '__main__':
    c = Cache()
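    # A minimal usage sketch. 'libresolutions.network' is used here purely as
    # an illustrative host (it appears in the crawler_header info URL); any
    # site publishing an interverse/discover file at one of the known
    # locations would behave the same way.
    result = c.get_interverse_data('libresolutions.network')
    if result is not None:
        print(f"Found {len(result['connections'])} reachable connections")
        # A second call within ideal_delta seconds is served from the cache
        # rather than re-fetched.
        c.get_interverse_data('libresolutions.network')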