32 from requests.utils import requote_uri
37 from cfsp_user import *
38 from cfsp_globals import *
40 from cfsp_util import errorReqExit
51 """Representation of a specific cloudFPGA cluster"""
53 def __init__(self, user: cFuser, cluster_data):
56 self.id = cluster_data['cluster_id']
62 def post_cluster(user: cFuser, number_of_FPGA_nodes, role_image_id, host_address):
64 print(
"Creating FPGA cluster...")
69 rank0node = {
'image_id': __NON_FPGA_IDENTIFIER__,
71 'node_ip': host_address}
72 cluster_req.append(rank0node)
73 size = number_of_FPGA_nodes + 1
74 for i
in range(1, size):
76 'image_id':
str(role_image_id),
79 cluster_req.append(fpgaNode)
82 "http://" + __cf_manager_url__ +
"/clusters?{0}&dont_verify_memory=0".format(
83 user.get_auth_string(with_project=
True)),
84 json=cluster_req, timeout=__POST_CLUSTER_TIMEOUT__)
85 elapsed = time.time() - start
87 if r1.status_code != 200:
91 cluster_data = json.loads(r1.text)
92 print(
"Id of new cluster: {}".format(cluster_data[
'cluster_id']))
93 print(
"Time for POST cluster: \t{0}s\n".format(elapsed))
94 new_cluster =
cFcluster(user, cluster_data)
96 except requests.exceptions.Timeout
as e:
99 print(
"ERROR: Something went wrong with post_cluster request and it reached timeout="+
str(__POST_CLUSTER_TIMEOUT__)+
". Maybe retry or increase timeout value.\n")
104 print(
"Requesting cluster data for cluster_id={0} ...".format(cluster.get_id()))
108 "http://" + __cf_manager_url__ +
"/clusters/" +
str(cluster.get_id()) +
"?{0}".format(
109 cluster.user.get_auth_string()), timeout=__GET_CLUSTER_TIMEOUT__)
110 elapsed = time.time() - start
111 print(
"Time for GET cluster: \t{0}s\n".format(elapsed))
112 if r1.status_code != 200:
116 cluster_data = json.loads(r1.text)
118 cluster.cluster_data = cluster_data
120 except requests.exceptions.Timeout
as e:
123 print(
"ERROR: Something went wrong with get_cluster_data request and it reached timeout="+
str(__GET_CLUSTER_TIMEOUT__)+
". Maybe retry or increase timeout value.\n")
127 print(
"Requesting clusters data (limit="+
str(limit)+
")...")
131 "http://" + __cf_manager_url__ +
"/clusters" +
"?{0}&limit={1}".format(
132 user.get_auth_string(), limit), timeout=__GET_CLUSTER_TIMEOUT__)
133 elapsed = time.time() - start
134 print(r1.request.url)
135 print(
"Time for GET clusters: \t{0}s\n".format(elapsed))
136 if r1.status_code != 200:
139 clusters_data = json.loads(r1.text)
141 except requests.exceptions.Timeout
as e:
144 print(
"ERROR: Something went wrong with get_cluster request and it reached timeout="+
str(__GET_CLUSTER_TIMEOUT__)+
". Maybe retry or increase timeout value.\n")
148 print(
"Requesting delete cluster_id={0} ...".format(cluster.get_id()))
151 r1 = requests.delete(
152 "http://" + __cf_manager_url__ +
"/clusters/" +
str(cluster.get_id()) +
"?{0}".format(
153 cluster.user.get_auth_string()), timeout=__DELETE_CLUSTER_TIMEOUT__)
154 elapsed = time.time() - start
155 print(
"Time for DELETE cluster: \t{0}s\n".format(elapsed))
156 if r1.status_code != 204:
160 except requests.exceptions.Timeout
as e:
163 print(
"ERROR: Something went wrong with delete_cluster_data request and it reached timeout="+
str(__DELETE_CLUSTER_TIMEOUT__)+
". Maybe retry or increase timeout value.\n")
167 print(
"Requesting restart for (all) FPGA(s) of cluster_id={0} ...".format(cluster.get_id()))
171 "http://" + __cf_manager_url__ +
"/clusters/" +
str(cluster.get_id()) +
"/restart?{0}".format(
172 cluster.user.get_auth_string()))
173 elapsed = time.time() - start
174 print(
"Time for RESTART cluster: \t{0}s\n".format(elapsed))
176 if r1.status_code != 200:
178 return errorReqExit(
"PATCH cluster restart", r1.status_code)
179 print(r1.content.decode())
181 except Exception
as e:
182 print(
"ERROR: Failed to reset the FPGA(s) role(s)")
196 self.id = instance_data['instance_id']
202 print(
"Requesting instances data (limit="+
str(limit)+
")...")
206 "http://" + __cf_manager_url__ +
"/instances" +
"?{0}&limit={1}".format(
207 user.get_auth_string(), limit), timeout=__GET_INSTANCE_TIMEOUT__)
208 elapsed = time.time() - start
209 print(r1.request.url)
210 print(
"Time for GET instances: \t{0}s\n".format(elapsed))
211 if r1.status_code != 200:
214 instances_data = json.loads(r1.text)
215 return instances_data
216 except requests.exceptions.Timeout
as e:
219 print(
"ERROR: Something went wrong with get_instances request and it reached timeout="+
str(__GET_INSTANCE_TIMEOUT__)+
". Maybe retry or increase timeout value.\n")
226 print(
"Requesting instance data for instance_id={0} ...".format(instance.get_id()))
230 "http://" + __cf_manager_url__ +
"/instances/" +
str(instance.get_id()) +
"?{0}".format(
231 instance.user.get_auth_string()), timeout=__GET_INSTANCE_TIMEOUT__)
232 elapsed = time.time() - start
233 print(
"Time for GET instance: \t{0}s\n".format(elapsed))
234 if r1.status_code != 200:
238 instance_data = json.loads(r1.text)
240 instance.instance_data = instance_data
242 except requests.exceptions.Timeout
as e:
245 print(
"ERROR: Something went wrong with get_instance_data request and it reached timeout="+
str(__GET_INSTANCE_TIMEOUT__)+
". Maybe retry or increase timeout value.\n")
258 print(
"Requesting restart for instance_id={0} ...".format(instance.get_id()))
262 "http://" + __cf_manager_url__ +
"/instances/" +
str(instance.get_id()) +
"/app_restart?{0}".format(
263 instance.user.get_auth_string()))
264 elapsed = time.time() - start
265 print(
"Time for RESTART instance: \t{0}s\n".format(elapsed))
267 if r1.status_code != 200:
269 return errorReqExit(
"PATCH instance restart", r1.status_code)
270 print(r1.content.decode())
272 except Exception
as e:
273 print(
"ERROR: Failed to reset the FPGA role")
278 print(
"deleting instance {}".format(instance.id))
280 r1 = requests.delete(
281 "http://" + __cf_manager_url__ +
"/instances/{0}?{1}".format(instance.id, instance.user.get_auth_string()))
283 if r1.status_code > 204:
289 return r1.status_code
291 print(
"Instance {} removed".format(instance.id))
293 instance_data = r1.status_code
306 self.id = image_data['id']
334 print(
"Requesting resource status...")
336 "http://" + __cf_manager_url__ +
"/resources/" +
str(
337 resource_id) +
"/status/" +
"?{0}".format(admin.get_auth_string()))
339 if r1.status_code != 200:
341 return errorReqExit(
"GET resource status", r1.status_code)
343 resource_status = json.loads(r1.text)
344 return resource_status
352 "http://" + __cf_manager_url__ +
"/resources/{0}/status/?{1}&new_status={2}".format(
353 resource_id, admin.get_auth_string(), new_status))
355 if r1.status_code != 204:
357 return errorReqExit(
"PUT /resources/{resource_id}/status/", r1.status_code)
359 print(
"Resource {} set to {}".format(resource_id, new_status))
361 resource_data = r1.status_code
def __init__(self, cFuser user, cluster_data)
def __init__(self, cFuser user, image_data)
def __init__(self, cFuser user, instance_data)
def errorReqExit(msg, code)
def api_request_instance()
def get_cluster_data(cFcluster cluster)
def get_instance_data(cFinstance instance)
def delete_instance(cFinstance instance)
def get_instances_data(cFuser user, limit=100)
def post_cluster(cFuser user, number_of_FPGA_nodes, role_image_id, host_address)
def set_resource_status(resource_id, new_status, cFuser admin)
def get_image(cFimage image)
def delete_cluster_data(cFcluster cluster)
def get_images(cFuser user)
def restart_instance_app(cFinstance instance)
def get_resource_status(resource_id, cFuser admin)
Resources functions (admin only)
def restart_cluster_apps(cFcluster cluster)
def delete_image(cFimage image)
def get_clusters_data(cFuser user, limit=100)
def post_image(cFimage image)