2
0
mirror of https://github.com/xcat2/confluent.git synced 2024-11-28 20:39:40 +00:00

Make syncfile step robust or pause

If syncfiles fails, keep retrying until it succeeds.

Also, slow down sync checking to avoid hammering the system.

Further, randomize the delay to spread out highly synchronized requestors.

Block attempts to start multiple concurrent syncfiles runs for the same node.
This commit is contained in:
Jarrod Johnson 2024-04-09 11:07:11 -04:00
parent a4e152c17d
commit f68f9f4693
13 changed files with 176 additions and 10 deletions

View File

@ -1,4 +1,5 @@
#!/usr/bin/python
import time
import importlib
import tempfile
import json
@ -223,6 +224,7 @@ def synchronize():
if status == 202:
lastrsp = ''
while status != 204:
time.sleep(2)
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles')
if not isinstance(rsp, str):
rsp = rsp.decode('utf8')

View File

@ -5,6 +5,7 @@ import json
import os
import shutil
import pwd
import time
import grp
try:
from importlib.machinery import SourceFileLoader
@ -223,6 +224,7 @@ def synchronize():
if status == 202:
lastrsp = ''
while status != 204:
time.sleep(2)
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles')
if not isinstance(rsp, str):
rsp = rsp.decode('utf8')

View File

@ -1,4 +1,6 @@
#!/usr/bin/python3
import random
import time
import subprocess
import importlib
import tempfile
@ -227,9 +229,14 @@ def synchronize():
myips.append(addr)
data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips})
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data)
if status >= 300:
sys.stderr.write("Error starting syncfiles - {}:\n".format(status))
sys.stderr.write(repr(rsp))
return status
if status == 202:
lastrsp = ''
while status != 204:
time.sleep(1+(2*random.random(a)))
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles')
if not isinstance(rsp, str):
rsp = rsp.decode('utf8')
@ -277,10 +284,19 @@ def synchronize():
os.chmod(fname, int(opts[fname][opt], 8))
if uid != -1 or gid != -1:
os.chown(fname, uid, gid)
return status
finally:
shutil.rmtree(tmpdir)
shutil.rmtree(appendoncedir)
if __name__ == '__main__':
synchronize()
status = 202
while status not in (204, 200):
try:
status = synchronize()
except Exception as e:
sys.stderr.write(str(e))
status = 300
if status not in (204, 200):
time.sleep((random.random()*3)+2)

View File

@ -1,4 +1,6 @@
#!/usr/bin/python3
import random
import time
import subprocess
import importlib
import tempfile
@ -227,9 +229,14 @@ def synchronize():
myips.append(addr)
data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips})
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data)
if status >= 300:
sys.stderr.write("Error starting syncfiles - {}:\n".format(status))
sys.stderr.write(repr(rsp))
return status
if status == 202:
lastrsp = ''
while status != 204:
time.sleep(1+(2*random.random(a)))
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles')
if not isinstance(rsp, str):
rsp = rsp.decode('utf8')
@ -277,10 +284,19 @@ def synchronize():
os.chmod(fname, int(opts[fname][opt], 8))
if uid != -1 or gid != -1:
os.chown(fname, uid, gid)
return status
finally:
shutil.rmtree(tmpdir)
shutil.rmtree(appendoncedir)
if __name__ == '__main__':
synchronize()
status = 202
while status not in (204, 200):
try:
status = synchronize()
except Exception as e:
sys.stderr.write(str(e))
status = 300
if status not in (204, 200):
time.sleep((random.random()*3)+2)

View File

@ -1,4 +1,6 @@
#!/usr/bin/python3
import random
import time
import subprocess
import importlib
import tempfile
@ -227,9 +229,14 @@ def synchronize():
myips.append(addr)
data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips})
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data)
if status >= 300:
sys.stderr.write("Error starting syncfiles - {}:\n".format(status))
sys.stderr.write(repr(rsp))
return status
if status == 202:
lastrsp = ''
while status != 204:
time.sleep(1+(2*random.random(a)))
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles')
if not isinstance(rsp, str):
rsp = rsp.decode('utf8')
@ -277,10 +284,19 @@ def synchronize():
os.chmod(fname, int(opts[fname][opt], 8))
if uid != -1 or gid != -1:
os.chown(fname, uid, gid)
return status
finally:
shutil.rmtree(tmpdir)
shutil.rmtree(appendoncedir)
if __name__ == '__main__':
synchronize()
status = 202
while status not in (204, 200):
try:
status = synchronize()
except Exception as e:
sys.stderr.write(str(e))
status = 300
if status not in (204, 200):
time.sleep((random.random()*3)+2)

View File

@ -1,4 +1,6 @@
#!/usr/bin/python3
import random
import time
import subprocess
import importlib
import tempfile
@ -227,9 +229,14 @@ def synchronize():
myips.append(addr)
data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips})
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data)
if status >= 300:
sys.stderr.write("Error starting syncfiles - {}:\n".format(status))
sys.stderr.write(repr(rsp))
return status
if status == 202:
lastrsp = ''
while status != 204:
time.sleep(1+(2*random.random(a)))
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles')
if not isinstance(rsp, str):
rsp = rsp.decode('utf8')
@ -277,10 +284,19 @@ def synchronize():
os.chmod(fname, int(opts[fname][opt], 8))
if uid != -1 or gid != -1:
os.chown(fname, uid, gid)
return status
finally:
shutil.rmtree(tmpdir)
shutil.rmtree(appendoncedir)
if __name__ == '__main__':
synchronize()
status = 202
while status not in (204, 200):
try:
status = synchronize()
except Exception as e:
sys.stderr.write(str(e))
status = 300
if status not in (204, 200):
time.sleep((random.random()*3)+2)

View File

@ -1,4 +1,6 @@
#!/usr/bin/python3
import random
import time
import subprocess
import importlib
import tempfile
@ -227,9 +229,14 @@ def synchronize():
myips.append(addr)
data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips})
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data)
if status >= 300:
sys.stderr.write("Error starting syncfiles - {}:\n".format(status))
sys.stderr.write(repr(rsp))
return status
if status == 202:
lastrsp = ''
while status != 204:
time.sleep(1+(2*random.random(a)))
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles')
if not isinstance(rsp, str):
rsp = rsp.decode('utf8')
@ -277,10 +284,19 @@ def synchronize():
os.chmod(fname, int(opts[fname][opt], 8))
if uid != -1 or gid != -1:
os.chown(fname, uid, gid)
return status
finally:
shutil.rmtree(tmpdir)
shutil.rmtree(appendoncedir)
if __name__ == '__main__':
synchronize()
status = 202
while status not in (204, 200):
try:
status = synchronize()
except Exception as e:
sys.stderr.write(str(e))
status = 300
if status not in (204, 200):
time.sleep((random.random()*3)+2)

View File

@ -1,4 +1,6 @@
#!/usr/bin/python3
import random
import time
import subprocess
import importlib
import tempfile
@ -227,9 +229,14 @@ def synchronize():
myips.append(addr)
data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips})
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data)
if status >= 300:
sys.stderr.write("Error starting syncfiles - {}:\n".format(status))
sys.stderr.write(repr(rsp))
return status
if status == 202:
lastrsp = ''
while status != 204:
time.sleep(1+(2*random.random(a)))
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles')
if not isinstance(rsp, str):
rsp = rsp.decode('utf8')
@ -277,10 +284,19 @@ def synchronize():
os.chmod(fname, int(opts[fname][opt], 8))
if uid != -1 or gid != -1:
os.chown(fname, uid, gid)
return status
finally:
shutil.rmtree(tmpdir)
shutil.rmtree(appendoncedir)
if __name__ == '__main__':
synchronize()
status = 202
while status not in (204, 200):
try:
status = synchronize()
except Exception as e:
sys.stderr.write(str(e))
status = 300
if status not in (204, 200):
time.sleep((random.random()*3)+2)

View File

@ -1,4 +1,6 @@
#!/usr/bin/python3
import random
import time
import subprocess
import importlib
import tempfile
@ -227,9 +229,14 @@ def synchronize():
myips.append(addr)
data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips})
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data)
if status >= 300:
sys.stderr.write("Error starting syncfiles - {}:\n".format(status))
sys.stderr.write(repr(rsp))
return status
if status == 202:
lastrsp = ''
while status != 204:
time.sleep(1+(2*random.random(a)))
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles')
if not isinstance(rsp, str):
rsp = rsp.decode('utf8')
@ -277,10 +284,19 @@ def synchronize():
os.chmod(fname, int(opts[fname][opt], 8))
if uid != -1 or gid != -1:
os.chown(fname, uid, gid)
return status
finally:
shutil.rmtree(tmpdir)
shutil.rmtree(appendoncedir)
if __name__ == '__main__':
synchronize()
status = 202
while status not in (204, 200):
try:
status = synchronize()
except Exception as e:
sys.stderr.write(str(e))
status = 300
if status not in (204, 200):
time.sleep((random.random()*3)+2)

View File

@ -1,4 +1,6 @@
#!/usr/bin/python3
import random
import time
import subprocess
import importlib
import tempfile
@ -227,9 +229,14 @@ def synchronize():
myips.append(addr)
data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips})
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data)
if status >= 300:
sys.stderr.write("Error starting syncfiles - {}:\n".format(status))
sys.stderr.write(repr(rsp))
return status
if status == 202:
lastrsp = ''
while status != 204:
time.sleep(1+(2*random.random(a)))
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles')
if not isinstance(rsp, str):
rsp = rsp.decode('utf8')
@ -277,10 +284,19 @@ def synchronize():
os.chmod(fname, int(opts[fname][opt], 8))
if uid != -1 or gid != -1:
os.chown(fname, uid, gid)
return status
finally:
shutil.rmtree(tmpdir)
shutil.rmtree(appendoncedir)
if __name__ == '__main__':
synchronize()
status = 202
while status not in (204, 200):
try:
status = synchronize()
except Exception as e:
sys.stderr.write(str(e))
status = 300
if status not in (204, 200):
time.sleep((random.random()*3)+2)

View File

@ -1,4 +1,6 @@
#!/usr/bin/python3
import random
import time
import subprocess
import importlib
import tempfile
@ -227,9 +229,14 @@ def synchronize():
myips.append(addr)
data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips})
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data)
if status >= 300:
sys.stderr.write("Error starting syncfiles - {}:\n".format(status))
sys.stderr.write(repr(rsp))
return status
if status == 202:
lastrsp = ''
while status != 204:
time.sleep(1+(2*random.random(a)))
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles')
if not isinstance(rsp, str):
rsp = rsp.decode('utf8')
@ -277,10 +284,19 @@ def synchronize():
os.chmod(fname, int(opts[fname][opt], 8))
if uid != -1 or gid != -1:
os.chown(fname, uid, gid)
return status
finally:
shutil.rmtree(tmpdir)
shutil.rmtree(appendoncedir)
if __name__ == '__main__':
synchronize()
status = 202
while status not in (204, 200):
try:
status = synchronize()
except Exception as e:
sys.stderr.write(str(e))
status = 300
if status not in (204, 200):
time.sleep((random.random()*3)+2)

View File

@ -1,4 +1,6 @@
#!/usr/bin/python3
import random
import time
import subprocess
import importlib
import tempfile
@ -227,9 +229,14 @@ def synchronize():
myips.append(addr)
data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips})
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data)
if status >= 300:
sys.stderr.write("Error starting syncfiles - {}:\n".format(status))
sys.stderr.write(repr(rsp))
return status
if status == 202:
lastrsp = ''
while status != 204:
time.sleep(1+(2*random.random(a)))
status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles')
if not isinstance(rsp, str):
rsp = rsp.decode('utf8')
@ -277,10 +284,19 @@ def synchronize():
os.chmod(fname, int(opts[fname][opt], 8))
if uid != -1 or gid != -1:
os.chown(fname, uid, gid)
return status
finally:
shutil.rmtree(tmpdir)
shutil.rmtree(appendoncedir)
if __name__ == '__main__':
synchronize()
status = 202
while status not in (204, 200):
try:
status = synchronize()
except Exception as e:
sys.stderr.write(str(e))
status = 300
if status not in (204, 200):
time.sleep((random.random()*3)+2)

View File

@ -289,6 +289,8 @@ syncrunners = {}
def start_syncfiles(nodename, cfg, suffixes, principals=[]):
peerip = None
if nodename in syncrunners:
return '503 Synchronization already in progress '
if 'myips' in suffixes:
targips = suffixes['myips']
del suffixes['myips']