updating for robustness

This commit is contained in:
hyung-hwan 2024-11-24 22:33:19 +09:00
parent 93f84fbc98
commit 903e4cf6d3
2 changed files with 53 additions and 86 deletions

138
client.go
View File

@ -7,7 +7,6 @@ import "crypto/tls"
import "encoding/json" import "encoding/json"
import "errors" import "errors"
import "fmt" import "fmt"
import "io"
import "net" import "net"
import "net/http" import "net/http"
import "sync" import "sync"
@ -16,7 +15,9 @@ import "time"
//import "github.com/google/uuid" //import "github.com/google/uuid"
import "google.golang.org/grpc" import "google.golang.org/grpc"
import "google.golang.org/grpc/codes"
import "google.golang.org/grpc/credentials/insecure" import "google.golang.org/grpc/credentials/insecure"
import "google.golang.org/grpc/status"
const PTC_LIMIT = 8192 const PTC_LIMIT = 8192
@ -94,8 +95,6 @@ type ClientRoute struct {
ptc_mtx sync.Mutex ptc_mtx sync.Mutex
ptc_map ClientPeerConnMap ptc_map ClientPeerConnMap
ptc_cancel_map ClientPeerCancelFuncMap ptc_cancel_map ClientPeerCancelFuncMap
//ptc_limit int
//ptc_last_id uint32
ptc_wg sync.WaitGroup ptc_wg sync.WaitGroup
stop_req atomic.Bool stop_req atomic.Bool
@ -136,10 +135,8 @@ func NewClientRoute(cts *ServerConn, id uint32, addr *net.TCPAddr, proto ROUTE_P
r.cts = cts r.cts = cts
r.id = id r.id = id
//r.ptc_limit = PTC_LIMIT
r.ptc_map = make(ClientPeerConnMap) r.ptc_map = make(ClientPeerConnMap)
r.ptc_cancel_map = make(ClientPeerCancelFuncMap) r.ptc_cancel_map = make(ClientPeerCancelFuncMap)
//r.ptc_last_id = 0
r.proto = proto r.proto = proto
r.peer_addr = addr r.peer_addr = addr
r.stop_req.Store(false) r.stop_req.Store(false)
@ -149,10 +146,18 @@ func NewClientRoute(cts *ServerConn, id uint32, addr *net.TCPAddr, proto ROUTE_P
} }
func (r *ClientRoute) RunTask(wg *sync.WaitGroup) { func (r *ClientRoute) RunTask(wg *sync.WaitGroup) {
var err error
// this task on the route object isn't actually necessary. // this task on the route object isn't actually necessary.
// most useful works are triggered by ReportEvent() and done by ConnectToPeer() // most useful works are triggered by ReportEvent() and done by ConnectToPeer()
defer wg.Done() defer wg.Done()
err = r.cts.psc.Send(MakeRouteStartPacket(r.id, r.proto, r.peer_addr.String()))
if err != nil {
//return fmt.Errorf("unable to send route-start packet - %s", err.Error())
goto done;
}
main_loop: main_loop:
for { for {
select { select {
@ -161,7 +166,12 @@ main_loop:
} }
} }
done:
r.ReqStop()
r.ptc_wg.Wait() // wait until all peer tasks are finished r.ptc_wg.Wait() // wait until all peer tasks are finished
r.cts.psc.Send(MakeRouteStopPacket(r.id, r.proto, r.peer_addr.String()))
r.cts.RemoveClientRoute(r)
fmt.Printf ("*** End fo Client Roue Task\n") fmt.Printf ("*** End fo Client Roue Task\n")
} }
@ -171,7 +181,6 @@ func (r *ClientRoute) ReqStop() {
for _, ptc = range r.ptc_map { for _, ptc = range r.ptc_map {
ptc.ReqStop() ptc.ReqStop()
} }
r.stop_chan <- true r.stop_chan <- true
} }
fmt.Printf ("*** Sent stop request to Route..\n") fmt.Printf ("*** Sent stop request to Route..\n")
@ -404,7 +413,6 @@ func (cts *ServerConn) AddClientRoutes (peer_addrs []string) error {
var v string var v string
var addr *net.TCPAddr var addr *net.TCPAddr
var proto ROUTE_PROTO var proto ROUTE_PROTO
var r *ClientRoute
var err error var err error
for i, v = range peer_addrs { for i, v = range peer_addrs {
@ -425,58 +433,37 @@ func (cts *ServerConn) AddClientRoutes (peer_addrs []string) error {
} }
} }
// TODO: mutex protection
for _, r = range cts.route_map {
err = cts.psc.Send(MakeRouteStartPacket(r.id, r.proto, addr.String()))
if err != nil {
// TODO: remove all routes???
return fmt.Errorf("unable to send route-start packet - %s", err.Error())
}
}
return nil return nil
} }
func (cts *ServerConn) RemoveClientRoutes () { func (cts *ServerConn) disconnect_from_server() {
var r *ClientRoute if cts.conn != nil {
var id uint32 var r* ClientRoute
cts.route_mtx.Lock() cts.route_mtx.Lock()
for _, r = range cts.route_map { for _, r = range cts.route_map {
r.ReqStop() r.ReqStop()
} }
cts.route_mtx.Unlock()
for id, r = range cts.route_map { cts.conn.Close()
delete(cts.route_map, id) // don't reset cts.conn to nil here
} // if this function is called from RunTask()
// for reconnection, it will be set to a new value
cts.route_map = make(ClientRouteMap) // immediately after the start_over label in it.
cts.route_mtx.Unlock() // if it's called from ReqStop(), we don't really
// need to care about it.
// TODO: mutex protection?
for _, r = range cts.route_map {
cts.psc.Send(MakeRouteStopPacket(r.id, r.proto, r.peer_addr.String()))
} }
} }
func (cts *ServerConn) ReqStop() { func (cts *ServerConn) ReqStop() {
if cts.stop_req.CompareAndSwap(false, true) { if cts.stop_req.CompareAndSwap(false, true) {
var r *ClientRoute cts.disconnect_from_server()
cts.route_mtx.Lock()
for _, r = range cts.route_map {
cts.psc.Send(MakeRouteStopPacket(r.id, r.proto, r.peer_addr.String())) // don't care about failure
r.ReqStop()
}
cts.route_mtx.Unlock()
cts.stop_chan <- true cts.stop_chan <- true
} }
} }
func (cts *ServerConn) RunTask(wg *sync.WaitGroup) { func (cts *ServerConn) RunTask(wg *sync.WaitGroup) {
var conn *grpc.ClientConn = nil
var hdc HoduClient
var psc PacketStreamClient var psc PacketStreamClient
var slpctx context.Context var slpctx context.Context
var c_seed Seed var c_seed Seed
@ -485,23 +472,25 @@ func (cts *ServerConn) RunTask(wg *sync.WaitGroup) {
defer wg.Done() // arrange to call at the end of this function defer wg.Done() // arrange to call at the end of this function
// TODO: HANDLE connection timeout..
// ctx, _/*cancel*/ := context.WithTimeout(context.Background(), time.Second)
start_over: start_over:
cts.cli.log.Write ("", LOG_DEBUG, "Total number of server connections = %d", len(cts.cli.cts_map))
cts.cli.log.Write("", LOG_INFO, "Connecting to server %s", cts.saddr.String()) cts.cli.log.Write("", LOG_INFO, "Connecting to server %s", cts.saddr.String())
conn, err = grpc.NewClient(cts.saddr.String(), grpc.WithTransportCredentials(insecure.NewCredentials())) cts.conn, err = grpc.NewClient(cts.saddr.String(), grpc.WithTransportCredentials(insecure.NewCredentials()))
if err != nil { if err != nil {
cts.cli.log.Write("", LOG_ERROR, "Failed to connect to server %s - %s", cts.saddr.String(), err.Error()) cts.cli.log.Write("", LOG_ERROR, "Failed to make client to server %s - %s", cts.saddr.String(), err.Error())
goto reconnect_to_server goto reconnect_to_server
} }
cts.hdc = NewHoduClient(cts.conn)
hdc = NewHoduClient(conn) // TODO: HANDLE connection timeout.. may have to run GetSeed or PacketStream in another goroutine
// ctx, _/*cancel*/ := context.WithTimeout(context.Background(), time.Second)
// seed exchange is for future expansion of the protocol // seed exchange is for future expansion of the protocol
// there is nothing to do much about it for now. // there is nothing to do much about it for now.
c_seed.Version = HODU_VERSION c_seed.Version = HODU_VERSION
c_seed.Flags = 0 c_seed.Flags = 0
s_seed, err = hdc.GetSeed(cts.cli.ctx, &c_seed) s_seed, err = cts.hdc.GetSeed(cts.cli.ctx, &c_seed)
if err != nil { if err != nil {
cts.cli.log.Write("", LOG_ERROR, "Failed to get seed from server %s - %s", cts.saddr.String(), err.Error()) cts.cli.log.Write("", LOG_ERROR, "Failed to get seed from server %s - %s", cts.saddr.String(), err.Error())
goto reconnect_to_server goto reconnect_to_server
@ -509,7 +498,9 @@ start_over:
cts.s_seed = *s_seed cts.s_seed = *s_seed
cts.c_seed = c_seed cts.c_seed = c_seed
psc, err = hdc.PacketStream(cts.cli.ctx) cts.cli.log.Write("", LOG_INFO, "Got seed from server %s - ver=%#x", cts.saddr.String(), cts.s_seed.Version)
psc, err = cts.hdc.PacketStream(cts.cli.ctx)
if err != nil { if err != nil {
cts.cli.log.Write("", LOG_ERROR, "Failed to get packet stream from server %s - %s", cts.saddr.String(), err.Error()) cts.cli.log.Write("", LOG_ERROR, "Failed to get packet stream from server %s - %s", cts.saddr.String(), err.Error())
goto reconnect_to_server goto reconnect_to_server
@ -517,9 +508,6 @@ start_over:
cts.cli.log.Write("", LOG_INFO, "Got packet stream from server %s", cts.saddr.String()) cts.cli.log.Write("", LOG_INFO, "Got packet stream from server %s", cts.saddr.String())
cts.conn = conn
cts.hdc = hdc
//cts.psc = &GuardedPacketStreamClient{psc: psc}
cts.psc = &GuardedPacketStreamClient{Hodu_PacketStreamClient: psc} cts.psc = &GuardedPacketStreamClient{Hodu_PacketStreamClient: psc}
// the connection structure to a server is ready. // the connection structure to a server is ready.
@ -550,7 +538,7 @@ fmt.Printf("context doine... error - %s\n", cts.cli.ctx.Err().Error())
pkt, err = psc.Recv() pkt, err = psc.Recv()
if err != nil { if err != nil {
if errors.Is(err, io.EOF) { if status.Code(err) == codes.Canceled || errors.Is(err, net.ErrClosed) {
goto reconnect_to_server goto reconnect_to_server
} else { } else {
cts.cli.log.Write("", LOG_INFO, "Failed to receive packet form server %s - %s", cts.saddr.String(), err.Error()) cts.cli.log.Write("", LOG_INFO, "Failed to receive packet form server %s - %s", cts.saddr.String(), err.Error())
@ -660,14 +648,16 @@ fmt.Printf("context doine... error - %s\n", cts.cli.ctx.Err().Error())
done: done:
cts.cli.log.Write("", LOG_INFO, "Disconnected from server %s", cts.saddr.String()) cts.cli.log.Write("", LOG_INFO, "Disconnected from server %s", cts.saddr.String())
cts.RemoveClientRoutes() //cts.RemoveClientRoutes()
if conn != nil { conn.Close() } cts.ReqStop()
wait_for_termination:
cts.route_wg.Wait() // wait until all route tasks are finished cts.route_wg.Wait() // wait until all route tasks are finished
cts.cli.RemoveServerConn(cts)
return return
reconnect_to_server: reconnect_to_server:
cts.RemoveClientRoutes() cts.disconnect_from_server()
if conn != nil { conn.Close() }
// wait for 2 seconds // wait for 2 seconds
slpctx, _ = context.WithTimeout(cts.cli.ctx, 2 * time.Second) slpctx, _ = context.WithTimeout(cts.cli.ctx, 2 * time.Second)
select { select {
@ -675,7 +665,9 @@ reconnect_to_server:
fmt.Printf("context doine... error - %s\n", cts.cli.ctx.Err().Error()) fmt.Printf("context doine... error - %s\n", cts.cli.ctx.Err().Error())
goto done goto done
case <-cts.stop_chan: case <-cts.stop_chan:
goto done // this signal indicates that ReqStop() has been called
// so jump to the waiting label
goto wait_for_termination
case <- slpctx.Done(): case <- slpctx.Done():
// do nothing // do nothing
} }
@ -700,35 +692,12 @@ func (cts *ServerConn) ReportEvent (route_id uint32, pts_id uint32, event_type P
func (r *ClientRoute) AddNewClientPeerConn (c *net.TCPConn, pts_id uint32) (*ClientPeerConn, error) { func (r *ClientRoute) AddNewClientPeerConn (c *net.TCPConn, pts_id uint32) (*ClientPeerConn, error) {
var ptc *ClientPeerConn var ptc *ClientPeerConn
//var ok bool
//var start_id uint32
r.ptc_mtx.Lock() r.ptc_mtx.Lock()
defer r.ptc_mtx.Unlock() defer r.ptc_mtx.Unlock()
/*
if len(r.ptc_map) >= r.ptc_limit {
return nil, fmt.Errorf("peer-to-client connection table full")
}
start_id = r.ptc_last_id
for {
_, ok = r.ptc_map[r.ptc_last_id]
if !ok {
break
}
r.ptc_last_id++
if r.ptc_last_id == start_id {
// unlikely to happen but it cycled through the whole range.
return nil, fmt.Errorf("failed to assign peer-to-table connection id")
}
}
ptc = NewClientPeerConn(r, c, r.ptc_last_id)
*/
ptc = NewClientPeerConn(r, c, pts_id) ptc = NewClientPeerConn(r, c, pts_id)
r.ptc_map[ptc.conn_id] = ptc r.ptc_map[ptc.conn_id] = ptc
//r.ptc_last_id++
return ptc, nil return ptc, nil
} }
@ -777,7 +746,7 @@ fmt.Printf ("ADD total servers %d\n", len(c.cts_map))
func (c *Client) RemoveServerConn(cts *ServerConn) { func (c *Client) RemoveServerConn(cts *ServerConn) {
c.cts_mtx.Lock() c.cts_mtx.Lock()
delete(c.cts_map, cts.saddr) delete(c.cts_map, cts.saddr)
fmt.Printf ("REMOVE total servers %d\n", len(c.cts_map)) fmt.Printf ("REMOVEDDDDDD CONNECTION FROM %s total servers %d\n", cts.saddr, len(c.cts_map))
c.cts_mtx.Unlock() c.cts_mtx.Unlock()
} }
@ -793,7 +762,6 @@ func (c *Client) ReqStop() {
cts.ReqStop() cts.ReqStop()
} }
// TODO: notify the server.. send term command???
c.stop_chan <- true c.stop_chan <- true
c.ctx_cancel() c.ctx_cancel()
} }

View File

@ -18,7 +18,6 @@ import "google.golang.org/grpc/peer"
import "google.golang.org/grpc/stats" import "google.golang.org/grpc/stats"
const PTS_LIMIT = 8192 const PTS_LIMIT = 8192
//const CTS_LIMIT = 2048
type ClientConnMap = map[net.Addr]*ClientConn type ClientConnMap = map[net.Addr]*ClientConn
type ServerPeerConnMap = map[uint32]*ServerPeerConn type ServerPeerConnMap = map[uint32]*ServerPeerConn