updating for robustness
This commit is contained in:
parent
93f84fbc98
commit
903e4cf6d3
138
client.go
138
client.go
@ -7,7 +7,6 @@ import "crypto/tls"
|
|||||||
import "encoding/json"
|
import "encoding/json"
|
||||||
import "errors"
|
import "errors"
|
||||||
import "fmt"
|
import "fmt"
|
||||||
import "io"
|
|
||||||
import "net"
|
import "net"
|
||||||
import "net/http"
|
import "net/http"
|
||||||
import "sync"
|
import "sync"
|
||||||
@ -16,7 +15,9 @@ import "time"
|
|||||||
|
|
||||||
//import "github.com/google/uuid"
|
//import "github.com/google/uuid"
|
||||||
import "google.golang.org/grpc"
|
import "google.golang.org/grpc"
|
||||||
|
import "google.golang.org/grpc/codes"
|
||||||
import "google.golang.org/grpc/credentials/insecure"
|
import "google.golang.org/grpc/credentials/insecure"
|
||||||
|
import "google.golang.org/grpc/status"
|
||||||
|
|
||||||
const PTC_LIMIT = 8192
|
const PTC_LIMIT = 8192
|
||||||
|
|
||||||
@ -94,8 +95,6 @@ type ClientRoute struct {
|
|||||||
ptc_mtx sync.Mutex
|
ptc_mtx sync.Mutex
|
||||||
ptc_map ClientPeerConnMap
|
ptc_map ClientPeerConnMap
|
||||||
ptc_cancel_map ClientPeerCancelFuncMap
|
ptc_cancel_map ClientPeerCancelFuncMap
|
||||||
//ptc_limit int
|
|
||||||
//ptc_last_id uint32
|
|
||||||
ptc_wg sync.WaitGroup
|
ptc_wg sync.WaitGroup
|
||||||
|
|
||||||
stop_req atomic.Bool
|
stop_req atomic.Bool
|
||||||
@ -136,10 +135,8 @@ func NewClientRoute(cts *ServerConn, id uint32, addr *net.TCPAddr, proto ROUTE_P
|
|||||||
|
|
||||||
r.cts = cts
|
r.cts = cts
|
||||||
r.id = id
|
r.id = id
|
||||||
//r.ptc_limit = PTC_LIMIT
|
|
||||||
r.ptc_map = make(ClientPeerConnMap)
|
r.ptc_map = make(ClientPeerConnMap)
|
||||||
r.ptc_cancel_map = make(ClientPeerCancelFuncMap)
|
r.ptc_cancel_map = make(ClientPeerCancelFuncMap)
|
||||||
//r.ptc_last_id = 0
|
|
||||||
r.proto = proto
|
r.proto = proto
|
||||||
r.peer_addr = addr
|
r.peer_addr = addr
|
||||||
r.stop_req.Store(false)
|
r.stop_req.Store(false)
|
||||||
@ -149,10 +146,18 @@ func NewClientRoute(cts *ServerConn, id uint32, addr *net.TCPAddr, proto ROUTE_P
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (r *ClientRoute) RunTask(wg *sync.WaitGroup) {
|
func (r *ClientRoute) RunTask(wg *sync.WaitGroup) {
|
||||||
|
var err error
|
||||||
|
|
||||||
// this task on the route object isn't actually necessary.
|
// this task on the route object isn't actually necessary.
|
||||||
// most useful works are triggered by ReportEvent() and done by ConnectToPeer()
|
// most useful works are triggered by ReportEvent() and done by ConnectToPeer()
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
|
||||||
|
err = r.cts.psc.Send(MakeRouteStartPacket(r.id, r.proto, r.peer_addr.String()))
|
||||||
|
if err != nil {
|
||||||
|
//return fmt.Errorf("unable to send route-start packet - %s", err.Error())
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
main_loop:
|
main_loop:
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
@ -161,7 +166,12 @@ main_loop:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
done:
|
||||||
|
r.ReqStop()
|
||||||
r.ptc_wg.Wait() // wait for all peer tasks are finished
|
r.ptc_wg.Wait() // wait for all peer tasks are finished
|
||||||
|
|
||||||
|
r.cts.psc.Send(MakeRouteStopPacket(r.id, r.proto, r.peer_addr.String()))
|
||||||
|
r.cts.RemoveClientRoute(r)
|
||||||
fmt.Printf ("*** End fo Client Roue Task\n")
|
fmt.Printf ("*** End fo Client Roue Task\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -171,7 +181,6 @@ func (r *ClientRoute) ReqStop() {
|
|||||||
for _, ptc = range r.ptc_map {
|
for _, ptc = range r.ptc_map {
|
||||||
ptc.ReqStop()
|
ptc.ReqStop()
|
||||||
}
|
}
|
||||||
|
|
||||||
r.stop_chan <- true
|
r.stop_chan <- true
|
||||||
}
|
}
|
||||||
fmt.Printf ("*** Sent stop request to Route..\n")
|
fmt.Printf ("*** Sent stop request to Route..\n")
|
||||||
@ -404,7 +413,6 @@ func (cts *ServerConn) AddClientRoutes (peer_addrs []string) error {
|
|||||||
var v string
|
var v string
|
||||||
var addr *net.TCPAddr
|
var addr *net.TCPAddr
|
||||||
var proto ROUTE_PROTO
|
var proto ROUTE_PROTO
|
||||||
var r *ClientRoute
|
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
for i, v = range peer_addrs {
|
for i, v = range peer_addrs {
|
||||||
@ -425,58 +433,37 @@ func (cts *ServerConn) AddClientRoutes (peer_addrs []string) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: mutex protection
|
|
||||||
for _, r = range cts.route_map {
|
|
||||||
err = cts.psc.Send(MakeRouteStartPacket(r.id, r.proto, addr.String()))
|
|
||||||
if err != nil {
|
|
||||||
// TODO: remove all routes???
|
|
||||||
return fmt.Errorf("unable to send route-start packet - %s", err.Error())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cts *ServerConn) RemoveClientRoutes () {
|
func (cts *ServerConn) disconnect_from_server() {
|
||||||
var r *ClientRoute
|
if cts.conn != nil {
|
||||||
var id uint32
|
var r* ClientRoute
|
||||||
|
|
||||||
cts.route_mtx.Lock()
|
cts.route_mtx.Lock()
|
||||||
for _, r = range cts.route_map {
|
for _, r = range cts.route_map {
|
||||||
r.ReqStop()
|
r.ReqStop()
|
||||||
}
|
}
|
||||||
|
cts.route_mtx.Unlock()
|
||||||
|
|
||||||
for id, r = range cts.route_map {
|
cts.conn.Close()
|
||||||
delete(cts.route_map, id)
|
// don't reset cts.conn to nil here
|
||||||
}
|
// if this function is called from RunTask()
|
||||||
|
// for reconnection, it will be set to a new value
|
||||||
cts.route_map = make(ClientRouteMap)
|
// immediately after the start_over label in it.
|
||||||
cts.route_mtx.Unlock()
|
// if it's called from ReqStop(), we don't really
|
||||||
|
// need to care about it.
|
||||||
// TODO: mutex protection?
|
|
||||||
for _, r = range cts.route_map {
|
|
||||||
cts.psc.Send(MakeRouteStopPacket(r.id, r.proto, r.peer_addr.String()))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cts *ServerConn) ReqStop() {
|
func (cts *ServerConn) ReqStop() {
|
||||||
if cts.stop_req.CompareAndSwap(false, true) {
|
if cts.stop_req.CompareAndSwap(false, true) {
|
||||||
var r *ClientRoute
|
cts.disconnect_from_server()
|
||||||
|
|
||||||
cts.route_mtx.Lock()
|
|
||||||
for _, r = range cts.route_map {
|
|
||||||
cts.psc.Send(MakeRouteStopPacket(r.id, r.proto, r.peer_addr.String())) // don't care about failure
|
|
||||||
r.ReqStop()
|
|
||||||
}
|
|
||||||
cts.route_mtx.Unlock()
|
|
||||||
|
|
||||||
cts.stop_chan <- true
|
cts.stop_chan <- true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cts *ServerConn) RunTask(wg *sync.WaitGroup) {
|
func (cts *ServerConn) RunTask(wg *sync.WaitGroup) {
|
||||||
var conn *grpc.ClientConn = nil
|
|
||||||
var hdc HoduClient
|
|
||||||
var psc PacketStreamClient
|
var psc PacketStreamClient
|
||||||
var slpctx context.Context
|
var slpctx context.Context
|
||||||
var c_seed Seed
|
var c_seed Seed
|
||||||
@ -485,23 +472,25 @@ func (cts *ServerConn) RunTask(wg *sync.WaitGroup) {
|
|||||||
|
|
||||||
defer wg.Done() // arrange to call at the end of this function
|
defer wg.Done() // arrange to call at the end of this function
|
||||||
|
|
||||||
// TODO: HANDLE connection timeout..
|
|
||||||
// ctx, _/*cancel*/ := context.WithTimeout(context.Background(), time.Second)
|
|
||||||
start_over:
|
start_over:
|
||||||
|
cts.cli.log.Write ("", LOG_DEBUG, "Total number of server connections = %d", len(cts.cli.cts_map))
|
||||||
|
|
||||||
cts.cli.log.Write("", LOG_INFO, "Connecting to server %s", cts.saddr.String())
|
cts.cli.log.Write("", LOG_INFO, "Connecting to server %s", cts.saddr.String())
|
||||||
conn, err = grpc.NewClient(cts.saddr.String(), grpc.WithTransportCredentials(insecure.NewCredentials()))
|
cts.conn, err = grpc.NewClient(cts.saddr.String(), grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cts.cli.log.Write("", LOG_ERROR, "Failed to connect to server %s - %s", cts.saddr.String(), err.Error())
|
cts.cli.log.Write("", LOG_ERROR, "Failed to make client to server %s - %s", cts.saddr.String(), err.Error())
|
||||||
goto reconnect_to_server
|
goto reconnect_to_server
|
||||||
}
|
}
|
||||||
|
cts.hdc = NewHoduClient(cts.conn)
|
||||||
|
|
||||||
hdc = NewHoduClient(conn)
|
// TODO: HANDLE connection timeout.. may have to run GetSeed or PacketStream in another goroutine
|
||||||
|
// ctx, _/*cancel*/ := context.WithTimeout(context.Background(), time.Second)
|
||||||
|
|
||||||
// seed exchange is for future expansion of the protocol
|
// seed exchange is for future expansion of the protocol
|
||||||
// there is not much to do about it for now.
|
// there is not much to do about it for now.
|
||||||
c_seed.Version = HODU_VERSION
|
c_seed.Version = HODU_VERSION
|
||||||
c_seed.Flags = 0
|
c_seed.Flags = 0
|
||||||
s_seed, err = hdc.GetSeed(cts.cli.ctx, &c_seed)
|
s_seed, err = cts.hdc.GetSeed(cts.cli.ctx, &c_seed)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cts.cli.log.Write("", LOG_ERROR, "Failed to get seed from server %s - %s", cts.saddr.String(), err.Error())
|
cts.cli.log.Write("", LOG_ERROR, "Failed to get seed from server %s - %s", cts.saddr.String(), err.Error())
|
||||||
goto reconnect_to_server
|
goto reconnect_to_server
|
||||||
@ -509,7 +498,9 @@ start_over:
|
|||||||
cts.s_seed = *s_seed
|
cts.s_seed = *s_seed
|
||||||
cts.c_seed = c_seed
|
cts.c_seed = c_seed
|
||||||
|
|
||||||
psc, err = hdc.PacketStream(cts.cli.ctx)
|
cts.cli.log.Write("", LOG_INFO, "Got seed from server %s - ver=%#x", cts.saddr.String(), cts.s_seed.Version)
|
||||||
|
|
||||||
|
psc, err = cts.hdc.PacketStream(cts.cli.ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cts.cli.log.Write("", LOG_ERROR, "Failed to get packet stream from server %s - %s", cts.saddr.String(), err.Error())
|
cts.cli.log.Write("", LOG_ERROR, "Failed to get packet stream from server %s - %s", cts.saddr.String(), err.Error())
|
||||||
goto reconnect_to_server
|
goto reconnect_to_server
|
||||||
@ -517,9 +508,6 @@ start_over:
|
|||||||
|
|
||||||
cts.cli.log.Write("", LOG_INFO, "Got packet stream from server %s", cts.saddr.String())
|
cts.cli.log.Write("", LOG_INFO, "Got packet stream from server %s", cts.saddr.String())
|
||||||
|
|
||||||
cts.conn = conn
|
|
||||||
cts.hdc = hdc
|
|
||||||
//cts.psc = &GuardedPacketStreamClient{psc: psc}
|
|
||||||
cts.psc = &GuardedPacketStreamClient{Hodu_PacketStreamClient: psc}
|
cts.psc = &GuardedPacketStreamClient{Hodu_PacketStreamClient: psc}
|
||||||
|
|
||||||
// the connection structure to a server is ready.
|
// the connection structure to a server is ready.
|
||||||
@ -550,7 +538,7 @@ fmt.Printf("context doine... error - %s\n", cts.cli.ctx.Err().Error())
|
|||||||
|
|
||||||
pkt, err = psc.Recv()
|
pkt, err = psc.Recv()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errors.Is(err, io.EOF) {
|
if status.Code(err) == codes.Canceled || errors.Is(err, net.ErrClosed) {
|
||||||
goto reconnect_to_server
|
goto reconnect_to_server
|
||||||
} else {
|
} else {
|
||||||
cts.cli.log.Write("", LOG_INFO, "Failed to receive packet form server %s - %s", cts.saddr.String(), err.Error())
|
cts.cli.log.Write("", LOG_INFO, "Failed to receive packet form server %s - %s", cts.saddr.String(), err.Error())
|
||||||
@ -660,14 +648,16 @@ fmt.Printf("context doine... error - %s\n", cts.cli.ctx.Err().Error())
|
|||||||
|
|
||||||
done:
|
done:
|
||||||
cts.cli.log.Write("", LOG_INFO, "Disconnected from server %s", cts.saddr.String())
|
cts.cli.log.Write("", LOG_INFO, "Disconnected from server %s", cts.saddr.String())
|
||||||
cts.RemoveClientRoutes()
|
//cts.RemoveClientRoutes()
|
||||||
if conn != nil { conn.Close() }
|
cts.ReqStop()
|
||||||
|
wait_for_termination:
|
||||||
cts.route_wg.Wait() // wait until all route tasks are finished
|
cts.route_wg.Wait() // wait until all route tasks are finished
|
||||||
|
cts.cli.RemoveServerConn(cts)
|
||||||
return
|
return
|
||||||
|
|
||||||
reconnect_to_server:
|
reconnect_to_server:
|
||||||
cts.RemoveClientRoutes()
|
cts.disconnect_from_server()
|
||||||
if conn != nil { conn.Close() }
|
|
||||||
// wait for 2 seconds
|
// wait for 2 seconds
|
||||||
slpctx, _ = context.WithTimeout(cts.cli.ctx, 2 * time.Second)
|
slpctx, _ = context.WithTimeout(cts.cli.ctx, 2 * time.Second)
|
||||||
select {
|
select {
|
||||||
@ -675,7 +665,9 @@ reconnect_to_server:
|
|||||||
fmt.Printf("context doine... error - %s\n", cts.cli.ctx.Err().Error())
|
fmt.Printf("context doine... error - %s\n", cts.cli.ctx.Err().Error())
|
||||||
goto done
|
goto done
|
||||||
case <-cts.stop_chan:
|
case <-cts.stop_chan:
|
||||||
goto done
|
// this signal indicates that ReqStop() has been called
|
||||||
|
// so jump to the waiting label
|
||||||
|
goto wait_for_termination
|
||||||
case <- slpctx.Done():
|
case <- slpctx.Done():
|
||||||
// do nothing
|
// do nothing
|
||||||
}
|
}
|
||||||
@ -700,35 +692,12 @@ func (cts *ServerConn) ReportEvent (route_id uint32, pts_id uint32, event_type P
|
|||||||
|
|
||||||
func (r *ClientRoute) AddNewClientPeerConn (c *net.TCPConn, pts_id uint32) (*ClientPeerConn, error) {
|
func (r *ClientRoute) AddNewClientPeerConn (c *net.TCPConn, pts_id uint32) (*ClientPeerConn, error) {
|
||||||
var ptc *ClientPeerConn
|
var ptc *ClientPeerConn
|
||||||
//var ok bool
|
|
||||||
//var start_id uint32
|
|
||||||
|
|
||||||
r.ptc_mtx.Lock()
|
r.ptc_mtx.Lock()
|
||||||
defer r.ptc_mtx.Unlock()
|
defer r.ptc_mtx.Unlock()
|
||||||
|
|
||||||
/*
|
|
||||||
if len(r.ptc_map) >= r.ptc_limit {
|
|
||||||
return nil, fmt.Errorf("peer-to-client connection table full")
|
|
||||||
}
|
|
||||||
|
|
||||||
start_id = r.ptc_last_id
|
|
||||||
for {
|
|
||||||
_, ok = r.ptc_map[r.ptc_last_id]
|
|
||||||
if !ok {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
r.ptc_last_id++
|
|
||||||
if r.ptc_last_id == start_id {
|
|
||||||
// unlikely to happen but it cycled through the whole range.
|
|
||||||
return nil, fmt.Errorf("failed to assign peer-to-table connection id")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ptc = NewClientPeerConn(r, c, r.ptc_last_id)
|
|
||||||
*/
|
|
||||||
ptc = NewClientPeerConn(r, c, pts_id)
|
ptc = NewClientPeerConn(r, c, pts_id)
|
||||||
r.ptc_map[ptc.conn_id] = ptc
|
r.ptc_map[ptc.conn_id] = ptc
|
||||||
//r.ptc_last_id++
|
|
||||||
|
|
||||||
return ptc, nil
|
return ptc, nil
|
||||||
}
|
}
|
||||||
@ -777,7 +746,7 @@ fmt.Printf ("ADD total servers %d\n", len(c.cts_map))
|
|||||||
func (c *Client) RemoveServerConn(cts *ServerConn) {
|
func (c *Client) RemoveServerConn(cts *ServerConn) {
|
||||||
c.cts_mtx.Lock()
|
c.cts_mtx.Lock()
|
||||||
delete(c.cts_map, cts.saddr)
|
delete(c.cts_map, cts.saddr)
|
||||||
fmt.Printf ("REMOVE total servers %d\n", len(c.cts_map))
|
fmt.Printf ("REMOVEDDDDDD CONNECTION FROM %s total servers %d\n", cts.saddr, len(c.cts_map))
|
||||||
c.cts_mtx.Unlock()
|
c.cts_mtx.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -793,7 +762,6 @@ func (c *Client) ReqStop() {
|
|||||||
cts.ReqStop()
|
cts.ReqStop()
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: notify the server.. send term command???
|
|
||||||
c.stop_chan <- true
|
c.stop_chan <- true
|
||||||
c.ctx_cancel()
|
c.ctx_cancel()
|
||||||
}
|
}
|
||||||
|
@ -18,7 +18,6 @@ import "google.golang.org/grpc/peer"
|
|||||||
import "google.golang.org/grpc/stats"
|
import "google.golang.org/grpc/stats"
|
||||||
|
|
||||||
const PTS_LIMIT = 8192
|
const PTS_LIMIT = 8192
|
||||||
//const CTS_LIMIT = 2048
|
|
||||||
|
|
||||||
type ClientConnMap = map[net.Addr]*ClientConn
|
type ClientConnMap = map[net.Addr]*ClientConn
|
||||||
type ServerPeerConnMap = map[uint32]*ServerPeerConn
|
type ServerPeerConnMap = map[uint32]*ServerPeerConn
|
||||||
|
Loading…
Reference in New Issue
Block a user