@@ -1244,8 +1244,7 @@ data::datax::recover_ropen(fuse_req_t req)
1244
1244
}
1245
1245
1246
1246
// Issue a new open requesting also a new TCP connection
1247
- const struct fuse_ctx * ctx = fuse_req_ctx (req);
1248
- XrdCl::shared_proxy newproxy = XrdCl::Proxy::Factory (ctx);
1247
+ XrdCl::shared_proxy newproxy = XrdCl::Proxy::Factory (proxy, true );
1249
1248
newproxy->OpenAsync (newproxy, mRemoteUrlRO .c_str (), targetFlags, mode, 0 );
1250
1249
// wait this time for completion
1251
1250
@@ -1407,8 +1406,7 @@ data::datax::try_ropen(fuse_req_t req, XrdCl::shared_proxy &proxy,
1407
1406
}
1408
1407
1409
1408
// Issue a new open requesting also a new TCP connection
1410
- const struct fuse_ctx * ctx = fuse_req_ctx (req);
1411
- XrdCl::shared_proxy newproxy = XrdCl::Proxy::Factory (ctx);
1409
+ XrdCl::shared_proxy newproxy = XrdCl::Proxy::Factory (proxy, true );
1412
1410
newproxy->OpenAsync (newproxy, open_url.c_str (), targetFlags, mode, 0 );
1413
1411
// wait this time for completion
1414
1412
@@ -1557,8 +1555,7 @@ data::datax::try_wopen(fuse_req_t req, XrdCl::shared_proxy &proxy,
1557
1555
1558
1556
eos_warning (" recover reopening file for writing" );
1559
1557
// Issue a new open requesting also a new TCP connection
1560
- const struct fuse_ctx * ctx = fuse_req_ctx (req);
1561
- XrdCl::shared_proxy newproxy = XrdCl::Proxy::Factory (ctx);
1558
+ XrdCl::shared_proxy newproxy = XrdCl::Proxy::Factory (proxy, true );
1562
1559
newproxy->OpenAsync (newproxy, open_url.c_str (), targetFlags, mode, 0 );
1563
1560
// wait this time for completion
1564
1561
@@ -1739,8 +1736,7 @@ data::datax::recover_write(fuse_req_t req)
1739
1736
}
1740
1737
1741
1738
// Issue a new open requesting also a new TCP connection
1742
- const struct fuse_ctx * ctx = fuse_req_ctx (req);
1743
- XrdCl::shared_proxy newproxy = XrdCl::Proxy::Factory (ctx);
1739
+ XrdCl::shared_proxy newproxy = XrdCl::Proxy::Factory (proxy, true );
1744
1740
1745
1741
if (!recover_from_file_cache && !recover_truncate) {
1746
1742
// we need to open this file because it is not complete locally
@@ -1904,7 +1900,8 @@ data::datax::recover_write(fuse_req_t req)
1904
1900
}
1905
1901
1906
1902
// upload into identical inode using the drop & replace option (repair flag)
1907
- XrdCl::shared_proxy uploadproxy = XrdCl::Proxy::Factory ();
1903
+ // Issue a new open requesting also a new TCP connection
1904
+ XrdCl::shared_proxy uploadproxy = XrdCl::Proxy::Factory (proxy, true );
1908
1905
uploadproxy->inherit_attached (proxy);
1909
1906
uploadproxy->inherit_writequeue (uploadproxy, proxy);
1910
1907
@@ -1913,15 +1910,14 @@ data::datax::recover_write(fuse_req_t req)
1913
1910
eos_warning (" failed to signal begin-flush" );
1914
1911
}
1915
1912
1913
+ std::string tmpUrl = mRemoteUrlRW ;
1916
1914
// add the repair flag to drop existing locations and select new ones
1917
- mRemoteUrlRW += " &eos.repair=1" ;
1915
+ tmpUrl += " &eos.repair=1" ;
1918
1916
// request enough space for this recovery upload
1919
- mRemoteUrlRW += " &eos.bookingsize=0" ;
1917
+ tmpUrl += " &eos.bookingsize=0" ;
1920
1918
eos_warning (" re-opening with repair flag for recovery %s" ,
1921
- mRemoteUrlRW .c_str ());
1922
- int rc = try_wopen (req, uploadproxy, mRemoteUrlRW );
1923
- mRemoteUrlRW .erase (mRemoteUrlRW .length () -
1924
- std::string (" &eos.repair=1" ).length ());
1919
+ tmpUrl.c_str ());
1920
+ int rc = try_wopen (req, uploadproxy, tmpUrl);
1925
1921
1926
1922
// put back the flush indicator
1927
1923
if (req && begin_flush (req)) {
@@ -2641,7 +2637,7 @@ data::datax::peek_pread(fuse_req_t req, char*& buf, size_t count, off_t offset)
2641
2637
int tret = 0 ;
2642
2638
2643
2639
// call recovery for an open
2644
- if ((tret = TryRecovery (req, false ))) {
2640
+ if ((tret = TryRecovery (req, true ))) {
2645
2641
mRecoveryStack .push_back (eos_log (LOG_SILENT,
2646
2642
" status='%s' errno='%d' hint='failed TryRecovery'" ,
2647
2643
status.ToString ().c_str (), tret));
@@ -2702,7 +2698,7 @@ data::datax::peek_pread(fuse_req_t req, char*& buf, size_t count, off_t offset)
2702
2698
mRecoveryStack .push_back (eos_log (LOG_SILENT,
2703
2699
" status='%s' hint='will TryRecovery'" ,
2704
2700
status.ToString ().c_str ()));
2705
- recovery = TryRecovery (req, false );
2701
+ recovery = TryRecovery (req, mFile -> has_xrdioro (req) ? false : true );
2706
2702
2707
2703
if (recovery) {
2708
2704
// recovery failed
@@ -3026,6 +3022,18 @@ data::datax::set_remote(const std::string& hostport,
3026
3022
}
3027
3023
}
3028
3024
3025
+ /* -------------------------------------------------------------------------- */
3026
+ std::string // returned by value: the caller receives a copy of the selected URL string
3027
+ /* -------------------------------------------------------------------------- */
3028
+ data::datax::get_remote (bool isRW) // isRW chooses between the two stored remote URLs
3029
+ /* -------------------------------------------------------------------------- */
3030
+ {
3031
+ if (isRW) { // true: hand back mRemoteUrlRW
3032
+ return mRemoteUrlRW ;
3033
+ }
3034
+ return mRemoteUrlRO ; // false: hand back mRemoteUrlRO
3035
+ }
3036
+
3029
3037
/* -------------------------------------------------------------------------- */
3030
3038
void
3031
3039
data::datax::dump_recovery_stack ()
@@ -3355,33 +3363,48 @@ data::dmap::ioflush(ThreadAssistant& assistant)
3355
3363
// let's see if the initial OpenAsync got a timeout, this we should retry always
3356
3364
XrdCl::XRootDStatus status = fit->second ->opening_state ();
3357
3365
bool rescue = true ;
3366
+ bool canreissue = true ;
3367
+ const std::string opname =
3368
+ (fit->second ->state () == XrdCl::Proxy::CLOSEFAILED) ? " CloseAsync" : " OpenAsync" ;
3369
+
3370
+ if (fit->second ->state () == XrdCl::Proxy::CLOSEFAILED &&
3371
+ fit->second ->opening_state ().code == XrdCl::errOperationExpired) {
3372
+ // to trigger new tcp connection for next time
3373
+ XrdCl::shared_proxy newproxy = XrdCl::Proxy::Factory (fit->second , true );
3374
+ canreissue = false ;
3375
+ }
3358
3376
3359
- if (
3377
+ if (canreissue && (
3360
3378
(status.code == XrdCl::errConnectionError) ||
3361
3379
(status.code == XrdCl::errSocketTimeout) ||
3362
3380
(status.code == XrdCl::errOperationExpired) ||
3363
- (status.code == XrdCl::errSocketDisconnected)) {
3364
- eos_static_warning (" re-issuing OpenAsync request after timeout - ino:%16lx err-code:%d" ,
3365
- (*it)->id (), status.code );
3381
+ (status.code == XrdCl::errSocketDisconnected))) {
3382
+ eos_static_warning (" re-issuing %s request after timeout - ino:%16lx err-code:%d" ,
3383
+ opname. c_str (), (*it)->id (), status.code );
3366
3384
// Recover such errors by force creation of a new XrdCl
3367
- // File object and a new TCP connection to avoid pilling
3368
- // up requests on a "blocked" TCP due, for example, to a
3369
- // slow close operation.
3370
- fuse_id id = fit->second ->fuseid ();
3371
- XrdCl::shared_proxy newproxy = XrdCl::Proxy::Factory (nullptr , &id);
3372
- newproxy->OpenAsync (newproxy, fit->second ->url (), fit->second ->flags (),
3373
- fit->second ->mode (), 0 );
3385
+ // File object. Also try to use a new TCP connection for our
3386
+ // fuseid(), to avoid piling up requests on a "blocked" TCP due,
3387
+ // for example, to a slow close operation. A new connection may
3388
+ // not always be used for the re-issued open below, e.g. if the
3389
+ // process identified by the proxy's fuseid() has exited, the
3390
+ // processCache will not increment the connection counter.
3391
+ XrdCl::shared_proxy newproxy = XrdCl::Proxy::Factory (fit->second , true );
3392
+ // For the open use the url from the ioctx (datax) object rather than
3393
+ // the previous proxy. The previous proxy may have used url options
3394
+ // we don't want here, such as eos.repair.
3395
+ newproxy->OpenAsync (newproxy, (*it)->get_remote (true ),
3396
+ fit->second ->flags (), fit->second ->mode (), 0 );
3374
3397
newproxy->inherit_attached (fit->second );
3375
3398
newproxy->inherit_protocol (fit->second );
3376
3399
map[fit->first ] = newproxy;
3377
3400
continue ;
3378
3401
} else {
3379
- eos_static_warning (" OpenAsync failed - trying recovery - ino:%16lx err-code:%d" ,
3380
- (*it)->id (), status.code );
3381
-
3382
3402
if (status.errNo == kXR_noserver ) {
3383
3403
int tret = 0 ;
3384
3404
3405
+ eos_static_warning (" %s failed - trying recovery - ino:%16lx err-code:%d" ,
3406
+ opname.c_str (),(*it)->id (), status.code );
3407
+
3385
3408
if (!(tret = (*it)->TryRecovery (0 , true ))) {
3386
3409
(*it)->recoverystack ().push_back
3387
3410
(eos_static_log (LOG_SILENT, " hint='success TryRecovery'" ));
@@ -3402,8 +3425,8 @@ data::dmap::ioflush(ThreadAssistant& assistant)
3402
3425
}
3403
3426
}
3404
3427
3405
- eos_static_warning (" giving up OpenAsync request - ino:%16lx err-code:%d" ,
3406
- (*it)->id (), status.code );
3428
+ eos_static_warning (" giving up %s request - ino:%16lx err-code:%d" ,
3429
+ opname. c_str (), (*it)->id (), status.code );
3407
3430
3408
3431
if (status.errNo == kXR_overQuota ) {
3409
3432
// don't preserve these files, they got an application error beforehand
0 commit comments