use port instead of pidof to detect tor going down (#2320)

* use port instead of pidof to detect tor going down

* fix errors

* healthcheck timeout
This commit is contained in:
Aiden McClelland
2023-06-23 13:06:00 -06:00
committed by GitHub
parent ef416ef60b
commit dd28ad20ef

View File

@@ -1,5 +1,6 @@
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::net::SocketAddr; use std::net::SocketAddr;
use std::sync::atomic::AtomicBool;
use std::sync::{Arc, Weak}; use std::sync::{Arc, Weak};
use std::time::Duration; use std::time::Duration;
@@ -31,6 +32,7 @@ use crate::util::{display_none, Invoke};
use crate::{Error, ErrorKind, ResultExt as _}; use crate::{Error, ErrorKind, ResultExt as _};
pub const SYSTEMD_UNIT: &str = "tor@default"; pub const SYSTEMD_UNIT: &str = "tor@default";
const STARTING_HEALTH_TIMEOUT: u64 = 120; // 2min
enum ErrorLogSeverity { enum ErrorLogSeverity {
Fatal { wipe_state: bool }, Fatal { wipe_state: bool },
@@ -256,15 +258,10 @@ async fn torctl(
tor_socks: SocketAddr, tor_socks: SocketAddr,
recv: &mut mpsc::UnboundedReceiver<TorCommand>, recv: &mut mpsc::UnboundedReceiver<TorCommand>,
services: &mut BTreeMap<[u8; 64], BTreeMap<u16, BTreeMap<SocketAddr, Weak<()>>>>, services: &mut BTreeMap<[u8; 64], BTreeMap<u16, BTreeMap<SocketAddr, Weak<()>>>>,
wipe_state: &AtomicBool,
health_timeout: &mut Duration,
) -> Result<(), Error> { ) -> Result<(), Error> {
let bootstrap = async { let bootstrap = async {
tokio::fs::create_dir_all("/var/lib/tor").await?;
Command::new("chown")
.arg("-R")
.arg("debian-tor")
.arg("/var/lib/tor")
.invoke(ErrorKind::Filesystem)
.await?;
if Command::new("systemctl") if Command::new("systemctl")
.arg("is-active") .arg("is-active")
.arg("--quiet") .arg("--quiet")
@@ -278,15 +275,30 @@ async fn torctl(
.arg("tor") .arg("tor")
.invoke(ErrorKind::Tor) .invoke(ErrorKind::Tor)
.await?; .await?;
while Command::new("pidof") for _ in 0..30 {
.arg("tor") if TcpStream::connect(tor_control).await.is_err() {
.invoke(ErrorKind::Tor) break;
.await }
.is_ok()
{
tokio::time::sleep(Duration::from_secs(1)).await; tokio::time::sleep(Duration::from_secs(1)).await;
} }
if TcpStream::connect(tor_control).await.is_ok() {
return Err(Error::new(
eyre!("Tor is failing to shut down"),
ErrorKind::Tor,
));
}
} }
if wipe_state.load(std::sync::atomic::Ordering::SeqCst) {
tokio::fs::remove_dir_all("/var/lib/tor").await?;
wipe_state.store(false, std::sync::atomic::Ordering::SeqCst);
}
tokio::fs::create_dir_all("/var/lib/tor").await?;
Command::new("chown")
.arg("-R")
.arg("debian-tor")
.arg("/var/lib/tor")
.invoke(ErrorKind::Filesystem)
.await?;
Command::new("systemctl") Command::new("systemctl")
.arg("start") .arg("start")
.arg("tor") .arg("tor")
@@ -393,7 +405,14 @@ async fn torctl(
))) )))
.unwrap_or_default(); .unwrap_or_default();
} }
_ => (), TorCommand::GC { .. } => (),
TorCommand::Reset {
wipe_state: new_wipe_state,
context,
} => {
wipe_state.fetch_or(new_wipe_state, std::sync::atomic::Ordering::SeqCst);
return Err(context);
}
} }
} }
Ok(()) Ok(())
@@ -546,17 +565,10 @@ async fn torctl(
.unwrap_or_default(); .unwrap_or_default();
} }
TorCommand::Reset { TorCommand::Reset {
wipe_state, wipe_state: new_wipe_state,
context, context,
} => { } => {
if wipe_state { wipe_state.fetch_or(new_wipe_state, std::sync::atomic::Ordering::SeqCst);
Command::new("systemctl")
.arg("stop")
.arg("tor")
.invoke(ErrorKind::Tor)
.await?;
tokio::fs::remove_dir_all("/var/lib/tor").await?;
}
return Err(context); return Err(context);
} }
} }
@@ -601,10 +613,37 @@ async fn torctl(
} }
Err(Error::new(eyre!("Log stream terminated"), ErrorKind::Tor)) Err(Error::new(eyre!("Log stream terminated"), ErrorKind::Tor))
}; };
let health_checker = async {
let mut last_success = Instant::now();
loop {
tokio::time::sleep(Duration::from_secs(30)).await;
if let Err(e) = tokio::time::timeout(
Duration::from_secs(30),
tokio_socks::tcp::Socks5Stream::connect(
tor_socks,
(hck_key.public().get_onion_address().to_string(), 80),
),
)
.await
.map_err(|e| e.to_string())
.and_then(|e| e.map_err(|e| e.to_string()))
{
if last_success.elapsed() > *health_timeout {
let err = Error::new(eyre!("Tor health check failed for longer than current timeout ({health_timeout:?})"), crate::ErrorKind::Tor);
*health_timeout *= 2;
wipe_state.store(true, std::sync::atomic::Ordering::SeqCst);
return Err(err);
}
} else {
last_success = Instant::now();
}
}
};
tokio::select! { tokio::select! {
res = handler => res?, res = handler => res?,
res = log_parser => res?, res = log_parser => res?,
res = health_checker => res?,
} }
Ok(()) Ok(())
@@ -620,7 +659,18 @@ impl TorControl {
Self { Self {
_thread: tokio::spawn(async move { _thread: tokio::spawn(async move {
let mut services = BTreeMap::new(); let mut services = BTreeMap::new();
while let Err(e) = torctl(tor_control, tor_socks, &mut recv, &mut services).await { let wipe_state = AtomicBool::new(false);
let mut health_timeout = Duration::from_secs(STARTING_HEALTH_TIMEOUT);
while let Err(e) = torctl(
tor_control,
tor_socks,
&mut recv,
&mut services,
&wipe_state,
&mut health_timeout,
)
.await
{
tracing::error!("{e}: Restarting tor"); tracing::error!("{e}: Restarting tor");
tracing::debug!("{e:?}"); tracing::debug!("{e:?}");
} }