Skip to content

Commit

Permalink
Implement regexp matching & bf_match
Browse files Browse the repository at this point in the history
  * Copy and wrap bindings for the crufty legacy C regexpr package from LambdaMOO.
  * Write a bf_match that uses it.
  • Loading branch information
rdaum committed Jul 23, 2023
1 parent 9c278dd commit b7d2362
Show file tree
Hide file tree
Showing 13 changed files with 2,294 additions and 18 deletions.
27 changes: 25 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
members = [
"moor-lib",
"moor-bin",
"regexpr-binding",
]
2 changes: 1 addition & 1 deletion moor-bin/src/server/ws_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use futures_util::{SinkExt, StreamExt};
use tokio::net::{TcpListener, TcpStream};
use tokio::sync::RwLock;
use tokio_tungstenite::{accept_async, WebSocketStream};
use tracing::{error, info, instrument};
use tracing::{error, info, instrument, warn};
use tungstenite::{Error, Message};

use moor_lib::tasks::scheduler::Scheduler;
Expand Down
2 changes: 2 additions & 0 deletions moor-lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ edition = "2021"
tempdir = "0.3.7"

[dependencies]
## Own
regexpr-binding = { path = "../regexpr-binding" }

## General usefulness
itertools = "0.10.5"
Expand Down
30 changes: 21 additions & 9 deletions moor-lib/src/tasks/scheduler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use thiserror::Error;
use tokio::sync::mpsc::error::TryRecvError;
use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender};
use tokio::sync::RwLock;
use tracing::{debug, error, instrument, trace};
use tracing::{debug, error, instrument, trace, warn};

use crate::db::match_env::DBMatchEnvironment;
use crate::db::matching::world_environment_match_object;
Expand Down Expand Up @@ -419,16 +419,22 @@ impl Task {
match &fr {
FinallyReason::Abort => {
error!("Task {} aborted", task_id);
self.sessions
if let Err(send_error) = self
.sessions
.write()
.await
.send_text(self.player, format!("Aborted: {:?}", fr).to_string())
.await
.unwrap();
{
warn!("Could not send abort message to player: {:?}", send_error);
};

self.response_sender
if let Err(send_error) = self
.response_sender
.send((self.task_id, TaskControlResponse::AbortCancelled))
.expect("Could not send exception response");
{
warn!("Could not send abort cancelled response: {:?}", send_error);
}
}
FinallyReason::Uncaught {
code: _,
Expand All @@ -447,17 +453,23 @@ impl Task {
}

for l in traceback.iter() {
self.sessions
if let Err(send_error) = self
.sessions
.write()
.await
.send_text(self.player, l.to_string())
.await
.unwrap();
{
warn!("Could not send traceback to player: {:?}", send_error);
}
}

self.response_sender
if let Err(send_error) = self
.response_sender
.send((self.task_id, TaskControlResponse::Exception(fr)))
.expect("Could not send exception response");
{
warn!("Could not send exception response: {:?}", send_error);
}
}
_ => {
self.response_sender
Expand Down
12 changes: 7 additions & 5 deletions moor-lib/src/vm/bf_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::time::SystemTime;

use async_trait::async_trait;
use tokio::sync::RwLock;
use tracing::warn;

use crate::bf_declare;
use crate::compiler::builtins::offset_for_builtin;
Expand Down Expand Up @@ -43,11 +44,12 @@ async fn bf_notify(
return Ok(v_err(E_TYPE));
};

sess.write()
.await
.send_text(*player, msg.clone())
.await
.unwrap();
if let Err(send_error) = sess.write().await.send_text(*player, msg.clone()).await {
warn!(
"Unable to send message to player: #{}: {}",
player.0, send_error
);
}

Ok(VAR_NONE)
}
Expand Down
51 changes: 50 additions & 1 deletion moor-lib/src/vm/bf_strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@ use async_trait::async_trait;
use magic_crypt::{new_magic_crypt, MagicCryptTrait};
use rand::distributions::Alphanumeric;
use rand::Rng;
use regexpr_binding::Pattern;
use tokio::sync::RwLock;

use crate::bf_declare;
use crate::compiler::builtins::offset_for_builtin;
use crate::db::state::WorldState;
use crate::tasks::Sessions;
use crate::var::error::Error::{E_INVARG, E_TYPE};
use crate::var::{v_err, v_int, v_str, Var, Variant};
use crate::var::{v_err, v_int, v_list, v_str, Var, Variant};
use crate::vm::activation::Activation;
use crate::vm::vm::{BfFunction, VM};

Expand Down Expand Up @@ -237,6 +238,53 @@ async fn bf_binary_hash(
}
bf_declare!(binary_hash, bf_binary_hash);

#[no_mangle]
#[used]
// Global flag exported under its unmangled symbol name (`#[no_mangle]`) and kept in the
// output even though nothing in this file reads it (`#[used]`).
// NOTE(review): presumably the bundled C regexpr code references this symbol as an extern
// "task timed out" flag; confirm against the C declaration, including that its integer width
// matches the `u64` used here.
// TODO: This is not thread safe. If we actually want to use this flag, we will want to put the
// whole 'legacy' regex engine in a mutex.
pub static mut task_timed_out: u64 = 0;

/// MOO builtin `match(subject, pattern [, case_matters])`, backed by the legacy regexpr
/// bindings.
///
/// Arguments (taken from `args`):
/// * `args[0]`: the subject string to search.
/// * `args[1]`: the pattern string, compiled with `Pattern::new`.
/// * `args[2]`: optional integer flag; `case_matters` is true only when it equals `1`.
///   When the argument is omitted, `case_matters` is passed as `false`.
///
/// Returns (always wrapped in `Ok`):
/// * `E_INVARG` if the argument count is not 2 or 3, if the pattern fails to compile, or if
///   `match_pattern` reports an error.
/// * `E_TYPE` if the subject or pattern is not a string, or the flag is not an integer.
/// * Otherwise a list of two-element `{start, end}` lists, one for each pair returned by
///   `match_pattern`.
async fn bf_match(
    _ws: &mut dyn WorldState,
    _frame: &mut Activation,
    _sess: Arc<RwLock<dyn Sessions>>,
    args: &[Var],
) -> Result<Var, anyhow::Error> {
    // The third argument is optional, so both 2 and 3 arguments are valid. (The previous guard
    // rejected everything except exactly 3, making the default below unreachable.)
    if !(2..=3).contains(&args.len()) {
        return Ok(v_err(E_INVARG));
    }
    let (subject, pattern) = match (args[0].variant(), args[1].variant()) {
        (Variant::Str(subject), Variant::Str(pattern)) => (subject, pattern),
        _ => return Ok(v_err(E_TYPE)),
    };

    let case_matters = match args.get(2) {
        // Flag omitted: default to `false`.
        None => false,
        Some(flag) => {
            let Variant::Int(flag) = flag.variant() else {
                return Ok(v_err(E_TYPE));
            };
            // NOTE(review): only the value 1 enables case sensitivity; confirm whether any
            // non-zero (truthy) value should be accepted for compatibility with existing cores.
            *flag == 1
        }
    };

    // TODO: pattern cache?
    let Ok(pattern) = Pattern::new(pattern.as_str(), case_matters) else {
        return Ok(v_err(E_INVARG));
    };

    let Ok(match_vec) = pattern.match_pattern(subject.as_str()) else {
        return Ok(v_err(E_INVARG));
    };

    // Each (start, end) pair becomes its own two-element MOO list.
    Ok(v_list(
        match_vec
            .iter()
            .map(|(start, end)| v_list(vec![v_int(*start as i64), v_int(*end as i64)]))
            .collect(),
    ))
}
bf_declare!(match, bf_match);

impl VM {
pub(crate) fn register_bf_strings(&mut self) -> Result<(), anyhow::Error> {
self.bf_funcs[offset_for_builtin("strsub")] = Arc::new(Box::new(BfStrsub {}));
Expand All @@ -246,6 +294,7 @@ impl VM {
self.bf_funcs[offset_for_builtin("crypt")] = Arc::new(Box::new(BfCrypt {}));
self.bf_funcs[offset_for_builtin("string_hash")] = Arc::new(Box::new(BfStringHash {}));
self.bf_funcs[offset_for_builtin("binary_hash")] = Arc::new(Box::new(BfBinaryHash {}));
self.bf_funcs[offset_for_builtin("match")] = Arc::new(Box::new(BfMatch {}));

Ok(())
}
Expand Down
11 changes: 11 additions & 0 deletions regexpr-binding/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[package]
name = "regexpr-binding"
version = "0.1.0"
edition = "2021"

[build-dependencies]
cc = { version = "1.0", features = ["parallel"] }
bindgen = "0.65.1"

[dependencies]
once_cell = "1.17.2"
11 changes: 11 additions & 0 deletions regexpr-binding/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
A copy (fork) of the LambdaMOO regexp implementation, together with Rust bindings
for it. The LambdaMOO code is itself a fork of the old Python (<1.3) regex
implementation. There are various differences between modern regex engines and
this implementation, so rather than rewrite the engine from scratch and debug
every edge case to get 100% compatibility for existing cores, I'm just going to
wrap the original C code.

I will provide a separate builtin that uses modern regexp matching, and new code
should use that. This is just for compatibility with existing cores.



24 changes: 24 additions & 0 deletions regexpr-binding/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use std::env;
use std::path::PathBuf;

/// Build script entry point: compiles the bundled C regexpr library and generates Rust
/// bindings for its header into `$OUT_DIR/bindings.rs`.
///
/// Panics if binding generation fails, if `OUT_DIR` is missing, or if the bindings file cannot
/// be written; a build script has no caller to report to, so failing the build is intended.
fn main() {
    // Emitting any `rerun-if-changed` directive disables Cargo's default behaviour of rerunning
    // the script when any file in the package changes, so every input must be listed
    // explicitly. Previously only the header was listed, so edits to the C source (or to this
    // script) would not trigger a rebuild.
    println!("cargo:rerun-if-changed=build.rs");
    println!("cargo:rerun-if-changed=c_src/regexpr.c");
    println!("cargo:rerun-if-changed=c_src/regexpr.h");

    // First compile the C library.
    cc::Build::new()
        .file("c_src/regexpr.c")
        .include("c_src")
        .compile("regexpr");

    // Then build the Rust bindings.
    let bindings = bindgen::Builder::default()
        .header("c_src/regexpr.h")
        .parse_callbacks(Box::new(bindgen::CargoCallbacks))
        .generate()
        .expect("Unable to generate bindings");

    let out_path = PathBuf::from(
        env::var("OUT_DIR").expect("OUT_DIR is always set by Cargo when running build scripts"),
    );
    bindings
        .write_to_file(out_path.join("bindings.rs"))
        .expect("Couldn't write bindings!");
}
Loading

0 comments on commit b7d2362

Please sign in to comment.