123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385 |
- use heed3::RoTxn;
- use get_routes::handler;
- use helix_db::{field_remapping, identifier_remapping, traversal_remapping, exclude_field, value_remapping};
- use helix_db::helix_engine::vector_core::vector::HVector;
- use helix_db::{
- helix_engine::graph_core::ops::{
- g::G,
- in_::{in_::InAdapter, in_e::InEdgesAdapter, to_n::ToNAdapter, to_v::ToVAdapter},
- out::{from_n::FromNAdapter, from_v::FromVAdapter, out::OutAdapter, out_e::OutEdgesAdapter},
- source::{
- add_e::{AddEAdapter, EdgeType},
- add_n::AddNAdapter,
- e_from_id::EFromIdAdapter,
- e_from_type::EFromTypeAdapter,
- n_from_id::NFromIdAdapter,
- n_from_type::NFromTypeAdapter,
- n_from_index::NFromIndexAdapter,
- },
- tr_val::{Traversable, TraversalVal},
- util::{
- dedup::DedupAdapter, filter_mut::FilterMut,
- filter_ref::FilterRefAdapter, range::RangeAdapter, update::UpdateAdapter,
- map::MapAdapter, paths::ShortestPathAdapter, props::PropsAdapter, drop::Drop,
- },
- vectors::{insert::InsertVAdapter, search::SearchVAdapter, brute_force_search::BruteForceSearchVAdapter},
- bm25::search_bm25::SearchBM25Adapter,
- },
- helix_engine::types::GraphError,
- helix_gateway::router::router::HandlerInput,
- node_matches, props,
- protocol::count::Count,
- protocol::remapping::{RemappingMap, ResponseRemapping},
- protocol::response::Response,
- protocol::{
- filterable::Filterable, remapping::Remapping, return_values::ReturnValue, value::Value, id::ID,
- },
- };
- use sonic_rs::{Deserialize, Serialize};
- use std::collections::{HashMap, HashSet};
- use std::sync::Arc;
- use std::time::Instant;
- use std::cell::RefCell;
- use chrono::{DateTime, Utc};
-
/// Schema of a `Company` node.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Company {
    /// Company identifier; the handlers in this file look companies up through the
    /// `company_number` index.
    pub company_number: String,
    /// Number of filings recorded for the company.
    pub number_of_filings: i32,
}

/// Schema of a `DocumentEdge` edge, linking a [`Company`] to one [`DocumentEmbedding`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DocumentEdge {
    /// Source end of the edge.
    pub from: Company,
    /// Target end of the edge.
    pub to: DocumentEmbedding,
    pub filing_id: String,
    pub category: String,
    pub subcategory: String,
    pub date: String,
    pub description: String,
}

/// Schema of the properties stored alongside a `DocumentEmbedding` vector.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DocumentEmbedding {
    pub text: String,
    pub chunk_id: String,
    // NOTE(review): the request structs in this file carry `page_number` as `i32`, while the
    // schema declares `u16` here — confirm which width is intended.
    pub page_number: u16,
    pub reference: String,
    pub source_link: String,
    pub source_date: String,
}
- #[derive(Serialize, Deserialize)]
- pub struct GetAllCompanyEmbeddingsInput {
- pub company_number: String
- }
- #[handler]
- pub fn GetAllCompanyEmbeddings (input: &HandlerInput, response: &mut Response) -> Result<(), GraphError> {
- let data: GetAllCompanyEmbeddingsInput = match sonic_rs::from_slice(&input.request.body) {
- Ok(data) => data,
- Err(err) => return Err(GraphError::from(err)),
- };
- let mut remapping_vals = RemappingMap::new();
- let db = Arc::clone(&input.graph.storage);
- let txn = db.graph_env.read_txn().unwrap();
- let c = G::new(Arc::clone(&db), &txn)
- .n_from_index("company_number", &data.company_number).collect_to::<Vec<_>>();
- let embeddings = G::new_from(Arc::clone(&db), &txn, c.clone())
- .out("DocumentEdge",&EdgeType::Vec).collect_to::<Vec<_>>();
- let mut return_vals: HashMap<String, ReturnValue> = HashMap::new();
- return_vals.insert("embeddings".to_string(), ReturnValue::from_traversal_value_array_with_mixin(embeddings.clone(), remapping_vals.borrow_mut()));
- txn.commit().unwrap();
- response.body = sonic_rs::to_vec(&return_vals).unwrap();
- Ok(())
- }
- #[derive(Serialize, Deserialize)]
- pub struct AddVectorInput {
- pub vector: Vec<f64>,
- pub text: String,
- pub chunk_id: String,
- pub page_number: i32,
- pub reference: String
- }
- #[handler]
- pub fn AddVector (input: &HandlerInput, response: &mut Response) -> Result<(), GraphError> {
- let data: AddVectorInput = match sonic_rs::from_slice(&input.request.body) {
- Ok(data) => data,
- Err(err) => return Err(GraphError::from(err)),
- };
- let mut remapping_vals = RemappingMap::new();
- let db = Arc::clone(&input.graph.storage);
- let mut txn = db.graph_env.write_txn().unwrap();
- let embedding = G::new_mut(Arc::clone(&db), &mut txn)
- .insert_v::<fn(&HVector, &RoTxn) -> bool>(&data.vector, "DocumentEmbedding", Some(props! { "text" => data.text, "page_number" => data.page_number, "chunk_id" => data.chunk_id, "reference" => data.reference })).collect_to::<Vec<_>>();
- let mut return_vals: HashMap<String, ReturnValue> = HashMap::new();
- return_vals.insert("embedding".to_string(), ReturnValue::from_traversal_value_array_with_mixin(embedding.clone(), remapping_vals.borrow_mut()));
- txn.commit().unwrap();
- response.body = sonic_rs::to_vec(&return_vals).unwrap();
- Ok(())
- }
- #[derive(Serialize, Deserialize)]
- pub struct AddCompanyInput {
- pub company_number: String,
- pub number_of_filings: i32
- }
- #[handler]
- pub fn AddCompany (input: &HandlerInput, response: &mut Response) -> Result<(), GraphError> {
- let data: AddCompanyInput = match sonic_rs::from_slice(&input.request.body) {
- Ok(data) => data,
- Err(err) => return Err(GraphError::from(err)),
- };
- let mut remapping_vals = RemappingMap::new();
- let db = Arc::clone(&input.graph.storage);
- let mut txn = db.graph_env.write_txn().unwrap();
- let company = G::new_mut(Arc::clone(&db), &mut txn)
- .add_n("Company", Some(props! { "number_of_filings" => data.number_of_filings.clone(), "company_number" => data.company_number.clone() }), Some(&["company_number"])).collect_to::<Vec<_>>();
- let mut return_vals: HashMap<String, ReturnValue> = HashMap::new();
- return_vals.insert("company".to_string(), ReturnValue::from_traversal_value_array_with_mixin(company.clone(), remapping_vals.borrow_mut()));
- txn.commit().unwrap();
- response.body = sonic_rs::to_vec(&return_vals).unwrap();
- Ok(())
- }
- #[handler]
- pub fn DeleteAll (input: &HandlerInput, response: &mut Response) -> Result<(), GraphError> {
- let mut remapping_vals = RemappingMap::new();
- let db = Arc::clone(&input.graph.storage);
- let mut txn = db.graph_env.write_txn().unwrap();
- Drop::<Vec<_>>::drop_traversal(
- G::new(Arc::clone(&db), &txn)
- .n_from_type("Company").collect::<Vec<_>>(),
- Arc::clone(&db),
- &mut txn,
- )?;;
- let mut return_vals: HashMap<String, ReturnValue> = HashMap::new();
- return_vals.insert("success".to_string(), ReturnValue::from(Value::from("success")));
- txn.commit().unwrap();
- response.body = sonic_rs::to_vec(&return_vals).unwrap();
- Ok(())
- }
- #[derive(Serialize, Deserialize)]
- pub struct HasCompanyInput {
- pub company_number: String
- }
- #[handler]
- pub fn HasCompany (input: &HandlerInput, response: &mut Response) -> Result<(), GraphError> {
- let data: HasCompanyInput = match sonic_rs::from_slice(&input.request.body) {
- Ok(data) => data,
- Err(err) => return Err(GraphError::from(err)),
- };
- let mut remapping_vals = RemappingMap::new();
- let db = Arc::clone(&input.graph.storage);
- let txn = db.graph_env.read_txn().unwrap();
- let company = G::new(Arc::clone(&db), &txn)
- .n_from_index("company_number", &data.company_number).collect_to::<Vec<_>>();
- let mut return_vals: HashMap<String, ReturnValue> = HashMap::new();
- return_vals.insert("company".to_string(), ReturnValue::from_traversal_value_array_with_mixin(company.clone(), remapping_vals.borrow_mut()));
- txn.commit().unwrap();
- response.body = sonic_rs::to_vec(&return_vals).unwrap();
- Ok(())
- }
- #[derive(Serialize, Deserialize)]
- pub struct DeleteCompanyInput {
- pub company_number: String
- }
- #[handler]
- pub fn DeleteCompany (input: &HandlerInput, response: &mut Response) -> Result<(), GraphError> {
- let data: DeleteCompanyInput = match sonic_rs::from_slice(&input.request.body) {
- Ok(data) => data,
- Err(err) => return Err(GraphError::from(err)),
- };
- let mut remapping_vals = RemappingMap::new();
- let db = Arc::clone(&input.graph.storage);
- let mut txn = db.graph_env.write_txn().unwrap();
- Drop::<Vec<_>>::drop_traversal(
- G::new(Arc::clone(&db), &txn)
- .n_from_index("company_number", &data.company_number)
- .out("DocumentEdge",&EdgeType::Vec).collect::<Vec<_>>(),
- Arc::clone(&db),
- &mut txn,
- )?;;
- Drop::<Vec<_>>::drop_traversal(
- G::new(Arc::clone(&db), &txn)
- .n_from_index("company_number", &data.company_number).collect::<Vec<_>>(),
- Arc::clone(&db),
- &mut txn,
- )?;;
- let mut return_vals: HashMap<String, ReturnValue> = HashMap::new();
- return_vals.insert("success".to_string(), ReturnValue::from(Value::from("success")));
- txn.commit().unwrap();
- response.body = sonic_rs::to_vec(&return_vals).unwrap();
- Ok(())
- }
- #[derive(Serialize, Deserialize)]
- pub struct HasDocumentEmbeddingsInput {
- pub company_number: String
- }
- #[handler]
- pub fn HasDocumentEmbeddings (input: &HandlerInput, response: &mut Response) -> Result<(), GraphError> {
- let data: HasDocumentEmbeddingsInput = match sonic_rs::from_slice(&input.request.body) {
- Ok(data) => data,
- Err(err) => return Err(GraphError::from(err)),
- };
- let mut remapping_vals = RemappingMap::new();
- let db = Arc::clone(&input.graph.storage);
- let txn = db.graph_env.read_txn().unwrap();
- let c = G::new(Arc::clone(&db), &txn)
- .n_from_index("company_number", &data.company_number).collect_to::<Vec<_>>();
- let embeddings = G::new_from(Arc::clone(&db), &txn, c.clone())
- .out("DocumentEdge",&EdgeType::Vec).collect_to::<Vec<_>>();
- let mut return_vals: HashMap<String, ReturnValue> = HashMap::new();
- return_vals.insert("embeddings".to_string(), ReturnValue::from_traversal_value_array_with_mixin(embeddings.clone(), remapping_vals.borrow_mut()));
- txn.commit().unwrap();
- response.body = sonic_rs::to_vec(&return_vals).unwrap();
- Ok(())
- }
- #[handler]
- pub fn GetCompanies (input: &HandlerInput, response: &mut Response) -> Result<(), GraphError> {
- let mut remapping_vals = RemappingMap::new();
- let db = Arc::clone(&input.graph.storage);
- let txn = db.graph_env.read_txn().unwrap();
- let companies = G::new(Arc::clone(&db), &txn)
- .n_from_type("Company").collect_to::<Vec<_>>();
- let mut return_vals: HashMap<String, ReturnValue> = HashMap::new();
- return_vals.insert("companies".to_string(), ReturnValue::from_traversal_value_array_with_mixin(companies.clone(), remapping_vals.borrow_mut()));
- txn.commit().unwrap();
- response.body = sonic_rs::to_vec(&return_vals).unwrap();
- Ok(())
- }
- #[derive(Serialize, Deserialize)]
- pub struct embeddings_dataData {
- pub category: String,
- pub subcategory: String,
- pub reference: String,
- pub date1: String,
- pub source: String,
- pub chunk_id: String,
- pub description: String,
- pub filing_id: String,
- pub vector: Vec<f64>,
- pub page_number: i32,
- pub date2: String,
- pub text: String,
- }
- #[derive(Serialize, Deserialize)]
- pub struct AddEmbeddingsToCompanyInput {
- pub company_number: String,
- pub embeddings_data: Vec<embeddings_dataData>
- }
- #[handler]
- pub fn AddEmbeddingsToCompany (input: &HandlerInput, response: &mut Response) -> Result<(), GraphError> {
- let data: AddEmbeddingsToCompanyInput = match sonic_rs::from_slice(&input.request.body) {
- Ok(data) => data,
- Err(err) => return Err(GraphError::from(err)),
- };
- let mut remapping_vals = RemappingMap::new();
- let db = Arc::clone(&input.graph.storage);
- let mut txn = db.graph_env.write_txn().unwrap();
- let c = G::new(Arc::clone(&db), &txn)
- .n_from_index("company_number", &data.company_number).collect_to::<Vec<_>>();
- for data in data.embeddings_data {
- let embedding = G::new_mut(Arc::clone(&db), &mut txn)
- .insert_v::<fn(&HVector, &RoTxn) -> bool>(&data.vector, "DocumentEmbedding", Some(props! { "source_date" => data.date1, "source_link" => data.source, "page_number" => data.page_number, "reference" => data.reference, "text" => data.text, "chunk_id" => data.chunk_id })).collect_to::<Vec<_>>();
- let edges = G::new_mut(Arc::clone(&db), &mut txn)
- .add_e("DocumentEdge", Some(props! { "filing_id" => data.filing_id.clone(), "date" => data.date2.clone(), "subcategory" => data.subcategory.clone(), "category" => data.category.clone(), "description" => data.description.clone() }), c.id(), embedding.id(), true, EdgeType::Node).collect_to::<Vec<_>>();
- }
- ;
- let mut return_vals: HashMap<String, ReturnValue> = HashMap::new();
- return_vals.insert("success".to_string(), ReturnValue::from(Value::from("success")));
- txn.commit().unwrap();
- response.body = sonic_rs::to_vec(&return_vals).unwrap();
- Ok(())
- }
- #[derive(Serialize, Deserialize)]
- pub struct SearchVectorInput {
- pub query: Vec<f64>,
- pub k: i32
- }
- #[handler]
- pub fn SearchVector (input: &HandlerInput, response: &mut Response) -> Result<(), GraphError> {
- let data: SearchVectorInput = match sonic_rs::from_slice(&input.request.body) {
- Ok(data) => data,
- Err(err) => return Err(GraphError::from(err)),
- };
- let mut remapping_vals = RemappingMap::new();
- let db = Arc::clone(&input.graph.storage);
- let txn = db.graph_env.read_txn().unwrap();
- let embedding_search = G::new(Arc::clone(&db), &txn)
- .search_v::<fn(&HVector, &RoTxn) -> bool>(&data.query, data.k as usize, None).collect_to::<Vec<_>>();
- let mut return_vals: HashMap<String, ReturnValue> = HashMap::new();
- return_vals.insert("embedding_search".to_string(), ReturnValue::from_traversal_value_array_with_mixin(embedding_search.clone(), remapping_vals.borrow_mut()));
- txn.commit().unwrap();
- response.body = sonic_rs::to_vec(&return_vals).unwrap();
- Ok(())
- }
- #[derive(Serialize, Deserialize)]
- pub struct CompanyEmbeddingSearchInput {
- pub company_number: String,
- pub query: Vec<f64>,
- pub k: i32
- }
- #[handler]
- pub fn CompanyEmbeddingSearch (input: &HandlerInput, response: &mut Response) -> Result<(), GraphError> {
- let data: CompanyEmbeddingSearchInput = match sonic_rs::from_slice(&input.request.body) {
- Ok(data) => data,
- Err(err) => return Err(GraphError::from(err)),
- };
- let mut remapping_vals = RemappingMap::new();
- let db = Arc::clone(&input.graph.storage);
- let txn = db.graph_env.read_txn().unwrap();
- let c = G::new(Arc::clone(&db), &txn)
- .n_from_index("company_number", &data.company_number)
- .out_e("DocumentEdge")
- .to_v().collect_to::<Vec<_>>();
- let embedding_search = G::new_from(Arc::clone(&db), &txn, c.clone())
- .brute_force_search_v(&data.query, data.k as usize).collect_to::<Vec<_>>();
- let mut return_vals: HashMap<String, ReturnValue> = HashMap::new();
- return_vals.insert("embedding_search".to_string(), ReturnValue::from_traversal_value_array_with_mixin(embedding_search.clone(), remapping_vals.borrow_mut()));
- txn.commit().unwrap();
- response.body = sonic_rs::to_vec(&return_vals).unwrap();
- Ok(())
- }
|