From 438acc989c076751875b3904a19948e07cb97274 Mon Sep 17 00:00:00 2001 From: karanchellani <142801957+karanchellani@users.noreply.github.com> Date: Thu, 23 Jan 2025 15:49:25 +0530 Subject: [PATCH] Dev to Staging (#1028) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * rectified code to not include Document node while graph_consolidation (#1007) * Nova models addition (#1006) * amazon nova models added, titan embeddings added * example env added with nova model config * New models (#1009) * Dev to staging (#1005) * Post-Processing-Alerts (#758) * added the alerts before and after the post processing * Tooltip changes * added write access check * added write access param * added fulltext creation * disabled the write and delete actions for read only user mode * modified query * test updates * test uupdated * Read Only User Support (#766) * added local chat history * added write access check * added write access param * added fulltext creation * disabled the write and delete actions for read only user mode * modified query --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * storing the gds status and write access on refresh * Langchain libs update (#769) * LLMs with latest langchain dev libraries * conflict resolved * all llm models with latest library changes * fixed the rerendering of the table while file status is processing * fix: Read Only User Fix * Global search fulltext (#767) * added global search+vector+fulltext mode * added community details in chunk entities * added node ids * updated vector graph query * added entities and modified chat response * added params * api response changes * added chunk entity query * modifies query * payload changes * added nodetails properties * payload new changes * communities check * communities selecetion check * Communities bug solutions (#770) * added local chat history * added write access check * added write access param * 
labels cahnge for nodes * added fulltext creation * disabled the write and delete actions for read only user mode * modified query * test updates * test uupdated * enable communities * removed the selected prop * Read Only User Support (#766) * added local chat history * added write access check * added write access param * added fulltext creation * disabled the write and delete actions for read only user mode * modified query --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * storing the gds status and write access on refresh * enable communities label change --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> * readonly fixed on refresh * clear chat history * slectedFiles check for Chatbot * clear history --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> * Added elapsed time for extarction on each breakdown function * lint and format fixes * removed dev logs * communities fix * disabled the generate graph for read only user * format fixes * graph labels change * added the readonly check for already added waiting files * Retriever evaluation using RAGAS * deleted unused file * code optimization using memo * Added elapsed_time on each api and getiing time per_entity * Added the post processing Alert showcasing the ongoing post processing jobs * fix: readonly user retry option disable * update script to get details of extarcted doc * Issue fixed, Latency count per entity * Multiple chat modes selection (#780) * added Multi modes selection * multimodes state mangement * fix: 
state handling of chat details of the default mode * Added the ChatModeSwitch Component * modes switch statemangement * added the chatmodes switch in both view * removed the copied text * Handled the error scenario * fix: speech issue between modes * fix: Handled activespeech speech and othermessage modes switch * used requestanimationframe instead of setTimeOut * removed the commented code * Fix: ChatModes DeSelection on FIle Selection * Fix: Order of the chatmodes accordoing to selected chatmodes * Community optimization (#790) * modified leidens parameters * updated disconnected nodes query * excluded communities from dedup * added index creation * modified de dup query * added delete query for communities * Async way to create entities from multiple chunks (#788) * LLMs with latest langchain dev libraries * conflict resolved * all llm models with latest library changes * async way to get graph documents * indentation correction * fixed graph mode error (#792) * Raga's Evaluation Metrics (#787) * added Multi modes selection * ragas eval * added response * multimodes state mangement * fix: state handling of chat details of the default mode * Added the ChatModeSwitch Component * modes switch statemangement * added the chatmodes switch in both view * removed the copied text * Handled the error scenario * fix: speech issue between modes * ragas evaluation metric show * Output return type changed * fix: Handled activespeech speech and othermessage modes switch * used requestanimationframe instead of setTimeOut * removed the commented code * Added ragas to requirements * Integrated the metric api * ragas response updated, llm list updated * resolved syntax error in score * Added the Metrics Table * fix: Long text UI Issue * code optimization for evaluation * added the download button for downloading the info * key name change * Optimized the downloadClickHandler --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> 
Co-authored-by: kaustubh-darekar Co-authored-by: a-s-poorna * Openai gemini config (#794) * openai and gemini models as config backend * updated dropdown llm values * updated docs * Added the user action for metrics table * Graph enhancements (#795) * graph changes * graph properties changes * graph communities changes * graph type selection * checkbox check changes * format changes * Communities Bug fixes (#775) * added global search+vector+fulltext mode * added community details in chunk entities * added node ids * updated vector graph query * added entities and modified chat response * added params * api response changes * added chunk entity query * modifies query * labels cahnge for nodes * payload changes * added nodetails properties * payload new changes * communities check * communities selecetion check * enable communities * removed the selected prop * enable communities label change * communities name change * cred check * tooltip * fix: Copy Icon Theme Fix --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * llm name changes * build fix * default mode fix * ragas model names update * lint fixes * Chunk Entities API condition * added the tooltip for unsupported lllms for ragas metric loading * removed unused imports * multimode fix when we get error response * mode changes for score display * fix: Fixed the details state handling between multiple chats feature: Added the warning banner If selected llm model is not supported for raga's evaluation * Fix: Entity Mode Width Fix * diffbot fix for async (#797) * Minor changes (#798) * added congig variable for default diffbot chat model * fulltext index creation is skipped when the labels are empty * entity vector change * added optinal to communities for entity mode * updated the entity query --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * New: Added the supported llm models for ragas evaluation * Fix: Communitites Tab is displayed based communitites length * added the conversation download button (#800) * model name correction * chatmode switch mode fix * Add API payload GCP logging (#805) * Adding Links to get neighboring nodes (#796) * addition of link * added neighbours query * implemented with driver * updated the query * communitiesInfo name change * communities.tsx removed * api integration * modified response * entities change * chunk and communities * chunk space removal * added element id to chunks * loading on click * format changes * added file name for Dcoumrnt node * chat token cut off model name update * icon change * duplicate sources removal * Entity change --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * added error message for doc retriver (#807) * copy row (#803) * copy row * column for copy * column copy * Raga's Evaluation For Multi Modes (#806) * Updatedmodels for ragas eval * context utilization metrics removed * updated supported llms for ragas * removed context utilization * Implemented Parallel API * multi api calls error resolved * MultiMode Metrics * Fix: Metric Evalution For Single Mode * multi modes ragas evaluation * api payload changes * metric api output format changed * multi mode ragas changes * removed pre process dataset * api response changes * Multimode metrics api integration * nan error for no answer resolved * QA integration changes --------- Co-authored-by: kaustubh-darekar * lint fixes * fix: multimode metrics state handling fix: lint fixes * fix: Multimode metrics mode change state issue fix: chunk list style issue * fix: list style fix * Correct TYPO mistake * added new env for ragas embedding model * Props name changes (#811) * Props name changes * removed the accesstoken from row on copy action * props changes for dropzone component * graph 
view changes --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> * test * view graph * nodes count and relationshipcount updation fix * sourceUrl Fix * empty string "" fix to keep the default values we should keep the value blank instead "" * prop changes * props changes * retry condition update for failed files (#820) * Chat modes name changes (#815) * Props name changes * removed the accesstoken from row on copy action * updated chat mode names * Chat Modes Name Changes * lint fixes * using readble format In UI * removal of size to avoid console warning * key add --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> * Youtube transcript fix with proxy (#822) * update script for async func * ragas changes for graph retrieval mode. context added in api output (#825) * Remove extract latency from logging and add LIMIT in duplicate nodes * Document updates (#828) * document updated with ragas evaluation information * formatting changes * chatbot api documentation updated * api details added in document * function name changed for drop create vector index api * Update README.md * updated api structire in docs (#827) * Update backend_docs.adoc * 821 llm model listing (#823) * added logic for document filters * LLM models * message change * link added * removed the text --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Exclude session lable node from duplicate nodes list * Added the tooltip for disabled llm option (#835) * node size changes * mode removal of rows check * formatting * Exclude __Entity__ node label from duplicate node list * Update README.md * Update README.md * Update README.md * fixed the youtube link * Security header and GZIPMiddleware (#847) * Added security header all API * Add GZipMiddleware * Chunk Text Details 
(#850) * Community title added * Added api for fetching chunk text details * output format changed for chunk text * integrated the service layer for chunkdata * added the chunks * formatting output of llm call for title generation * formatting llm output for title generation * added flex row * Changes related to pagination of fetch chunk api * Integrated the pagination * page changes error resolved for fetch chunk api * for get neighbours api , community title added in properties * moving community title related changes to separate branch * Removed Query module from fastapi import statement * icon changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Communities Id to Title (#851) * Staging to main (#735) * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * disabled the sumbit buttom on loading * Deduplication tab (#566) * de-duplication API * Update De-Duplicate query * created the Deduplication tab * added the API service * added the removeable tags for similar nodes in deduplication tab * Integrate Tag * added GraphLabel * added loader state * added the merge service * integrated the merge API * Merge Query issue fixed * Auto refresh the duplicate nodes after merging operation * added the description for de duplication * reset on merging --------- Co-authored-by: Pravesh Kumar 
<121786590+praveshkumar1988@users.noreply.github.com> * Update frontend_docs.adoc (#538) * Update frontend_docs.adoc * doc update * Images * Images folder change * Images folder change * test image * Update frontend_docs.adoc * image change * Update frontend_docs.adoc * Update frontend_docs.adoc * added the Graph Mode SS * added the Query SS * Update frontend_docs.adoc * conflics fix * conflict fix * Update frontend_docs.adoc --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * updated langchain versions (#565) * Update the De-Duplication query * Node relationship id type none issue (#547) * de-duplication API * Update De-Duplicate query * Issue fixed Nodes,Relationship Id and Type None or Blank * added the tooltips * type fix * Unneccory import * added score threshold and added some error handling (#571) * Update requirements.txt * Tooltip and other UI fixes (#572) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: 
kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple 
languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue 
--------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs are retrieved (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test 
cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn 
workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 
<164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki language param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default 
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket upload * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructured files to gcs (#455) * Modified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> * Graph consolidation prompt updated (#1013) * Graph consolidation changes (#1014) * Graph
consolidation prompt updated * by default keeping graph schema consolidation unchecked --------- Co-authored-by: kaustubh-darekar * Chunks to be created (#1015) * Restrict chunks creation configurable * Added chunk_to_be_created en value in connect endpoint * Added Chunks To be Processed * Update Content.tsx * Conditional display --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Oauth integration (#1011) * Add User Component * Integrated Oauth * Implemented SPA Oauth authentication * User Profile Rendering * Profile component changes * env changes * Protected Route * Naming Fix * Message placement changes * Docker changes * Feature toggle for authentication * minor issue fixed * passing and logging authenticated user email in backend logging (#1019) * Passed the email as new parameter for all api's * removed the profile * Added email as param in each API and logging as well * added email for source list and conditional invoking of post processing api --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Conflict issue fixed * default graph consolidation unchecked * time formatted as string for logging * removed debugging react strict mode * Remove strict (#1027) * DEV to STAGING (#1022) * fixed the rerendering of the table while file status is processing * fix: Read Only User Fix * Global search fulltext (#767) * added global search+vector+fulltext mode * added community details in chunk entities * added node ids * updated vector graph query * added entities and modified chat response * added params * api response changes * added chunk entity query * modifies query * payload changes * added nodetails properties * payload new changes * communities check * communities selecetion check * Communities bug solutions (#770) * added local chat history * added write access check * added write access param * labels cahnge for nodes * added fulltext creation * disabled the write and delete 
actions for read only user mode * modified query * test updates * test uupdated * enable communities * removed the selected prop * Read Only User Support (#766) * added local chat history * added write access check * added write access param * added fulltext creation * disabled the write and delete actions for read only user mode * modified query --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * storing the gds status and write access on refresh * enable communities label change --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> * readonly fixed on refresh * clear chat history * slectedFiles check for Chatbot * clear history --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> * Added elapsed time for extarction on each breakdown function * lint and format fixes * removed dev logs * communities fix * disabled the generate graph for read only user * format fixes * graph labels change * added the readonly check for already added waiting files * Retriever evaluation using RAGAS * deleted unused file * code optimization using memo * Added elapsed_time on each api and getiing time per_entity * Added the post processing Alert showcasing the ongoing post processing jobs * fix: readonly user retry option disable * update script to get details of extarcted doc * Issue fixed, Latency count per entity * Multiple chat modes selection (#780) * added Multi modes selection * multimodes state mangement * fix: state handling of chat details of the default mode * Added the ChatModeSwitch 
Component * modes switch state management * added the chatmodes switch in both views * removed the copied text * Handled the error scenario * fix: speech issue between modes * fix: Handled activespeech speech and othermessage modes switch * used requestAnimationFrame instead of setTimeout * removed the commented code * Fix: ChatModes DeSelection on File Selection * Fix: Order of the chatmodes according to selected chatmodes * Community optimization (#790) * modified Leiden parameters * updated disconnected nodes query * excluded communities from dedup * added index creation * modified dedup query * added delete query for communities * Async way to create entities from multiple chunks (#788) * LLMs with latest langchain dev libraries * conflict resolved * all llm models with latest library changes * async way to get graph documents * indentation correction * fixed graph mode error (#792) * Raga's Evaluation Metrics (#787) * added Multi modes selection * ragas eval * added response * multimodes state management * fix: state handling of chat details of the default mode * Added the ChatModeSwitch Component * modes switch state management * added the chatmodes switch in both views * removed the copied text * Handled the error scenario * fix: speech issue between modes * ragas evaluation metric show * Output return type changed * fix: Handled activespeech speech and othermessage modes switch * used requestAnimationFrame instead of setTimeout * removed the commented code * Added ragas to requirements * Integrated the metric api * ragas response updated, llm list updated * resolved syntax error in score * Added the Metrics Table * fix: Long text UI Issue * code optimization for evaluation * added the download button for downloading the info * key name change * Optimized the downloadClickHandler --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kaustubh-darekar Co-authored-by: a-s-poorna * Openai gemini config
(#794) * openai and gemini models as config backend * updated dropdown llm values * updated docs * Added the user action for metrics table * Graph enhancements (#795) * graph changes * graph properties changes * graph communities changes * graph type selection * checkbox check changes * format changes * Communities Bug fixes (#775) * added global search+vector+fulltext mode * added community details in chunk entities * added node ids * updated vector graph query * added entities and modified chat response * added params * api response changes * added chunk entity query * modifies query * labels cahnge for nodes * payload changes * added nodetails properties * payload new changes * communities check * communities selecetion check * enable communities * removed the selected prop * enable communities label change * communities name change * cred check * tooltip * fix: Copy Icon Theme Fix --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * llm name changes * build fix * default mode fix * ragas model names update * lint fixes * Chunk Entities API condition * added the tooltip for unsupported lllms for ragas metric loading * removed unused imports * multimode fix when we get error response * mode changes for score display * fix: Fixed the details state handling between multiple chats feature: Added the warning banner If selected llm model is not supported for raga's evaluation * Fix: Entity Mode Width Fix * diffbot fix for async (#797) * Minor changes (#798) * added congig variable for default diffbot chat model * fulltext index creation is skipped when the labels are empty * entity vector change * added optinal to communities for entity mode * updated the entity query --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * New: Added the supported llm models for ragas evaluation * Fix: 
Communitites Tab is displayed based communitites length * added the conversation download button (#800) * model name correction * chatmode switch mode fix * Add API payload GCP logging (#805) * Adding Links to get neighboring nodes (#796) * addition of link * added neighbours query * implemented with driver * updated the query * communitiesInfo name change * communities.tsx removed * api integration * modified response * entities change * chunk and communities * chunk space removal * added element id to chunks * loading on click * format changes * added file name for Dcoumrnt node * chat token cut off model name update * icon change * duplicate sources removal * Entity change --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * added error message for doc retriver (#807) * copy row (#803) * copy row * column for copy * column copy * Raga's Evaluation For Multi Modes (#806) * Updatedmodels for ragas eval * context utilization metrics removed * updated supported llms for ragas * removed context utilization * Implemented Parallel API * multi api calls error resolved * MultiMode Metrics * Fix: Metric Evalution For Single Mode * multi modes ragas evaluation * api payload changes * metric api output format changed * multi mode ragas changes * removed pre process dataset * api response changes * Multimode metrics api integration * nan error for no answer resolved * QA integration changes --------- Co-authored-by: kaustubh-darekar * lint fixes * fix: multimode metrics state handling fix: lint fixes * fix: Multimode metrics mode change state issue fix: chunk list style issue * fix: list style fix * Correct TYPO mistake * added new env for ragas embedding model * Props name changes (#811) * Props name changes * removed the accesstoken from row on copy action * props changes for dropzone component * graph view changes --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> * test * 
view graph * nodes count and relationship count updation fix * sourceUrl Fix * empty string "" fix to keep the default values we should keep the value blank instead of "" * prop changes * props changes * retry condition update for failed files (#820) * Chat modes name changes (#815) * Props name changes * removed the accesstoken from row on copy action * updated chat mode names * Chat Modes Name Changes * lint fixes * using readable format in UI * removal of size to avoid console warning * key add --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> * Youtube transcript fix with proxy (#822) * update script for async func * ragas changes for graph retrieval mode. context added in api output (#825) * Remove extract latency from logging and add LIMIT in duplicate nodes * Document updates (#828) * document updated with ragas evaluation information * formatting changes * chatbot api documentation updated * api details added in document * function name changed for drop create vector index api * Update README.md * updated api structure in docs (#827) * Update backend_docs.adoc * 821 llm model listing (#823) * added logic for document filters * LLM models * message change * link added * removed the text --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Exclude session label node from duplicate nodes list * Added the tooltip for disabled llm option (#835) * node size changes * mode removal of rows check * formatting * Exclude __Entity__ node label from duplicate node list * Update README.md * Update README.md * Update README.md * fixed the youtube link * Security header and GZipMiddleware (#847) * Added security header to all APIs * Add GZipMiddleware * Chunk Text Details (#850) * Community title added * Added api for fetching chunk text details * output format changed for chunk text *
integrated the service layer for chunkdata * added the chunks * formatting output of llm call for title generation * formatting llm output for title generation * added flex row * Changes related to pagination of fetch chunk api * Integrated the pagination * page changes error resolved for fetch chunk api * for get neighbours api , community title added in properties * moving community title related changes to separate branch * Removed Query module from fastapi import statement * icon changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Communities Id to Title (#851) * Staging to main (#735) * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * disabled the sumbit buttom on loading * Deduplication tab (#566) * de-duplication API * Update De-Duplicate query * created the Deduplication tab * added the API service * added the removeable tags for similar nodes in deduplication tab * Integrate Tag * added GraphLabel * added loader state * added the merge service * integrated the merge API * Merge Query issue fixed * Auto refresh the duplicate nodes after merging operation * added the description for de duplication * reset on merging --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Update frontend_docs.adoc (#538) * Update frontend_docs.adoc * 
doc update * Images * Images folder change * Images folder change * test image * Update frontend_docs.adoc * image change * Update frontend_docs.adoc * Update frontend_docs.adoc * added the Graph Mode SS * added the Query SS * Update frontend_docs.adoc * conflics fix * conflict fix * Update frontend_docs.adoc --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * updated langchain versions (#565) * Update the De-Duplication query * Node relationship id type none issue (#547) * de-duplication API * Update De-Duplicate query * Issue fixed Nodes,Relationship Id and Type None or Blank * added the tooltips * type fix * Unneccory import * added score threshold and added some error handling (#571) * Update requirements.txt * Tooltip and other UI fixes (#572) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm 
integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: 
kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: 
kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh 
--------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max 
width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- 
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend 
(#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support 
generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css 
handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update 
Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. 
* timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file 
re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the 
default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the 
content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * 
added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and 
gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the 
TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed 
validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status 
message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey 
<156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status… --------- Co-authored-by: kaustubh-darekar Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: a-s-poorna Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: Ikko Eltociear Ashimine Co-authored-by: Jayanth T Co-authored-by: Jayanth T Co-authored-by: Jerome Choo Co-authored-by: jeromechoo Co-authored-by: Michael Hunger Co-authored-by: Kain Shu <44948284+Kain-90@users.noreply.github.com> Co-authored-by: destiny966113 <90891243+destiny966113@users.noreply.github.com> Co-authored-by: Pravesh1988 Co-authored-by: edenbuaa Co-authored-by: Marcos Cannabrava <54267712+marcoscannabrava@users.noreply.github.com> --- backend/src/shared/common_fn.py | 2 +- frontend/src/context/UsersFiles.tsx | 1 - 
frontend/src/main.tsx | 16 ++++++++-------- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/backend/src/shared/common_fn.py b/backend/src/shared/common_fn.py index 589b64d28..d95626bb3 100644 --- a/backend/src/shared/common_fn.py +++ b/backend/src/shared/common_fn.py @@ -133,7 +133,7 @@ def create_gcs_bucket_folder_name_hashed(uri, file_name): def formatted_time(current_time): formatted_time = current_time.strftime('%Y-%m-%d %H:%M:%S %Z') - return formatted_time + return str(formatted_time) def last_url_segment(url): parsed_url = urlparse(url) diff --git a/frontend/src/context/UsersFiles.tsx b/frontend/src/context/UsersFiles.tsx index 1487e35a0..af019ba76 100644 --- a/frontend/src/context/UsersFiles.tsx +++ b/frontend/src/context/UsersFiles.tsx @@ -43,7 +43,6 @@ const FileContextProvider: FC = ({ children }) => { 'enable_hybrid_search_and_fulltext_search_in_bloom', 'materialize_entity_similarities', 'enable_communities', - 'graph_schema_consolidation', ]); const [processedCount, setProcessedCount] = useState(0); const [postProcessingVal, setPostProcessingVal] = useState(false); diff --git a/frontend/src/main.tsx b/frontend/src/main.tsx index 7451c7a8d..955489fd7 100644 --- a/frontend/src/main.tsx +++ b/frontend/src/main.tsx @@ -6,13 +6,13 @@ import App from './App.tsx'; import { SKIP_AUTH } from './utils/Constants.ts'; ReactDOM.createRoot(document.getElementById('root')!).render( - - {SKIP_AUTH ? ( + + {SKIP_AUTH ? ( + + ) : ( + - ) : ( - - - - )} - + + )} + );