Commit

2025-01-28 17:12:16.476817 new snippets
eduardocerqueira committed Jan 28, 2025
1 parent a323f80 commit e954e23
Showing 10 changed files with 815 additions and 0 deletions.
20 changes: 20 additions & 0 deletions seeker/report.txt
@@ -1,3 +1,23 @@
--------------------------------------------------------------------------------
2025-01-28 17:12:16.476817
--------------------------------------------------------------------------------
On branch main
Your branch is up to date with 'origin/main'.

Untracked files:
(use "git add <file>..." to include in what will be committed)
snippet/Extreme_Value_Theory_1.py
snippet/Extreme_Value_Theory_2.py
snippet/Scraper Google Scholar Profiles.py
snippet/app.py
snippet/csf_release_no_packet_ip.sh
snippet/gcp_vertex_search_utils.py
snippet/single-number.py
snippet/test.java
snippet/vpn.sh

nothing added to commit but untracked files present (use "git add" to track)

--------------------------------------------------------------------------------
2025-01-27 17:12:38.877396
--------------------------------------------------------------------------------
15 changes: 15 additions & 0 deletions seeker/snippet/Extreme_Value_Theory_1.py
@@ -0,0 +1,15 @@
#date: 2025-01-28T16:53:24Z
#url: https://api.github.com/gists/a6de6bebc1168d8d0603dafd6ebc1e3c
#owner: https://api.github.com/users/TLaconde

import pandas as pd
import matplotlib.pyplot as plt

# Open the data
path = 'Tmax.csv'

df = pd.read_csv(path, parse_dates=["DATE"])
df = df.set_index("DATE")

# Chart of daily maximum temperatures
plt.plot(df.TMAX)
plt.show()
6 changes: 6 additions & 0 deletions seeker/snippet/Extreme_Value_Theory_2.py
@@ -0,0 +1,6 @@
#date: 2025-01-28T17:10:47Z
#url: https://api.github.com/gists/f1584c6f5ee2be00338f5c179be2e4fd
#owner: https://api.github.com/users/TLaconde

# Keep only the summer months (June, July and August), then compute the 90th and 99th percentiles
df = df[df.index.month.isin([6, 7, 8])]
df.quantile([0.9, 0.99])
77 changes: 77 additions & 0 deletions seeker/snippet/Scraper Google Scholar Profiles.py
@@ -0,0 +1,77 @@
#date: 2025-01-28T17:03:34Z
#url: https://api.github.com/gists/c70149985f59745a744d179058dc886f
#owner: https://api.github.com/users/SofiaECalle

import random
from time import sleep

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

# Selenium 4 expects a Service object rather than a bare driver path
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

# driver = webdriver.Firefox()
# driver.maximize_window()

driver.get("https://scholar.google.es/citations?view_op=search_authors&hl=es&mauthors=universidad+nacional+de+educaci%C3%B3n+UNAE&before_author=C25-_wIBAAAJ&astart=0")
sleep(4)

profile_list = []

# Collect profile links, paging through the author search results
for i in range(1, 15):
    sleep(2)
    profiles = driver.find_elements(By.XPATH, '//h3[@class="gs_ai_name"]/a')
    for p in profiles:
        profile = p.get_attribute('href')
        profile_list.append(profile)

    try:
        # Click the "next page" button; stop when there are no more result pages
        driver.find_element(By.XPATH, '//button[@type="button"][2]').click()
    except Exception:
        break

'''
profiles = driver.find_elements(By.XPATH, '//h3[@class="gs_ai_name"]/a')
for p in profiles:
    profile = p.get_attribute('href')
    profile_list.append(profile)
'''

data = []
for pr in profile_list:
    driver.get(pr)

    name = driver.find_element(By.XPATH, '//div[@id="gsc_prf_in"]').text
    verify = driver.find_element(By.XPATH, '(//div[@class="gsc_prf_il"])[2]').text
    Citas_Total = driver.find_element(By.XPATH, '(//td[@class="gsc_rsb_std"])[1]').text
    Citas_Desde = driver.find_element(By.XPATH, '(//td[@class="gsc_rsb_std"])[2]').text

    Indice_h_Total = driver.find_element(By.XPATH, '(//td[@class="gsc_rsb_std"])[3]').text
    Indice_h_Desde = driver.find_element(By.XPATH, '(//td[@class="gsc_rsb_std"])[4]').text

    # The i10-index occupies the fifth and sixth cells of the citation stats table
    Indice_i10_Total = driver.find_element(By.XPATH, '(//td[@class="gsc_rsb_std"])[5]').text
    Indice_i10_Desde = driver.find_element(By.XPATH, '(//td[@class="gsc_rsb_std"])[6]').text

    # Click "Show more" repeatedly so the full publication list is loaded
    for i in range(1, 7):
        sleep(2)
        driver.find_element(By.XPATH, '//button[@class="gs_btnPD gs_in_ib gs_btn_flat gs_btn_lrge gs_btn_lsu"]').click()

    articles = driver.find_elements(By.XPATH, '//tr[@class="gsc_a_tr"]')
    for a in articles:
        article_link = a.find_element(By.XPATH, './/a[@class="gsc_a_at"]').get_attribute('href')
        article_title = a.find_element(By.XPATH, './/a[@class="gsc_a_at"]').text
        citado = a.find_element(By.XPATH, './/td[@class="gsc_a_c"]/a').text
        anoo = a.find_element(By.XPATH, './/td[@class="gsc_a_y"]/span').text
        data.append([name, verify, Citas_Total, Citas_Desde, Indice_h_Total, Indice_h_Desde,
                     Indice_i10_Total, Indice_i10_Desde, article_title, article_link, citado, anoo])

# Create a DataFrame from the data list
df = pd.DataFrame(data, columns=['Name', 'Verify', 'Citas_Total', 'Citas_Desde', 'Indice_h_Total',
                                 'Indice_h_Desde', 'Indice_i10_Total', 'Indice_i10_Desde', 'Article_Title',
                                 'Article_Link', 'Citado', 'Anoo'])

# Save DataFrame to an Excel sheet
df.to_excel('profile_dataall.xlsx', index=False)
driver.quit()
130 changes: 130 additions & 0 deletions seeker/snippet/app.py
@@ -0,0 +1,130 @@
#date: 2025-01-28T16:48:21Z
#url: https://api.github.com/gists/dd76c0faddb94b5b4784dc5713c8fd6a
#owner: https://api.github.com/users/rbioteau

import streamlit as st
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_experimental.text_splitter import SemanticChunker
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import Ollama
from langchain.prompts import PromptTemplate
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains import RetrievalQA

# color palette
primary_color = "#1E90FF"
secondary_color = "#FF6347"
background_color = "#F5F5F5"
text_color = "#4561e9"

# Custom CSS
st.markdown(f"""
    <style>
    .stApp {{
        background-color: {background_color};
        color: {text_color};
    }}
    .stButton>button {{
        background-color: {primary_color};
        color: white;
        border-radius: 5px;
        border: none;
        padding: 10px 20px;
        font-size: 16px;
    }}
    .stTextInput>div>div>input {{
        border: 2px solid {primary_color};
        border-radius: 5px;
        padding: 10px;
        font-size: 16px;
    }}
    .stFileUploader>div>div>div>button {{
        background-color: {secondary_color};
        color: white;
        border-radius: 5px;
        border: none;
        padding: 10px 20px;
        font-size: 16px;
    }}
    </style>
""", unsafe_allow_html=True)

# Streamlit app title
st.title("Build a RAG System with DeepSeek R1 & Ollama")

# Load the PDF
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

if uploaded_file is not None:
    # Save the uploaded file to a temporary location
    with open("temp.pdf", "wb") as f:
        f.write(uploaded_file.getvalue())

    # Load the PDF
    loader = PDFPlumberLoader("temp.pdf")
    docs = loader.load()

    # Split into chunks
    text_splitter = SemanticChunker(HuggingFaceEmbeddings())
    documents = text_splitter.split_documents(docs)

    # Instantiate the embedding model
    embedder = HuggingFaceEmbeddings()

    # Create the vector store and fill it with embeddings
    vector = FAISS.from_documents(documents, embedder)
    retriever = vector.as_retriever(search_type="similarity", search_kwargs={"k": 3})

    # Define the LLM
    llm = Ollama(model="deepseek-r1")

    # Define the prompt
    prompt = """
    1. Use the following pieces of context to answer the question at the end.
    2. If you don't know the answer, just say "I don't know"; don't make up an answer on your own.
    3. Keep the answer crisp and limited to 3-4 sentences.
    Context: {context}
    Question: {question}
    Helpful Answer:"""

    QA_CHAIN_PROMPT = PromptTemplate.from_template(prompt)

    llm_chain = LLMChain(
        llm=llm,
        prompt=QA_CHAIN_PROMPT,
        callbacks=None,
        verbose=True)

    document_prompt = PromptTemplate(
        input_variables=["page_content", "source"],
        template="Context:\ncontent:{page_content}\nsource:{source}",
    )

    combine_documents_chain = StuffDocumentsChain(
        llm_chain=llm_chain,
        document_variable_name="context",
        document_prompt=document_prompt,
        callbacks=None)

    qa = RetrievalQA(
        combine_documents_chain=combine_documents_chain,
        verbose=True,
        retriever=retriever,
        return_source_documents=True)

    # User input
    user_input = st.text_input("Ask a question related to the PDF:")

    # Process user input
    if user_input:
        with st.spinner("Processing..."):
            response = qa(user_input)["result"]
            st.write("Response:")
            st.write(response)
else:
    st.write("Please upload a PDF file to proceed.")
48 changes: 48 additions & 0 deletions seeker/snippet/csf_release_no_packet_ip.sh
@@ -0,0 +1,48 @@
#date: 2025-01-28T16:51:56Z
#url: https://api.github.com/gists/6e2ef64acd2dde85bc329bf335fe4a98
#owner: https://api.github.com/users/parsibox

#!/bin/bash

# Temporary file to store IPs dropped by iptables in DENYIN chain
TEMP_DROPPED_IPS="/tmp/dropped_ips.txt"

# Function to extract IP blocks/ranges being dropped in the DENYIN chain
get_dropped_ips() {
    # Extract IP blocks/ranges that are actively being dropped in the DENYIN chain
    iptables -vL DENYIN -n | grep 'DROP' | awk '{if ($1 > 0) print $8}' | sort | uniq > "$TEMP_DROPPED_IPS"
}

# Function to check if an IP block/range is dropped in iptables
is_ip_dropped() {
    local ip="$1"
    grep -q "$ip" "$TEMP_DROPPED_IPS"
}

# Function to clean up the CSF deny list based on iptables drop status
clean_csf_deny_list() {
    # Loop through each IP in the CSF deny list
    while IFS= read -r line; do
        # Skip lines that are comments or don't have an IP (e.g., manual deny)
        if [[ "$line" =~ ^# || -z "$line" ]]; then
            continue
        fi

        # Extract the IP from the line (this works for both IPs and IP ranges)
        blocked_ip=$(echo "$line" | awk '{print $1}')

        # Check if the IP is still being dropped by iptables
        if ! is_ip_dropped "$blocked_ip"; then
            # If the IP is no longer dropped, unblock it using csf -dr
            echo "Unblocking $blocked_ip from CSF deny list."
            csf -dr "$blocked_ip"
        fi
    done < /etc/csf/csf.deny
}

# Main script execution
get_dropped_ips
clean_csf_deny_list

# Reload CSF to apply changes (optional if needed)
csf -r