Commit

2025-01-28 17:12:16.476817 new snippets
eduardocerqueira committed Jan 28, 2025
1 parent a323f80 commit e954e23
Showing 10 changed files with 815 additions and 0 deletions.
20 changes: 20 additions & 0 deletions seeker/report.txt
@@ -1,3 +1,23 @@
--------------------------------------------------------------------------------
2025-01-28 17:12:16.476817
--------------------------------------------------------------------------------
On branch main
Your branch is up to date with 'origin/main'.

Untracked files:
(use "git add <file>..." to include in what will be committed)
snippet/Extreme_Value_Theory_1.py
snippet/Extreme_Value_Theory_2.py
snippet/Scraper Google Scholar Profiles.py
snippet/app.py
snippet/csf_release_no_packet_ip.sh
snippet/gcp_vertex_search_utils.py
snippet/single-number.py
snippet/test.java
snippet/vpn.sh

nothing added to commit but untracked files present (use "git add" to track)

--------------------------------------------------------------------------------
2025-01-27 17:12:38.877396
--------------------------------------------------------------------------------
15 changes: 15 additions & 0 deletions seeker/snippet/Extreme_Value_Theory_1.py
@@ -0,0 +1,15 @@
#date: 2025-01-28T16:53:24Z
#url: https://api.github.com/gists/a6de6bebc1168d8d0603dafd6ebc1e3c
#owner: https://api.github.com/users/TLaconde

import pandas as pd
import matplotlib.pyplot as plt

# Open the data
path = 'Tmax.csv'

df = pd.read_csv(path, parse_dates=["DATE"])
df = df.set_index("DATE")

# Chart of daily maximum temperatures
plt.plot(df.TMAX)
plt.show()
6 changes: 6 additions & 0 deletions seeker/snippet/Extreme_Value_Theory_2.py
@@ -0,0 +1,6 @@
#date: 2025-01-28T17:10:47Z
#url: https://api.github.com/gists/f1584c6f5ee2be00338f5c179be2e4fd
#owner: https://api.github.com/users/TLaconde

# Keep only the summer months (June, July and August), then compute the 90th and 99th percentiles
df = df[df.index.month.isin([6, 7, 8])]
df.quantile([0.9, 0.99])
77 changes: 77 additions & 0 deletions seeker/snippet/Scraper Google Scholar Profiles.py
@@ -0,0 +1,77 @@
#date: 2025-01-28T17:03:34Z
#url: https://api.github.com/gists/c70149985f59745a744d179058dc886f
#owner: https://api.github.com/users/SofiaECalle

import random
from time import sleep

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

# Selenium 4 expects a Service object rather than a bare driver path
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

# driver = webdriver.Firefox()
# driver.maximize_window()

driver.get("https://scholar.google.es/citations?view_op=search_authors&hl=es&mauthors=universidad+nacional+de+educaci%C3%B3n+UNAE&before_author=C25-_wIBAAAJ&astart=0")
sleep(4)

profile_list = []

# Collect profile links, paging through the author search results
for i in range(1, 15):
    sleep(2)
    profiles = driver.find_elements(By.XPATH, '//h3[@class="gs_ai_name"]/a')
    for p in profiles:
        profile = p.get_attribute('href')
        profile_list.append(profile)

    try:
        # Click the "next page" button; stop when there are no more result pages
        driver.find_element(By.XPATH, '//button[@type="button"][2]').click()
    except Exception:
        break

'''
profiles = driver.find_elements(By.XPATH, '//h3[@class="gs_ai_name"]/a')
for p in profiles:
    profile = p.get_attribute('href')
    profile_list.append(profile)
'''

data = []
for pr in profile_list:
    driver.get(pr)

    name = driver.find_element(By.XPATH, '//div[@id="gsc_prf_in"]').text
    verify = driver.find_element(By.XPATH, '(//div[@class="gsc_prf_il"])[2]').text
    Citas_Total = driver.find_element(By.XPATH, '(//td[@class="gsc_rsb_std"])[1]').text
    Citas_Desde = driver.find_element(By.XPATH, '(//td[@class="gsc_rsb_std"])[2]').text

    Indice_h_Total = driver.find_element(By.XPATH, '(//td[@class="gsc_rsb_std"])[3]').text
    Indice_h_Desde = driver.find_element(By.XPATH, '(//td[@class="gsc_rsb_std"])[4]').text

    # The i10-index occupies the fifth and sixth cells of the citation stats table
    Indice_i10_Total = driver.find_element(By.XPATH, '(//td[@class="gsc_rsb_std"])[5]').text
    Indice_i10_Desde = driver.find_element(By.XPATH, '(//td[@class="gsc_rsb_std"])[6]').text

    # Click "Show more" repeatedly so the full publication list is loaded
    for i in range(1, 7):
        sleep(2)
        driver.find_element(By.XPATH, '//button[@class="gs_btnPD gs_in_ib gs_btn_flat gs_btn_lrge gs_btn_lsu"]').click()

    articles = driver.find_elements(By.XPATH, '//tr[@class="gsc_a_tr"]')
    for a in articles:
        article_link = a.find_element(By.XPATH, './/a[@class="gsc_a_at"]').get_attribute('href')
        article_title = a.find_element(By.XPATH, './/a[@class="gsc_a_at"]').text
        citado = a.find_element(By.XPATH, './/td[@class="gsc_a_c"]/a').text
        anoo = a.find_element(By.XPATH, './/td[@class="gsc_a_y"]/span').text
        data.append([name, verify, Citas_Total, Citas_Desde, Indice_h_Total, Indice_h_Desde,
                     Indice_i10_Total, Indice_i10_Desde, article_title, article_link, citado, anoo])

# Create a DataFrame from the data list
df = pd.DataFrame(data, columns=['Name', 'Verify', 'Citas_Total', 'Citas_Desde', 'Indice_h_Total',
                                 'Indice_h_Desde', 'Indice_i10_Total', 'Indice_i10_Desde', 'Article_Title',
                                 'Article_Link', 'Citado', 'Anoo'])

# Save DataFrame to an Excel sheet
df.to_excel('profile_dataall.xlsx', index=False)
driver.quit()
130 changes: 130 additions & 0 deletions seeker/snippet/app.py
@@ -0,0 +1,130 @@
#date: 2025-01-28T16:48:21Z
#url: https://api.github.com/gists/dd76c0faddb94b5b4784dc5713c8fd6a
#owner: https://api.github.com/users/rbioteau

import streamlit as st
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_experimental.text_splitter import SemanticChunker
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import Ollama
from langchain.prompts import PromptTemplate
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains import RetrievalQA

# color palette
primary_color = "#1E90FF"
secondary_color = "#FF6347"
background_color = "#F5F5F5"
text_color = "#4561e9"

# Custom CSS
st.markdown(f"""
    <style>
    .stApp {{
        background-color: {background_color};
        color: {text_color};
    }}
    .stButton>button {{
        background-color: {primary_color};
        color: white;
        border-radius: 5px;
        border: none;
        padding: 10px 20px;
        font-size: 16px;
    }}
    .stTextInput>div>div>input {{
        border: 2px solid {primary_color};
        border-radius: 5px;
        padding: 10px;
        font-size: 16px;
    }}
    .stFileUploader>div>div>div>button {{
        background-color: {secondary_color};
        color: white;
        border-radius: 5px;
        border: none;
        padding: 10px 20px;
        font-size: 16px;
    }}
    </style>
""", unsafe_allow_html=True)

# Streamlit app title
st.title("Build a RAG System with DeepSeek R1 & Ollama")

# Load the PDF
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

if uploaded_file is not None:
    # Save the uploaded file to a temporary location
    with open("temp.pdf", "wb") as f:
        f.write(uploaded_file.getvalue())

    # Load the PDF
    loader = PDFPlumberLoader("temp.pdf")
    docs = loader.load()

    # Split into chunks
    text_splitter = SemanticChunker(HuggingFaceEmbeddings())
    documents = text_splitter.split_documents(docs)

    # Instantiate the embedding model
    embedder = HuggingFaceEmbeddings()

    # Create the vector store and fill it with embeddings
    vector = FAISS.from_documents(documents, embedder)
    retriever = vector.as_retriever(search_type="similarity", search_kwargs={"k": 3})

    # Define the LLM
    llm = Ollama(model="deepseek-r1")

    # Define the prompt
    prompt = """
    1. Use the following pieces of context to answer the question at the end.
    2. If you don't know the answer, just say "I don't know"; don't make up an answer on your own.
    3. Keep the answer crisp and limited to 3-4 sentences.
    Context: {context}
    Question: {question}
    Helpful Answer:"""

    QA_CHAIN_PROMPT = PromptTemplate.from_template(prompt)

    llm_chain = LLMChain(
        llm=llm,
        prompt=QA_CHAIN_PROMPT,
        callbacks=None,
        verbose=True)

    document_prompt = PromptTemplate(
        input_variables=["page_content", "source"],
        template="Context:\ncontent:{page_content}\nsource:{source}",
    )

    combine_documents_chain = StuffDocumentsChain(
        llm_chain=llm_chain,
        document_variable_name="context",
        document_prompt=document_prompt,
        callbacks=None)

    qa = RetrievalQA(
        combine_documents_chain=combine_documents_chain,
        verbose=True,
        retriever=retriever,
        return_source_documents=True)

    # User input
    user_input = st.text_input("Ask a question related to the PDF:")

    # Process user input
    if user_input:
        with st.spinner("Processing..."):
            response = qa(user_input)["result"]
            st.write("Response:")
            st.write(response)
else:
    st.write("Please upload a PDF file to proceed.")
48 changes: 48 additions & 0 deletions seeker/snippet/csf_release_no_packet_ip.sh
@@ -0,0 +1,48 @@
#date: 2025-01-28T16:51:56Z
#url: https://api.github.com/gists/6e2ef64acd2dde85bc329bf335fe4a98
#owner: https://api.github.com/users/parsibox

#!/bin/bash

# Temporary file to store IPs dropped by iptables in DENYIN chain
TEMP_DROPPED_IPS="/tmp/dropped_ips.txt"

# Function to extract IP blocks/ranges being dropped in the DENYIN chain
get_dropped_ips() {
    # Extract IP blocks/ranges that are actively being dropped in the DENYIN chain
    iptables -vL DENYIN -n | grep 'DROP' | awk '{if ($1 > 0) print $8}' | sort | uniq > "$TEMP_DROPPED_IPS"
}

# Function to check if an IP block/range is dropped in iptables
is_ip_dropped() {
    local ip="$1"
    grep -q "$ip" "$TEMP_DROPPED_IPS"
}

# Function to clean up the CSF deny list based on iptables drop status
clean_csf_deny_list() {
    # Loop through each IP in the CSF deny list
    while IFS= read -r line; do
        # Skip lines that are comments or don't have an IP (e.g., manual deny)
        if [[ "$line" =~ ^# || -z "$line" ]]; then
            continue
        fi

        # Extract the IP from the line (this works for both IPs and IP ranges)
        blocked_ip=$(echo "$line" | awk '{print $1}')

        # Check if the IP is still being dropped by iptables
        if ! is_ip_dropped "$blocked_ip"; then
            # If the IP is no longer dropped, unblock it using csf -dr
            echo "Unblocking $blocked_ip from CSF deny list."
            csf -dr "$blocked_ip"
        fi
    done < /etc/csf/csf.deny
}

# Main script execution
get_dropped_ips
clean_csf_deny_list

# Reload CSF to apply changes (optional if needed)
csf -r