diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0927d92..b9bf9ee 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v3 @@ -45,9 +45,18 @@ jobs: - name: Run tests env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - run: poetry run pytest - - - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@v3 - env: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + run: poetry run test + + # - name: Upload coverage reports to Codecov + # uses: codecov/codecov-action@v3 + # env: + # CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + + # security: + # runs-on: ubuntu-latest + # steps: + # - uses: actions/checkout@v3 + # - name: Run Snyk to check for vulnerabilities + # uses: snyk/actions/python@master + # env: + # SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} diff --git a/.gitignore b/.gitignore index 7563ab7..6656130 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,34 @@ -.venv +# Environments .env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ -# dev tools -create_txt.py -file_structure_and_contents.txt -**/**/__pycache__ +# Python cache +__pycache__/ +*.py[cod] +*$py.class + +# Distribution / packaging +dist/ +build/ +*.egg-info/ +# Test coverage .coverage +htmlcov/ -dist/ +# IDE specific files +.vscode/ +.idea/ + +# OS generated files +.DS_Store +Thumbs.db + +# Other +create_txt.py +file_structure_and_contents.txt diff --git a/README.md b/README.md index 5713210..3b67a65 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ Optional reasoning for grading decisions ## Requirements -Python 3.8+ +Python 3.9+ OpenAI API key @@ -63,6 +63,20 @@ poetry run test ``` +## Security + +- Always use environment variables for sensitive information like API keys. +- Never commit `.env` files to version control. +- Regularly update dependencies to their latest secure versions. +- Use HTTPS for all external communications. +- Sanitize all user inputs before processing. + +For local development: + +1. Copy `.env.example` to `.env` +2. Fill in your actual API keys and other sensitive information in `.env` +3. Ensure `.env` is in your `.gitignore` file + ## Contributing Contributions are welcome! Please feel free to submit a Pull Request. diff --git a/langrade/grader.py b/langrade/grader.py index 2b82f1e..1ce40df 100644 --- a/langrade/grader.py +++ b/langrade/grader.py @@ -7,6 +7,7 @@ REASONING_DESCRIPTION, BINARY_SCORE_DESCRIPTION, ) +import html class GradeDocuments(BaseModel): @@ -16,7 +17,6 @@ class GradeDocuments(BaseModel): class DocumentGrader: def __init__(self, api_key: str, model: str = DEFAULT_MODEL): - # type: ignore[arg-type] self.llm = ChatOpenAI(api_key=api_key, model=model) self.prompt = self._create_prompt() @@ -32,9 +32,15 @@ def _create_prompt(self): ) def grade_document(self, document: str, question: str): + # Sanitize inputs + safe_document = html.escape(document) + safe_question = html.escape(question) + structured_llm = self.llm.with_structured_output(GradeDocuments) chain = self.prompt | structured_llm - result = chain.invoke({"document": document, "question": question}) + result = chain.invoke( + {"document": safe_document, "question": safe_question} + ) # noqa: E501 return result diff --git a/pyproject.toml b/pyproject.toml index 8e602ec..ca8daa9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,12 +1,12 @@ [tool.poetry] name = "langrade" -version = "0.1.1" +version = "0.1.2" description = "A library for grading document relevance using LLMs" authors = ["nisaji "] license = "MIT" [tool.poetry.dependencies] -python = "^3.9" +python = "^3.9" # Updated to require Python 3.9+ langchain = "^0.2.6" langchain-community = "^0.2.6" langchain-openai = "^0.1.14" @@ -20,8 +20,7 @@ beautifulsoup4 = "^4.12.3" numpy = "^1.22.5" chromadb = "^0.5.3,<0.6.0" -google-generativeai = "^0.7.1" -anthropic = "0.2.8" + [tool.poetry.group.dev.dependencies] ruff = "^0.5.0" mypy = "^1.10.1" @@ -37,7 +36,7 @@ build-backend = "poetry.core.masonry.api" [tool.ruff] line-length = 88 -select = ["E", "F", "W", "C", "N"] +select = ["E", "F", "W", "C", "N", "S"] # Added "S" for security checks ignore = ["E501", "W503"] [tool.isort] @@ -52,3 +51,7 @@ ignore_missing_imports = true [tool.poetry.scripts] test = "scripts.run_tests:main" + +[tool.pytest.ini_options] +testpaths = ["tests"] +addopts = "-v --cov=langrade" diff --git a/setup.py b/setup.py index 1512682..b1bad3f 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name="langrade", - version="0.1.1", + version="0.1.2", author="Your Name", author_email="your.email@example.com", description="A library for grading and retrieving documents based on relevance", # noqa: E501 @@ -18,23 +18,20 @@ "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", ], - tests_require=[ - "pytest", - "python-dotenv", - ], - python_requires=">=3.8", + python_requires=">=3.9", install_requires=[ - "langchain", - "langchain_core", - "langchain_openai", - "langchain_community", - "openai", - "chromadb", - "tiktoken", - "python-dotenv", + "langchain>=0.2.6,<0.3.0", + "langchain_core>=0.2.11,<0.3.0", + "langchain_openai>=0.1.14,<0.2.0", + "langchain_community>=0.2.6,<0.3.0", + "openai>=1.2.0,<2.0.0", + "chromadb>=0.5.3,<0.6.0", + "tiktoken>=0.5.2,<0.6.0", + "python-dotenv>=1.0.0,<2.0.0", + "beautifulsoup4>=4.12.0,<5.0.0", ], )