diff --git a/.github/workflows/check-ios-wheels.yml b/.github/workflows/check-ios-wheels.yml new file mode 100644 index 0000000..86c1599 --- /dev/null +++ b/.github/workflows/check-ios-wheels.yml @@ -0,0 +1,45 @@ +--- +name: Check iOS Wheels + +on: + pull_request: + paths: + - "uv.lock" + push: + branches: + - main + paths: + - "uv.lock" + +jobs: + check-ios-wheels: + runs-on: ubuntu-latest + timeout-minutes: 5 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Check for iOS wheels in uv.lock + run: | + echo "đ Checking for iOS-specific wheels in uv.lock..." + + if grep -q "ios_[0-9]\+_[0-9]\+_.*\.whl" uv.lock; then + echo "â iOS-specific wheels found in uv.lock!" + echo "" + echo "These wheels are not compatible with Docker builds and must be removed." + echo "" + echo "Found the following iOS wheels:" + grep "ios_[0-9]\+_[0-9]\+_.*\.whl" uv.lock + echo "" + echo "To fix this issue:" + echo "1. Run: ./scripts/remove-ios-wheels.sh" + echo "2. Commit the updated uv.lock file" + echo "" + echo "To prevent this in the future when updating dependencies:" + echo "- Use Linux or Docker to regenerate uv.lock" + echo "- Or use UV_EXCLUDE_NEWER with a platform constraint" + exit 1 + fi + + echo "â No iOS-specific wheels found in uv.lock" diff --git a/.github/workflows/docker-validate.yml b/.github/workflows/docker-validate.yml index 50d7ece..d1c94a8 100644 --- a/.github/workflows/docker-validate.yml +++ b/.github/workflows/docker-validate.yml @@ -114,7 +114,7 @@ jobs: - name: Check docker-compose services run: | services=$(docker-compose config --services | sort | tr "\n" " " | sed "s/ $//") - expected="dashboard discovery extractor graphinator neo4j postgres rabbitmq tableinator" + expected="dashboard discovery extractor graphinator neo4j postgres rabbitmq redis tableinator" if [ "$services" != "$expected" ]; then echo "â Service mismatch!" 
@@ -128,14 +128,14 @@ jobs: run: | # Check that services have correct dependencies deps=$(docker-compose config | yq eval '.services.dashboard.depends_on | keys | sort | join(" ")' -) - if [ "$deps" != "neo4j postgres rabbitmq" ]; then - echo "â Dashboard should depend on neo4j, postgres, and rabbitmq" + if [ "$deps" != "neo4j postgres rabbitmq redis" ]; then + echo "â Dashboard should depend on neo4j, postgres, rabbitmq, and redis" exit 1 fi deps=$(docker-compose config | yq eval '.services.discovery.depends_on | keys | sort | join(" ")' -) - if [ "$deps" != "neo4j postgres rabbitmq" ]; then - echo "â Discovery should depend on neo4j, postgres, and rabbitmq" + if [ "$deps" != "neo4j postgres rabbitmq redis" ]; then + echo "â Discovery should depend on neo4j, postgres, rabbitmq, and redis" exit 1 fi diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5a5f9cc..67ecfd2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -94,3 +94,12 @@ repos: hooks: - id: shfmt args: ["-i", "2", "-ci", "-bn", "-sr"] # 2 space indent, indent case, binary next line, redirect operators + + - repo: local + hooks: + - id: check-ios-wheels + name: Check for iOS wheels in uv.lock + entry: ./scripts/check-ios-wheels-hook.sh + language: system + files: ^uv\.lock$ + pass_filenames: false diff --git a/common/config.py b/common/config.py index d61eb62..acbb8cd 100644 --- a/common/config.py +++ b/common/config.py @@ -181,6 +181,7 @@ class DashboardConfig: postgres_username: str postgres_password: str postgres_database: str + redis_url: str = "redis://localhost:6379/0" @classmethod def from_env(cls) -> "DashboardConfig": @@ -189,6 +190,9 @@ def from_env(cls) -> "DashboardConfig": graphinator_config = GraphinatorConfig.from_env() tableinator_config = TableinatorConfig.from_env() + # Redis configuration + redis_url = getenv("REDIS_URL", "redis://localhost:6379/0") + return cls( amqp_connection=graphinator_config.amqp_connection, 
neo4j_address=graphinator_config.neo4j_address, @@ -198,6 +202,7 @@ def from_env(cls) -> "DashboardConfig": postgres_username=tableinator_config.postgres_username, postgres_password=tableinator_config.postgres_password, postgres_database=tableinator_config.postgres_database, + redis_url=redis_url, ) diff --git a/discovery/cache.py b/discovery/cache.py new file mode 100644 index 0000000..a07f1e8 --- /dev/null +++ b/discovery/cache.py @@ -0,0 +1,254 @@ +"""Redis caching module for Discovery service.""" + +import hashlib +import json +import logging +from collections.abc import Callable +from typing import Any + +import orjson +from common import get_config +from redis import asyncio as aioredis +from redis.exceptions import RedisError + + +logger = logging.getLogger(__name__) + + +class CacheManager: + """Manages Redis caching for the Discovery service.""" + + def __init__( + self, + redis_url: str = "redis://localhost:6379/0", + default_ttl: int = 3600, + key_prefix: str = "discovery:", + ) -> None: + """Initialize the cache manager. + + Args: + redis_url: Redis connection URL + default_ttl: Default TTL in seconds (1 hour) + key_prefix: Prefix for all cache keys + """ + self.redis_url = redis_url + self.default_ttl = default_ttl + self.key_prefix = key_prefix + self.redis: aioredis.Redis | None = None + self.connected = False + + async def initialize(self) -> None: + """Initialize Redis connection.""" + try: + self.redis = await aioredis.from_url( + self.redis_url, + encoding="utf-8", + decode_responses=False, # We'll handle decoding ourselves for orjson + max_connections=10, + ) + # Test connection + await self.redis.ping() + self.connected = True + logger.info("đ Redis cache connected successfully") + except (RedisError, OSError) as e: + logger.warning(f"â ī¸ Redis connection failed: {e}. 
Running without cache.") + self.connected = False + + async def close(self) -> None: + """Close Redis connection.""" + if self.redis: + await self.redis.close() + logger.info("đ Redis cache connection closed") + + def _make_key(self, key: str) -> str: + """Create a namespaced cache key.""" + return f"{self.key_prefix}{key}" + + @staticmethod + def _serialize_value(value: Any) -> bytes: + """Serialize value using orjson for performance.""" + return orjson.dumps(value) + + @staticmethod + def _deserialize_value(data: bytes) -> Any: + """Deserialize value using orjson.""" + return orjson.loads(data) + + async def get(self, key: str) -> Any | None: + """Get value from cache. + + Args: + key: Cache key + + Returns: + Cached value or None if not found + """ + if not self.connected or not self.redis: + return None + + try: + cache_key = self._make_key(key) + data = await self.redis.get(cache_key) + if data: + logger.debug(f"đ Cache hit: {key}") + return self._deserialize_value(data) + logger.debug(f"đ Cache miss: {key}") + return None + except Exception as e: + logger.error(f"â Cache get error: {e}") + return None + + async def set(self, key: str, value: Any, ttl: int | None = None) -> bool: + """Set value in cache. + + Args: + key: Cache key + value: Value to cache + ttl: Time to live in seconds (uses default if None) + + Returns: + True if successful, False otherwise + """ + if not self.connected or not self.redis: + return False + + try: + cache_key = self._make_key(key) + data = self._serialize_value(value) + ttl = ttl or self.default_ttl + await self.redis.setex(cache_key, ttl, data) + logger.debug(f"đ Cache set: {key} (TTL: {ttl}s)") + return True + except Exception as e: + logger.error(f"â Cache set error: {e}") + return False + + async def delete(self, key: str) -> bool: + """Delete value from cache. 
+ + Args: + key: Cache key + + Returns: + True if deleted, False otherwise + """ + if not self.connected or not self.redis: + return False + + try: + cache_key = self._make_key(key) + result = await self.redis.delete(cache_key) + logger.debug(f"đ Cache delete: {key}") + return bool(result) + except Exception as e: + logger.error(f"â Cache delete error: {e}") + return False + + async def clear_pattern(self, pattern: str) -> int: + """Clear all keys matching a pattern. + + Args: + pattern: Key pattern (e.g., "search:*") + + Returns: + Number of keys deleted + """ + if not self.connected or not self.redis: + return 0 + + try: + full_pattern = self._make_key(pattern) + keys = [] + async for key in self.redis.scan_iter(match=full_pattern): + keys.append(key) + + if keys: + deleted = await self.redis.delete(*keys) + logger.info(f"đ Cleared {deleted} cache keys matching: {pattern}") + return int(deleted) + return 0 + except Exception as e: + logger.error(f"â Cache clear error: {e}") + return 0 + + def cache_key_for_params(self, prefix: str, **params: Any) -> str: + """Generate a cache key from parameters. + + Args: + prefix: Key prefix (e.g., "search", "graph") + **params: Parameters to include in key + + Returns: + Cache key string + """ + # Sort params for consistent keys + sorted_params = sorted(params.items()) + param_str = json.dumps(sorted_params, sort_keys=True) + # Using MD5 for cache key generation only - not for security purposes + param_hash = hashlib.md5(param_str.encode()).hexdigest()[:8] # nosec B324 # noqa: S324 + return f"{prefix}:{param_hash}" + + +# Decorator for caching async functions +def cached( + prefix: str, + ttl: int | None = None, + key_func: Callable[..., str] | None = None, +) -> Callable: + """Decorator for caching async function results. 
+ + Args: + prefix: Cache key prefix + ttl: Time to live in seconds + key_func: Custom function to generate cache key from args + + Returns: + Decorated function + """ + + def decorator(func: Callable) -> Callable: + async def wrapper(self: Any, *args: Any, **kwargs: Any) -> Any: + # Check if cache manager is available + cache_manager = getattr(self, "cache", None) + if not cache_manager or not cache_manager.connected: + return await func(self, *args, **kwargs) + + # Generate cache key + cache_key = key_func(*args, **kwargs) if key_func else cache_manager.cache_key_for_params(prefix, args=args, kwargs=kwargs) + + # Try to get from cache + cached_value = await cache_manager.get(cache_key) + if cached_value is not None: + return cached_value + + # Execute function and cache result + result = await func(self, *args, **kwargs) + await cache_manager.set(cache_key, result, ttl) + return result + + return wrapper + + return decorator + + +# Global cache manager instance with configuration +def _get_cache_manager() -> CacheManager: + """Get cache manager with configuration.""" + config = get_config() + return CacheManager(redis_url=config.redis_url) + + +cache_manager = _get_cache_manager() + + +# Cache configuration for different data types +CACHE_TTL = { + "search": 3600, # 1 hour + "graph": 1800, # 30 minutes + "journey": 3600, # 1 hour + "trends": 7200, # 2 hours + "heatmap": 7200, # 2 hours + "artist_details": 3600, # 1 hour + "recommendations": 1800, # 30 minutes + "analytics": 3600, # 1 hour +} diff --git a/discovery/discovery.py b/discovery/discovery.py index 418856a..ea0151d 100755 --- a/discovery/discovery.py +++ b/discovery/discovery.py @@ -22,6 +22,16 @@ get_analytics_instance, ) from discovery.graph_explorer import GraphQuery, explore_graph, get_graph_explorer_instance +from discovery.playground_api import ( + JourneyRequest, + artist_details_handler, + graph_data_handler, + heatmap_handler, + journey_handler, + playground_api, + search_handler, + 
trends_handler, +) from discovery.recommender import ( RecommendationRequest, get_recommendations, @@ -62,6 +72,7 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None]: await recommender.initialize() await analytics.initialize() await graph_explorer.initialize() + await playground_api.initialize() logger.info("â Discovery service started successfully") @@ -72,6 +83,7 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None]: await recommender.close() await analytics.close() await graph_explorer.close() + await playground_api.close() logger.info("â Discovery service shutdown complete") @@ -203,6 +215,59 @@ async def explore_graph_api(query: GraphQuery) -> dict[str, Any]: raise HTTPException(status_code=500, detail=str(e)) from e +# Playground API Routes +@app.get("/api/search") # type: ignore[misc] +async def search_api( + q: str, + type: str = "all", + limit: int = 10, +) -> dict[str, Any]: + """Search endpoint for playground.""" + return await search_handler(q=q, type=type, limit=limit) + + +@app.get("/api/graph") # type: ignore[misc] +async def graph_api( + node_id: str, + depth: int = 2, + limit: int = 50, +) -> dict[str, Any]: + """Graph data endpoint for playground.""" + return await graph_data_handler(node_id=node_id, depth=depth, limit=limit) + + +@app.post("/api/journey") # type: ignore[misc] +async def journey_api(request: JourneyRequest) -> dict[str, Any]: + """Music journey endpoint for playground.""" + return await journey_handler(request) + + +@app.get("/api/trends") # type: ignore[misc] +async def trends_api( + type: str, + start_year: int = 1950, + end_year: int = 2024, + top_n: int = 20, +) -> dict[str, Any]: + """Trends endpoint for playground.""" + return await trends_handler(type=type, start_year=start_year, end_year=end_year, top_n=top_n) + + +@app.get("/api/heatmap") # type: ignore[misc] +async def heatmap_api( + type: str, + top_n: int = 20, +) -> dict[str, Any]: + """Heatmap endpoint for playground.""" + return await 
heatmap_handler(type=type, top_n=top_n) + + +@app.get("/api/artists/{artist_id}") # type: ignore[misc] +async def artist_details_api(artist_id: str) -> dict[str, Any]: + """Artist details endpoint for playground.""" + return await artist_details_handler(artist_id) + + # WebSocket endpoint for real-time updates @app.websocket("/ws") # type: ignore[misc] async def websocket_endpoint(websocket: WebSocket) -> None: diff --git a/discovery/examples.md b/discovery/examples.md new file mode 100644 index 0000000..4bafc0e --- /dev/null +++ b/discovery/examples.md @@ -0,0 +1,281 @@ +# Discovery Playground Examples + +This document provides practical examples of using the Discovery Playground to explore music data. + +## Getting Started + +1. **Start the Services** + + ```bash + docker-compose up -d + ``` + +1. **Wait for Data Processing** + + - Check dashboard at http://localhost:8003 for processing status + - Wait for some data to be processed before exploring + +1. **Open the Playground** + + - Navigate to http://localhost:8005 + - The interface loads with the Graph Explorer view + +## Example Workflows + +### 1. Exploring Artist Connections + +**Objective**: Discover how artists are connected through collaborations, labels, and releases. + +**Steps**: + +1. Search for "Miles Davis" in the search bar +1. Click "Explore" to load the graph visualization +1. Adjust the depth slider to 3 for deeper connections +1. Hover over nodes to see connection highlights +1. Click on connected artists to explore further + +**What You'll See**: + +- Blue nodes: Artists (Miles Davis at the center) +- Red nodes: Releases/Albums +- Gray nodes: Record labels +- Yellow nodes: Genres + +**Insights Gained**: + +- Direct collaborators and band members +- Record labels throughout career +- Genre evolution over time +- Influence network and connections + +### 2. Finding Musical Journeys + +**Objective**: Find the shortest path between two seemingly unconnected artists. + +**Steps**: + +1. 
Switch to "Music Journey" view +1. Search for "The Beatles" and select as start artist +1. Search for "Kendrick Lamar" and select as end artist +1. Click "Find Journey" +1. Explore the timeline view to see chronological progression + +**Expected Results**: + +- Path might go through: The Beatles → Producer → Hip-hop producer → Kendrick Lamar +- Or: The Beatles → Sample usage → Hip-hop artist → Kendrick Lamar +- Timeline shows the decades spanned in the connection + +**Use Cases**: + +- Music education and discovery +- Understanding genre evolution +- Finding surprising connections +- Creating themed playlists + +### 3. Analyzing Music Trends + +**Objective**: Visualize how musical genres evolved over decades. + +**Steps**: + +1. Go to "Trend Analysis" view +1. Select "Genre" as the trend type +1. Set start year to 1960 and end year to 2020 +1. Click "Explore" to generate the visualization +1. Hover over different areas to see specific data points + +**What to Look For**: + +- Rise and fall of different genres +- Peak periods for specific styles +- Emergence of new genres +- Correlation between genres + +**Insights**: + +- Rock's dominance in the 70s-80s +- Hip-hop's emergence in the 80s-90s +- Electronic music growth in the 90s-2000s +- Genre fragmentation in modern times + +### 4. Artist Similarity Analysis + +**Objective**: Discover which artists share the most musical similarities. + +**Steps**: + +1. Navigate to "Similarity Heatmap" view +1. Select "Genre" similarity type +1. Adjust the "Top Artists" slider to 20 +1. Click "Explore" to generate the heatmap +1. 
Click on high-intensity cells to explore connections + +**Reading the Heatmap**: + +- Darker colors = higher similarity +- Light/white areas = little to no similarity +- Diagonal shows perfect self-similarity +- Off-diagonal cells show cross-artist similarity + +**Advanced Usage**: + +- Try "Collaboration" mode to see who worked together +- Use results to discover new artists with similar styles +- Export interesting findings for further research + +## API Examples + +### Using the REST API Directly + +```bash +# Search for artists +curl "http://localhost:8005/api/search?q=metallica&type=artist" + +# Get graph data for an artist +curl "http://localhost:8005/api/graph?node_id=72872&depth=2&limit=50" + +# Find journey between artists +curl -X POST "http://localhost:8005/api/journey" \ + -H "Content-Type: application/json" \ + -d '{"start_artist_id": "72872", "end_artist_id": "194", "max_depth": 5}' + +# Get trend data +curl "http://localhost:8005/api/trends?type=genre&start_year=1970&end_year=2000" + +# Get similarity heatmap +curl "http://localhost:8005/api/heatmap?type=genre&top_n=15" +``` + +### WebSocket Connection + +```javascript +// Connect to real-time updates +const ws = new WebSocket('ws://localhost:8005/ws'); + +ws.onmessage = function(event) { + const data = JSON.parse(event.data); + console.log('Real-time update:', data); +}; + +// Send a message +ws.send(JSON.stringify({ + type: 'subscribe', + view: 'graph' +})); +``` + +## Performance Tips + +### Optimizing Graph Exploration + +- Start with depth 2, increase gradually +- Use node limits (50-100) for large graphs +- Clear cache if data seems stale +- Use specific searches rather than broad terms + +### Trend Analysis Best Practices + +- Limit year ranges for detailed analysis +- Use "Top N" slider to focus on major players +- Compare different trend types side by side +- Export data for deeper analysis + +### Heatmap Usage + +- Start with 15-20 artists for readability +- Use genre similarity for musical 
discovery +- Try collaboration mode for network analysis +- Click on cells to explore specific connections + +## Troubleshooting + +### Common Issues + +**Empty Results**: + +- Ensure data has been processed (check dashboard) +- Try broader search terms +- Reduce complexity (depth, limits) +- Check spelling of artist names + +**Slow Performance**: + +- Reduce node limits in graph explorer +- Use smaller year ranges in trends +- Lower the number of artists in heatmaps +- Check Redis cache connection + +**Connection Errors**: + +- Verify all services are running +- Check Docker container health +- Restart discovery service if needed +- Clear browser cache and cookies + +### Performance Monitoring + +Check the dashboard at http://localhost:8003 for: + +- Service health status +- Cache hit rates +- Database connection status +- Processing queue lengths + +## Advanced Features + +### Custom Queries + +The playground uses Neo4j Cypher queries. Advanced users can: + +- Examine the source code for query patterns +- Use Neo4j Browser (http://localhost:7474) for custom queries +- Extend the API with additional endpoints + +### Data Export + +- Journey data can be exported as JSON +- Graph visualizations can be saved as images +- Trend data suitable for further analysis +- Heatmap data exportable for research + +### Integration + +- Use API endpoints in other applications +- Build custom dashboards with the data +- Create automated music discovery workflows +- Integrate with recommendation systems + +## Educational Use Cases + +### Music History Classes + +- Trace genre evolution through decades +- Explore artist influence networks +- Analyze regional music movements +- Study technological impact on music + +### Data Science Projects + +- Network analysis of music industry +- Machine learning on music similarity +- Time series analysis of trends +- Graph theory applications + +### Music Discovery + +- Find new artists similar to favorites +- Explore musical connections +- 
Discover collaboration networks +- Understand genre relationships + +## Contributing + +To add new examples or improve existing ones: + +1. Fork the repository +1. Add your examples to this file +1. Test with real data +1. Submit a pull request + +For questions or suggestions, please open an issue on GitHub. diff --git a/discovery/playground.md b/discovery/playground.md new file mode 100644 index 0000000..7a29163 --- /dev/null +++ b/discovery/playground.md @@ -0,0 +1,126 @@ +# Music Discovery Playground + +The Music Discovery Playground is an interactive web interface for exploring music data from the Discogs database. It provides multiple visualization modes and analysis tools to discover musical connections and trends. + +## Features + +### 1. Graph Explorer + +- Interactive network visualization of artists, releases, and labels +- Explore connections between music entities +- Adjustable exploration depth and node limits +- Real-time graph manipulation with zoom and drag capabilities + +### 2. Music Journey + +- Find musical paths between any two artists +- Visualize the connections through collaborations, labels, and releases +- Timeline view showing the chronological progression +- Export journey data for further analysis + +### 3. Trend Analysis + +- Analyze music trends over time +- Genre evolution tracking +- Artist productivity metrics +- Label activity patterns +- Interactive time-series visualizations + +### 4. Similarity Heatmap + +- Visual representation of artist similarities +- Genre overlap analysis +- Collaboration network mapping +- Style-based connections + +## Getting Started + +1. **Access the Playground** + + - Navigate to http://localhost:8005 after starting the discovery service + - The interface loads automatically with the Graph Explorer view + +1. **Search for Music** + + - Use the search bar to find artists, releases, or labels + - Results appear in real-time as you type + - Click on any result to explore further + +1. 
**Navigate Views** + + - Use the navigation menu to switch between different visualizations + - Each view has its own controls in the sidebar + - The info panel shows details about selected items + +## API Endpoints + +The playground extends the Discovery Service with these endpoints: + +- `GET /api/search` - Search for music entities +- `GET /api/graph` - Get graph data for visualization +- `POST /api/journey` - Find paths between artists +- `GET /api/trends` - Get trend analysis data +- `GET /api/heatmap` - Get similarity heatmap data +- `GET /api/artists/{id}` - Get detailed artist information + +## WebSocket Support + +Real-time updates are supported through WebSocket connections at `/ws`. This enables: + +- Live data updates across all connected clients +- Collaborative exploration sessions +- Performance monitoring + +## Technical Stack + +- **Frontend**: HTML5, Bootstrap 5, D3.js for visualizations +- **Backend**: FastAPI with async support +- **Database**: Neo4j for graph data, PostgreSQL for structured data +- **Real-time**: WebSocket for live updates + +## Development + +To extend the playground: + +1. **Add New Visualizations** + + - Create a new JavaScript module in `/static/js/` + - Implement the visualization class with standard methods + - Register it in `playground.js` + +1. **Add API Endpoints** + + - Extend `playground_api.py` with new methods + - Add route handlers in `discovery.py` + - Update the API client in `api-client.js` + +1. 
**Customize Styling** + + - Modify `/static/css/playground.css` + - Follow the existing theme and color scheme + +## Performance Tips + +- Limit graph exploration depth for large datasets +- Use the node limit slider to control visualization complexity +- Enable caching for frequently accessed data (coming soon) +- Close unused browser tabs to free resources + +## Examples and Tutorials + +See **[examples.md](examples.md)** for detailed usage examples, workflows, and tutorials including: + +- Exploring artist connections and influence networks +- Finding musical journeys between artists +- Analyzing music trends over decades +- Creating similarity heatmaps and collaboration networks +- API usage examples and WebSocket integration +- Performance optimization tips and troubleshooting + +## Future Enhancements + +- Export visualizations as images or PDFs +- Collaborative playlists based on discoveries +- Machine learning-powered recommendations +- Advanced filtering and search options +- Custom query builder interface diff --git a/discovery/playground_api.py b/discovery/playground_api.py new file mode 100644 index 0000000..06a979d --- /dev/null +++ b/discovery/playground_api.py @@ -0,0 +1,480 @@ +"""Extended API endpoints for Discovery Playground.""" + +import logging +from typing import Any + +from common import get_config +from fastapi import HTTPException, Query +from neo4j import AsyncGraphDatabase +from pydantic import BaseModel +from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine +from sqlalchemy.orm import declarative_base + +from discovery.cache import CACHE_TTL, cache_manager, cached + + +logger = logging.getLogger(__name__) + +# Database models +Base = declarative_base() + + +class SearchRequest(BaseModel): + """Search request model.""" + + query: str + type: str = "all" # all, artist, release, label + limit: int = 10 + + +class JourneyRequest(BaseModel): + """Music journey request model.""" + + start_artist_id: str + end_artist_id: 
async def initialize(self) -> None:
    """Open the cache, Neo4j and PostgreSQL connections used by the playground.

    The cache is initialized first, then the Neo4j driver is created from the
    configured address and credentials, and finally an async SQLAlchemy
    engine plus session factory are created for PostgreSQL.
    """
    # Function-scope import keeps this block independent of file-level imports.
    from urllib.parse import quote

    # Initialize cache
    await self.cache.initialize()

    # Neo4j connection
    self.neo4j_driver = AsyncGraphDatabase.driver(
        self.config.neo4j_address,
        auth=(self.config.neo4j_username, self.config.neo4j_password),
    )

    # PostgreSQL connection. Credentials are percent-encoded because raw
    # characters such as "@", "/", ":" or "#" in a username/password would
    # otherwise be parsed as URL structure and corrupt the connection URL.
    username = quote(self.config.postgres_username, safe="")
    password = quote(self.config.postgres_password, safe="")
    pg_url = (
        f"postgresql+asyncpg://{username}:{password}"
        f"@{self.config.postgres_address}/{self.config.postgres_database}"
    )
    self.pg_engine = create_async_engine(pg_url)
    self.pg_session_maker = async_sessionmaker(
        bind=self.pg_engine,
        expire_on_commit=False,
    )
@cached("graph", ttl=CACHE_TTL["graph"])
async def get_graph_data(self, node_id: str, depth: int = 2, limit: int = 50) -> dict[str, Any]:
    """Return the neighbourhood of a node as ``{"nodes": [...], "links": [...]}``.

    Args:
        node_id: Value of the ``id`` property of the center node.
        depth: Maximum number of hops to expand from the center (at least 1).
        limit: Maximum number of paths fetched from Neo4j.

    Returns:
        ``nodes`` holds one entry per distinct node (id, name, type,
        properties); ``links`` holds one entry per relationship on each
        returned path, oriented in path-traversal order.

    Raises:
        HTTPException: 500 if the Neo4j driver has not been initialized.
    """
    if not self.neo4j_driver:
        raise HTTPException(status_code=500, detail="Database not initialized")

    # Cypher does not accept a parameter as a variable-length bound
    # ("[*1..$depth]" is a syntax error), so the bound is coerced to an int
    # and inlined as a literal. int() also rules out query injection.
    max_hops = max(1, int(depth))
    query = f"""
        MATCH (center)
        WHERE center.id = $node_id
        OPTIONAL MATCH path = (center)-[*1..{max_hops}]-(connected)
        WITH center, connected, relationships(path) AS rels, nodes(path) AS path_nodes
        LIMIT $limit
        RETURN DISTINCT center, connected, rels, path_nodes
    """

    nodes: list[dict[str, Any]] = []
    links: list[dict[str, Any]] = []
    seen_ids: set[Any] = set()

    def add_node(entity: Any) -> None:
        """Append ``entity`` to ``nodes`` unless its id was already emitted."""
        # NOTE(review): assumes every reachable node carries an "id" property.
        entity_id = entity["id"]
        if entity_id in seen_ids:
            return
        seen_ids.add(entity_id)
        nodes.append(
            {
                "id": entity_id,
                "name": entity.get("name", entity.get("title", "")),
                # An unlabeled node yields "" instead of raising StopIteration.
                "type": next(iter(entity.labels), "").lower(),
                "properties": dict(entity),
            }
        )

    async with self.neo4j_driver.session() as session:
        result = await session.run(query, node_id=node_id, limit=limit)

        async for record in result:
            center = record["center"]
            if center:
                add_node(center)

            rels = record["rels"]
            # OPTIONAL MATCH yields nulls when the center has no neighbours.
            if not (record["connected"] and rels):
                continue

            path_nodes = record["path_nodes"]
            # Register every node on the path, not only its end point, so that
            # each link's source/target is guaranteed to exist in "nodes".
            for member in path_nodes:
                add_node(member)

            for rel, source_node, target_node in zip(rels, path_nodes, path_nodes[1:]):
                links.append(
                    {
                        "source": source_node["id"],
                        "target": target_node["id"],
                        "type": rel.type.lower(),
                        "properties": dict(rel),
                    }
                )

    return {"nodes": nodes, "links": links}


@cached("journey", ttl=CACHE_TTL["journey"])
async def find_music_journey(self, start_artist_id: str, end_artist_id: str, max_depth: int = 5) -> dict[str, Any]:
    """Find the shortest path between two artists.

    Args:
        start_artist_id: ``id`` property of the start artist.
        end_artist_id: ``id`` property of the end artist.
        max_depth: Maximum path length in hops (at least 1).

    Returns:
        ``{"journey": {"nodes", "relationships", "length"}}`` when a path
        exists, otherwise ``{"journey": None, "message": ...}``.

    Raises:
        HTTPException: 500 if the Neo4j driver has not been initialized.
    """
    if not self.neo4j_driver:
        raise HTTPException(status_code=500, detail="Database not initialized")

    # As in get_graph_data: a variable-length bound must be a literal in
    # Cypher, so the validated integer is inlined. %-formatting is used
    # because the query text is full of literal braces.
    hop_limit = max(1, int(max_depth))
    query = """
        MATCH path = shortestPath(
            (start:Artist {id: $start_id})-[*1..%d]-(end:Artist {id: $end_id})
        )
        RETURN path,
               [node in nodes(path) | {
                   id: node.id,
                   name: node.name,
                   type: labels(node)[0],
                   properties: properties(node)
               }] AS nodes,
               [rel in relationships(path) | {
                   type: type(rel),
                   properties: properties(rel)
               }] AS relationships
    """ % hop_limit

    async with self.neo4j_driver.session() as session:
        result = await session.run(
            query,
            start_id=start_artist_id,
            end_id=end_artist_id,
        )

        record = await result.single()
        if not record:
            return {"journey": None, "message": "No path found between these artists"}

        return {
            "journey": {
                "nodes": record["nodes"],
                "relationships": record["relationships"],
                "length": len(record["nodes"]) - 1,
            }
        }
self.neo4j_driver: + return {"heatmap": [], "labels": [], "type": heatmap_type} + + async with self.neo4j_driver.session() as session: + # Get top artists by release count + query = """ + MATCH (a:Artist)-[:BY]->(r:Release) + WITH a, COUNT(r) AS release_count + ORDER BY release_count DESC + LIMIT $top_n + WITH collect(a) AS artists + UNWIND artists AS a1 + UNWIND artists AS a2 + MATCH (a1)-[:BY]->(r1:Release)-[:HAS_GENRE]->(g:Genre)<-[:HAS_GENRE]-(r2:Release)<-[:BY]-(a2) + WHERE id(a1) < id(a2) + WITH a1.name AS artist1, a2.name AS artist2, COUNT(DISTINCT g) AS shared_genres + RETURN artist1, artist2, shared_genres + ORDER BY shared_genres DESC + """ + + result = await session.run(query, top_n=top_n) + data = [] + artists = set() + + async for record in result: + artists.add(record["artist1"]) + artists.add(record["artist2"]) + data.append( + { + "x": record["artist1"], + "y": record["artist2"], + "value": record["shared_genres"], + } + ) + + return { + "heatmap": data, + "labels": sorted(artists), + "type": heatmap_type, + } + + elif heatmap_type == "collab": + if not self.neo4j_driver: + return {"heatmap": [], "labels": [], "type": heatmap_type} + + async with self.neo4j_driver.session() as session: + query = """ + MATCH (a:Artist) + WITH a, size((a)-[:COLLABORATED_WITH]-()) AS collab_count + ORDER BY collab_count DESC + LIMIT $top_n + WITH collect(a) AS artists + UNWIND artists AS a1 + UNWIND artists AS a2 + OPTIONAL MATCH (a1)-[c:COLLABORATED_WITH]-(a2) + WHERE id(a1) < id(a2) + WITH a1.name AS artist1, a2.name AS artist2, + CASE WHEN c IS NOT NULL THEN 1 ELSE 0 END AS collaborated + RETURN artist1, artist2, collaborated + """ + + result = await session.run(query, top_n=top_n) + data = [] + artists = set() + + async for record in result: + if record["collaborated"] > 0: + artists.add(record["artist1"]) + artists.add(record["artist2"]) + data.append( + { + "x": record["artist1"], + "y": record["artist2"], + "value": record["collaborated"], + } + ) + + return { + 
"heatmap": data, + "labels": sorted(artists), + "type": heatmap_type, + } + + return {"heatmap": [], "labels": [], "type": heatmap_type} + + @cached("artist_details", ttl=CACHE_TTL["artist_details"]) + async def get_artist_details(self, artist_id: str) -> dict[str, Any]: + """Get detailed information about an artist.""" + if not self.neo4j_driver: + raise HTTPException(status_code=500, detail="Database not initialized") + + async with self.neo4j_driver.session() as session: + query = """ + MATCH (a:Artist {id: $artist_id}) + OPTIONAL MATCH (a)-[:BY]->(r:Release) + OPTIONAL MATCH (a)-[:MEMBER_OF]->(g:Artist) + OPTIONAL MATCH (a)-[:HAS_ALIAS]->(alias:Artist) + OPTIONAL MATCH (a)-[:COLLABORATED_WITH]-(collab:Artist) + RETURN a, + COUNT(DISTINCT r) AS release_count, + collect(DISTINCT g.name) AS groups, + collect(DISTINCT alias.name) AS aliases, + collect(DISTINCT collab.name)[0..10] AS collaborators + """ + + result = await session.run(query, artist_id=artist_id) + record = await result.single() + + if not record: + raise HTTPException(status_code=404, detail="Artist not found") + + artist = record["a"] + return { + "id": artist["id"], + "name": artist.get("name"), + "real_name": artist.get("real_name"), + "profile": artist.get("profile"), + "urls": artist.get("urls", []), + "release_count": record["release_count"], + "groups": record["groups"], + "aliases": record["aliases"], + "collaborators": record["collaborators"], + } + + +# Create global instance +playground_api = PlaygroundAPI() + + +# FastAPI route handlers +async def search_handler( + q: str = Query(..., description="Search query"), + type: str = Query("all", description="Search type: all, artist, release, label"), + limit: int = Query(10, ge=1, le=50), +) -> dict[str, Any]: + """Search endpoint handler.""" + result: dict[str, Any] = await playground_api.search(q, type, limit) + return result + + +async def graph_data_handler( + node_id: str = Query(..., description="Node ID"), + depth: int = Query(2, ge=1, 
le=5), + limit: int = Query(50, ge=10, le=200), +) -> dict[str, Any]: + """Graph data endpoint handler.""" + result: dict[str, Any] = await playground_api.get_graph_data(node_id, depth, limit) + return result + + +async def journey_handler(request: JourneyRequest) -> dict[str, Any]: + """Music journey endpoint handler.""" + result: dict[str, Any] = await playground_api.find_music_journey(request.start_artist_id, request.end_artist_id, request.max_depth) + return result + + +async def trends_handler( + type: str = Query(..., description="Trend type: genre, artist, label"), + start_year: int = Query(1950, ge=1950), + end_year: int = Query(2024, le=2024), + top_n: int = Query(20, ge=5, le=50), +) -> dict[str, Any]: + """Trends endpoint handler.""" + result: dict[str, Any] = await playground_api.get_trends(type, start_year, end_year, top_n) + return result + + +async def heatmap_handler( + type: str = Query(..., description="Heatmap type: genre, collab, style"), + top_n: int = Query(20, ge=10, le=50), +) -> dict[str, Any]: + """Heatmap endpoint handler.""" + result: dict[str, Any] = await playground_api.get_heatmap(type, top_n) + return result + + +async def artist_details_handler(artist_id: str) -> dict[str, Any]: + """Artist details endpoint handler.""" + result: dict[str, Any] = await playground_api.get_artist_details(artist_id) + return result diff --git a/discovery/pyproject.toml b/discovery/pyproject.toml index cc7dd94..db97bcd 100644 --- a/discovery/pyproject.toml +++ b/discovery/pyproject.toml @@ -28,12 +28,16 @@ dependencies = [ "sentence-transformers>=2.2.0", # Semantic search "plotly>=5.17.0", # Advanced visualizations "sqlalchemy[asyncio]>=2.0.0", # Async SQL toolkit + "redis[hiredis]>=5.0.0", # Redis caching with performance optimizations ] [build-system] requires = ["hatchling"] build-backend = "hatchling.build" +[tool.hatch.build.targets.wheel] +packages = ["discovery"] + [tool.ruff] extend = "../pyproject.toml" diff --git 
a/discovery/static/css/playground.css b/discovery/static/css/playground.css new file mode 100644 index 0000000..0aaa79e --- /dev/null +++ b/discovery/static/css/playground.css @@ -0,0 +1,326 @@ +/* Discovery Playground Styles */ + +:root { + --primary-color: #1DB954; + --secondary-color: #191414; + --accent-color: #FF6B6B; + --background-color: #f8f9fa; + --text-color: #333; + --border-color: #dee2e6; +} + +body { + background-color: var(--background-color); + color: var(--text-color); + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; +} + +/* Navigation */ +.navbar { + box-shadow: 0 2px 4px rgba(0,0,0,.1); +} + +.navbar-brand { + font-weight: 600; +} + +/* Cards */ +.card { + border: none; + box-shadow: 0 0.125rem 0.25rem rgba(0,0,0,.075); + height: 100%; +} + +.card-header { + background-color: #fff; + border-bottom: 2px solid var(--primary-color); + font-weight: 600; +} + +/* Control Panel */ +.view-controls { + transition: opacity 0.3s ease; +} + +.form-label { + font-weight: 500; + color: #6c757d; + font-size: 0.875rem; +} + +/* Visualization Container */ +#visualizationContainer { + position: relative; + height: calc(100vh - 200px); + min-height: 500px; + background-color: #fff; +} + +.view-container { + width: 100%; + height: 100%; + position: absolute; + top: 0; + left: 0; +} + +/* Graph Visualization */ +#graphSvg { + width: 100%; + height: 100%; +} + +.node { + cursor: pointer; + transition: all 0.3s ease; +} + +.node:hover { + filter: brightness(1.2); +} + +.node circle { + stroke: #fff; + stroke-width: 2px; +} + +.node.artist circle { + fill: var(--primary-color); +} + +.node.release circle { + fill: var(--accent-color); +} + +.node.label circle { + fill: #6c757d; +} + +.node.genre circle { + fill: #ffc107; +} + +.node text { + font-size: 12px; + font-weight: 500; + pointer-events: none; + text-shadow: 0 1px 2px rgba(255,255,255,0.8); +} + +.link { + stroke: #999; + stroke-opacity: 0.6; + 
stroke-width: 1.5px; + transition: all 0.3s ease; +} + +.link:hover { + stroke-opacity: 1; + stroke-width: 3px; +} + +.link.collaboration { + stroke: var(--primary-color); +} + +.link.release { + stroke: var(--accent-color); +} + +.link.label { + stroke: #6c757d; +} + +/* Journey View */ +#journeyPath { + padding: 20px; + height: 100%; + overflow-y: auto; +} + +.journey-node { + background-color: #fff; + border: 2px solid var(--primary-color); + border-radius: 8px; + padding: 15px; + margin: 10px 0; + box-shadow: 0 2px 4px rgba(0,0,0,.1); + transition: all 0.3s ease; +} + +.journey-node:hover { + transform: translateX(5px); + box-shadow: 0 4px 8px rgba(0,0,0,.15); +} + +.journey-connection { + height: 30px; + position: relative; + margin: -5px 0; +} + +.journey-connection::before { + content: ''; + position: absolute; + left: 50%; + top: 0; + width: 2px; + height: 100%; + background-color: var(--primary-color); +} + +/* Loading Overlay */ +#loadingOverlay { + position: absolute; + top: 0; + left: 0; + right: 0; + bottom: 0; + background-color: rgba(255,255,255,0.9); + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + z-index: 1000; +} + +/* Info Panel */ +#infoPanel { + max-height: 300px; + overflow-y: auto; +} + +.info-item { + margin-bottom: 10px; + padding-bottom: 10px; + border-bottom: 1px solid var(--border-color); +} + +.info-item:last-child { + border-bottom: none; +} + +.info-label { + font-weight: 600; + color: #6c757d; + font-size: 0.875rem; +} + +.info-value { + color: var(--text-color); +} + +/* Tooltips */ +.tooltip-custom { + position: absolute; + background-color: rgba(0,0,0,0.9); + color: white; + padding: 8px 12px; + border-radius: 4px; + font-size: 12px; + pointer-events: none; + z-index: 1000; + box-shadow: 0 2px 4px rgba(0,0,0,.2); +} + +/* Responsive */ +@media (max-width: 991px) { + #visualizationContainer { + height: 500px; + } + + .col-lg-3 { + margin-bottom: 20px; + } +} + +/* Animations */ 
+@keyframes fadeIn { + from { opacity: 0; } + to { opacity: 1; } +} + +.fade-in { + animation: fadeIn 0.3s ease; +} + +/* Heatmap Styles */ +.heatmap-cell { + stroke: #fff; + stroke-width: 1px; + cursor: pointer; +} + +.heatmap-label { + font-size: 10px; + font-weight: 500; +} + +/* Trend Chart Styles */ +.trend-line { + fill: none; + stroke-width: 2px; +} + +.trend-area { + opacity: 0.3; +} + +/* Fullscreen Mode */ +.fullscreen { + position: fixed !important; + top: 0 !important; + left: 0 !important; + width: 100vw !important; + height: 100vh !important; + z-index: 9999 !important; + background-color: #fff; +} + +/* Custom Scrollbar */ +::-webkit-scrollbar { + width: 8px; + height: 8px; +} + +::-webkit-scrollbar-track { + background: #f1f1f1; +} + +::-webkit-scrollbar-thumb { + background: #888; + border-radius: 4px; +} + +::-webkit-scrollbar-thumb:hover { + background: #555; +} + +/* Legend */ +.legend { + position: absolute; + bottom: 20px; + right: 20px; + background-color: rgba(255,255,255,0.9); + padding: 10px; + border-radius: 4px; + box-shadow: 0 2px 4px rgba(0,0,0,.1); +} + +.legend-item { + display: flex; + align-items: center; + margin-bottom: 5px; +} + +.legend-color { + width: 20px; + height: 20px; + border-radius: 50%; + margin-right: 8px; +} + +.legend-label { + font-size: 12px; + font-weight: 500; +} diff --git a/discovery/static/index.html b/discovery/static/index.html index 05cd997..a46deef 100644 --- a/discovery/static/index.html +++ b/discovery/static/index.html @@ -3,221 +3,271 @@
-