feat: NEXT Store 2.0 重大更新 - 完整重构前后端

🎉 主要更新:

后端:
- 全新华为应用市场爬虫系统
- 三表分离数据库设计 (app_info, app_metrics, app_rating)
- 完整的API接口 (搜索、分类、热门、上新等)
- 元服务自动识别和分类
- 智能Token管理和数据处理
- 修复热门应用重复显示问题

前端:
- 全新首页设计 (今日上架、热门应用)
- 应用页面 (彩色分类磁贴、智能图标匹配)
- 今日上新页面 (日期切换)
- 热门应用页面 (卡片布局)
- 应用详情页面 (完整信息展示)
- Apple风格搜索栏
- Footer组件
- 底部导航栏优化 (4个导航项)
- 骨架屏加载效果
- FontAwesome图标集成

UI/UX:
- 统一浅色背景 (#F5F5F7)
- 流畅的过渡动画
- 响应式设计
- 毛玻璃效果

文档:
- CHANGELOG.md - 完整更新日志
- QUICKSTART.md - 快速开始
- 多个技术文档和使用指南

版本: v2.0.0
This commit is contained in:
Nvex
2025-10-25 21:20:32 +08:00
parent c0f81dbbe2
commit 720402ffe7
38 changed files with 5682 additions and 407 deletions

View File

@@ -0,0 +1,143 @@
"""
华为应用市场爬虫主程序
"""
import asyncio
from typing import Optional, List
from app.crawler.huawei_api import HuaweiAPI
from app.crawler.data_processor import DataProcessor
from app.crawler.app_ids import KNOWN_APP_IDS
from app.database import AsyncSessionLocal
class HuaweiCrawler:
    """Crawler for the Huawei AppGallery.

    Wraps a shared HuaweiAPI client and persists results via DataProcessor.
    Intended to be used as an async context manager so the underlying API
    client is closed deterministically.
    """

    def __init__(self):
        # One shared API client for all crawl tasks; DB sessions, by
        # contrast, are created per task (see _crawl_single_app).
        self.api = HuaweiAPI()

    async def __aenter__(self):
        """Async context manager entry — returns the crawler itself."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit — close the underlying API client."""
        await self.api.close()

    async def crawl_by_ids(
        self,
        id_list: Optional[List[int]] = None,
        limit: Optional[int] = None,
        batch_size: int = 50  # concurrent batch size, default 50
    ) -> tuple:
        """
        Crawl apps by numeric ID with bounded concurrency.

        Args:
            id_list: Numeric app IDs; when None, KNOWN_APP_IDS is used.
            limit: Optional cap on how many IDs to crawl.
            batch_size: Number of apps fetched concurrently per batch
                (default 50).

        Returns:
            Tuple of (success_count, failed_count).
        """
        if id_list is None:
            id_list = KNOWN_APP_IDS
        if limit:
            id_list = id_list[:limit]

        success_count = 0
        failed_count = 0

        print("=" * 80)
        # NOTE: closing full-width paren added — the original message left
        # the parenthesis unbalanced.
        print(f"开始爬取 {len(id_list)} 个应用(并发数: {batch_size})")
        print("=" * 80)

        # Process the ID list in fixed-size batches to bound concurrency.
        for batch_start in range(0, len(id_list), batch_size):
            batch_end = min(batch_start + batch_size, len(id_list))
            batch = id_list[batch_start:batch_end]

            # Launch one task per app in this batch; `i` is the 1-based
            # overall progress index used only for display.
            tasks = []
            for i, app_id_num in enumerate(batch, batch_start + 1):
                # AppGallery IDs are "C" followed by 19 zero-padded digits.
                app_id = f"C{app_id_num:019d}"
                tasks.append(self._crawl_single_app(app_id, i, len(id_list)))

            # Wait for the whole batch; with return_exceptions=True failures
            # come back as Exception objects instead of propagating.
            results = await asyncio.gather(*tasks, return_exceptions=True)

            # Tally outcomes: exceptions and falsy results count as failures.
            for result in results:
                if isinstance(result, Exception):
                    failed_count += 1
                elif result:
                    success_count += 1
                else:
                    failed_count += 1

            # Brief pause between batches to go easy on the remote API.
            if batch_end < len(id_list):
                await asyncio.sleep(0.2)

        print("\n" + "=" * 80)
        print(f"爬取完成: 成功 {success_count} 个, 失败 {failed_count} 个")
        print("=" * 80)
        return success_count, failed_count

    async def _crawl_single_app(self, app_id: str, index: int, total: int) -> bool:
        """Crawl one app and persist it; return True on success.

        Args:
            app_id: Full AppGallery ID string (e.g. "C000...123").
            index: 1-based progress index, for display only.
            total: Total number of apps being crawled, for display only.

        Returns:
            True when the app was fetched and saved; False when it was
            skipped or failed (this method never raises).
        """
        # Each concurrent task gets its own DB session — async sessions must
        # not be shared across tasks running in parallel.
        async with AsyncSessionLocal() as db_session:
            processor = DataProcessor(db_session)
            try:
                print(f"\n[{index}/{total}] {app_id}", end=" ")

                # Fetch basic app info.
                app_data = await self.api.get_app_info(app_id=app_id)
                print(f"{app_data['name']}", end=" ")

                # Fetch rating info.
                rating_data = await self.api.get_app_rating(app_id)

                # Persist; each flag reports whether its table got a new row.
                info_inserted, metric_inserted, rating_inserted = await processor.save_app_data(
                    app_data, rating_data
                )

                # Summarize what (if anything) was newly saved.
                status_parts = []
                if info_inserted:
                    status_parts.append("新应用")
                if metric_inserted:
                    status_parts.append("新指标")
                if rating_inserted:
                    status_parts.append("新评分")
                if status_parts:
                    print(f"{', '.join(status_parts)}")
                else:
                    print("→ 无更新")
                return True
            except ValueError:
                # ValueError appears to signal an Android-only app (per the
                # message below) — treated as a skip, not an error.
                print("✗ 跳过(安卓应用)")
                return False
            except Exception as e:
                # Best-effort crawl: log a truncated error and keep going so
                # one bad app never aborts the batch.
                print(f"✗ 失败: {str(e)[:50]}")
                return False
async def crawl_all():
    """Crawl every application in the known ID list.

    Returns the (success_count, failed_count) tuple from crawl_by_ids.
    """
    crawler = HuaweiCrawler()
    async with crawler:
        result = await crawler.crawl_by_ids()
    return result
async def crawl_limited(limit: int):
    """Crawl at most *limit* applications from the known ID list.

    Returns the (success_count, failed_count) tuple from crawl_by_ids.
    """
    crawler = HuaweiCrawler()
    async with crawler:
        result = await crawler.crawl_by_ids(limit=limit)
    return result