Files
ns2.0/backend/app/crawler/data_processor.py
Nvex 720402ffe7 feat: NEXT Store 2.0 重大更新 - 完整重构前后端
🎉 主要更新:

后端:
- 全新华为应用市场爬虫系统
- 三表分离数据库设计 (app_info, app_metrics, app_rating)
- 完整的API接口 (搜索、分类、热门、上新等)
- 元服务自动识别和分类
- 智能Token管理和数据处理
- 修复热门应用重复显示问题

前端:
- 全新首页设计 (今日上架、热门应用)
- 应用页面 (彩色分类磁贴、智能图标匹配)
- 今日上新页面 (日期切换)
- 热门应用页面 (卡片布局)
- 应用详情页面 (完整信息展示)
- Apple风格搜索栏
- Footer组件
- 底部导航栏优化 (4个导航项)
- 骨架屏加载效果
- FontAwesome图标集成

UI/UX:
- 统一浅色背景 (#F5F5F7)
- 流畅的过渡动画
- 响应式设计
- 毛玻璃效果

文档:
- CHANGELOG.md - 完整更新日志
- QUICKSTART.md - 快速开始
- 多个技术文档和使用指南

版本: v2.0.0
2025-10-25 21:20:32 +08:00

180 lines
6.2 KiB
Python

from typing import Dict, Any, Optional, Tuple
from datetime import datetime
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from app.models import AppInfo, AppMetrics, AppRating
class DataProcessor:
    """Persist crawled app payloads as AppInfo, AppMetrics and AppRating rows.

    All reads and writes go through the async SQLAlchemy session supplied at
    construction time. ``save_app_data`` is the entry point; the remaining
    methods are private helpers.
    """

    # The annotation is quoted so it is not evaluated when the class is defined.
    def __init__(self, db: "AsyncSession"):
        """Store the session used for every query and insert."""
        self.db = db

    async def save_app_data(
        self,
        app_data: Dict[str, Any],
        rating_data: Optional[Dict[str, Any]] = None
    ) -> Tuple[bool, bool, bool]:
        """Save one crawled app record and commit the transaction.

        Args:
            app_data: Raw app payload. ``appId`` and ``pkgName`` are required
                here; the other keys are read by the private save helpers.
            rating_data: Optional raw rating payload. Skipped when falsy.

        Returns:
            ``(info_inserted, metric_inserted, rating_inserted)`` - whether a
            new row was added for the app info, the metrics and the rating.

        Raises:
            KeyError: A required key is missing from a payload.
            Exception: Any error raised by the session is re-raised after the
                transaction has been rolled back.
        """
        app_id = app_data['appId']
        pkg_name = app_data['pkgName']

        try:
            # Look up the app: its basic info is inserted only once.
            result = await self.db.execute(
                select(AppInfo).where(AppInfo.app_id == app_id)
            )
            existing_app = result.scalar_one_or_none()

            # Basic app information.
            info_inserted = False
            if not existing_app:
                await self._save_app_info(app_data)
                info_inserted = True

            # Metrics: a new row only when they differ from the latest one.
            metric_inserted = False
            if await self._should_save_metric(app_id, app_data):
                await self._save_app_metric(app_data)
                metric_inserted = True

            # Rating: same change-detection approach as the metrics.
            rating_inserted = False
            if rating_data and await self._should_save_rating(app_id, rating_data):
                await self._save_app_rating(app_id, pkg_name, rating_data)
                rating_inserted = True

            await self.db.commit()
        except Exception:
            # Discard pending objects and leave the session usable for the
            # next record; the caller still sees the original error.
            await self.db.rollback()
            raise

        return info_inserted, metric_inserted, rating_inserted

    @staticmethod
    def _ms_to_datetime(raw_ms: Any) -> datetime:
        """Convert an epoch timestamp in milliseconds to a ``datetime``.

        Accepts an int, a float or a numeric string. ``None``, ``''`` and
        ``0`` all map to the epoch. The result is naive local time, as
        returned by ``datetime.fromtimestamp``.
        """
        return datetime.fromtimestamp(float(raw_ms or 0) / 1000)

    async def _save_app_info(self, data: Dict[str, Any]):
        """Build an AppInfo row from the raw payload and add it to the session.

        Required keys: ``appId``, ``name``, ``pkgName``, ``developerName``,
        ``kindName`` and ``icon``. Every other key falls back to a default.
        """
        app_info = AppInfo(
            # Basic information
            app_id=data['appId'],
            name=data['name'],
            pkg_name=data['pkgName'],
            # Developer information
            developer_name=data['developerName'],
            dev_id=data.get('devId', ''),
            supplier=data.get('supplier', ''),
            # Category information
            kind_name=data['kindName'],
            kind_id=data.get('kindId', ''),
            tag_name=data.get('tagName', ''),
            # Display information
            icon_url=data['icon'],
            brief_desc=data.get('briefDes', ''),
            description=data.get('description', ''),
            # Privacy and policy
            privacy_url=data.get('privacyUrl', ''),
            # Price and payment. Compared as text so both '1' and 1 count as paid.
            is_pay=str(data.get('isPay', '')) == '1',
            price=data.get('price', '0'),
            # Time information. releaseDate is divided by 1000, i.e. treated
            # as epoch milliseconds; it may arrive as a number or a string.
            listed_at=self._ms_to_datetime(data.get('releaseDate')),
            # Device support
            main_device_codes=data.get('mainDeviceCodes', []),
            # SDK information
            target_sdk=data.get('targetSdk', ''),
            min_sdk=data.get('minsdk', ''),
            compile_sdk_version=data.get('compileSdkVersion', 0),
            min_hmos_api_level=data.get('minHmosApiLevel', 0),
            api_release_type=data.get('apiReleaseType', 'Release'),
            # Other information
            ctype=data.get('ctype', 0),
            app_level=data.get('appLevel', 0),
            packing_type=data.get('packingType', 0)
        )
        self.db.add(app_info)

    async def _save_app_metric(self, data: Dict[str, Any]):
        """Build an AppMetrics row from the raw payload and add it to the session."""
        # Normalise the download count to an integer.
        download_count = self._parse_download_count(data.get('downCount', '0'))
        metric = AppMetrics(
            app_id=data['appId'],
            pkg_name=data['pkgName'],
            version=data.get('version', ''),
            # "or 0" also covers a key that is present but None or ''.
            size_bytes=int(data.get('size') or 0),
            download_count=download_count,
            release_date=int(data.get('releaseDate') or 0)
        )
        self.db.add(metric)

    async def _save_app_rating(self, app_id: str, pkg_name: str, data: Dict[str, Any]):
        """Build an AppRating row from the rating payload and add it to the session.

        Raises:
            KeyError: A rating field is missing from ``data``.
            ValueError: A rating field cannot be converted to a number.
        """
        rating = AppRating(
            app_id=app_id,
            pkg_name=pkg_name,
            average_rating=float(data['averageRating']),
            star_1_count=int(data['oneStarRatingCount']),
            star_2_count=int(data['twoStarRatingCount']),
            star_3_count=int(data['threeStarRatingCount']),
            star_4_count=int(data['fourStarRatingCount']),
            star_5_count=int(data['fiveStarRatingCount']),
            total_rating_count=int(data['totalStarRatingCount'])
        )
        self.db.add(rating)

    def _parse_download_count(self, count_str: str) -> int:
        """Parse a download-count value into an integer.

        ``+`` signs and thousands separators (``,``) are removed from strings
        before conversion. Numbers are accepted as-is, ``None`` counts as 0,
        and anything that cannot be converted yields 0.
        """
        if count_str is None:
            return 0
        if not isinstance(count_str, str):
            # The payload may already carry a number instead of text.
            try:
                return int(count_str)
            except (TypeError, ValueError, OverflowError):
                return 0
        cleaned = count_str.replace('+', '').replace(',', '')
        try:
            return int(cleaned)
        except ValueError:
            return 0

    async def _should_save_metric(self, app_id: str, data: Dict) -> bool:
        """Return True when a new metrics row should be stored for ``app_id``.

        That is the case when no metrics exist yet, or when the version or
        the parsed download count differs from the most recently created row.
        """
        # Fetch the most recent metrics row.
        result = await self.db.execute(
            select(AppMetrics)
            .where(AppMetrics.app_id == app_id)
            .order_by(AppMetrics.created_at.desc())
            .limit(1)
        )
        latest_metric = result.scalar_one_or_none()
        if not latest_metric:
            return True
        # Compare the key fields.
        return (
            latest_metric.version != data.get('version', '') or
            latest_metric.download_count != self._parse_download_count(data.get('downCount', '0'))
        )

    async def _should_save_rating(self, app_id: str, data: Dict) -> bool:
        """Return True when a new rating row should be stored for ``app_id``.

        That is the case when no rating exists yet, or when the average
        rating or the total rating count differs from the most recent row.
        """
        result = await self.db.execute(
            select(AppRating)
            .where(AppRating.app_id == app_id)
            .order_by(AppRating.created_at.desc())
            .limit(1)
        )
        latest_rating = result.scalar_one_or_none()
        if not latest_rating:
            return True
        # NOTE(review): exact float comparison - if the column stores the
        # average with reduced precision this may report a change on every
        # run; confirm against the model definition.
        return (
            float(latest_rating.average_rating) != float(data['averageRating']) or
            latest_rating.total_rating_count != int(data['totalStarRatingCount'])
        )