from typing import Dict, Any, Optional, Tuple
from datetime import datetime

from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select

from app.models import AppInfo, AppMetrics, AppRating


class DataProcessor:
    """Persist app info, metric snapshots and rating snapshots.

    All reads and writes go through the ``AsyncSession`` passed to the
    constructor. ``save_app_data`` is the public entry point; the
    underscore-prefixed methods are internal helpers.
    """

    def __init__(self, db: AsyncSession):
        """Store the session used for every query and insert."""
        self.db = db

    async def save_app_data(
        self,
        app_data: Dict[str, Any],
        rating_data: Optional[Dict[str, Any]] = None
    ) -> Tuple[bool, bool, bool]:
        """Save one app payload and, optionally, its rating payload.

        An ``AppInfo`` row is added only when no row with the same
        ``app_id`` exists. ``AppMetrics`` / ``AppRating`` rows are added
        only when the corresponding ``_should_save_*`` check reports a
        change. The session is committed once at the end.

        Args:
            app_data: App payload; must contain ``'appId'`` and
                ``'pkgName'``.
            rating_data: Rating payload, or ``None`` / empty to skip the
                rating step.

        Returns:
            ``(info_inserted, metric_inserted, rating_inserted)``.

        Raises:
            KeyError: If a required key is missing from a payload.
            Exception: Any error raised while querying, adding or
                committing is re-raised after the session has been rolled
                back.
        """
        # Required keys are read before touching the database so that a
        # malformed payload fails without any session activity.
        app_id = app_data['appId']
        pkg_name = app_data['pkgName']

        try:
            # Check whether the app is already known.
            result = await self.db.execute(
                select(AppInfo).where(AppInfo.app_id == app_id)
            )
            existing_app = result.scalar_one_or_none()

            # Basic info is inserted once and not updated afterwards.
            info_inserted = False
            if not existing_app:
                await self._save_app_info(app_data)
                info_inserted = True

            # Metric snapshot, only when it differs from the latest one.
            metric_inserted = False
            if await self._should_save_metric(app_id, app_data):
                await self._save_app_metric(app_data)
                metric_inserted = True

            # Rating snapshot, only when supplied and changed.
            rating_inserted = False
            if rating_data and await self._should_save_rating(app_id, rating_data):
                await self._save_app_rating(app_id, pkg_name, rating_data)
                rating_inserted = True

            await self.db.commit()
        except Exception:
            # Discard pending objects and reset the transaction so the
            # session is usable for the next call, then let the caller
            # see the original error.
            await self.db.rollback()
            raise

        return info_inserted, metric_inserted, rating_inserted

    async def _save_app_info(self, data: Dict[str, Any]):
        """Build an ``AppInfo`` from ``data`` and add it to the session.

        ``'appId'``, ``'name'``, ``'pkgName'``, ``'developerName'``,
        ``'kindName'`` and ``'icon'`` are required (``KeyError`` if
        missing); every other field falls back to a default.
        """
        # 'releaseDate' is divided by 1000 before being handed to
        # datetime.fromtimestamp, i.e. it is treated as epoch
        # milliseconds. It is coerced with int() exactly as
        # _save_app_metric does, so a numeric string or an explicit None
        # no longer raises TypeError here.
        # NOTE(review): fromtimestamp() without tz yields naive local
        # time - confirm that is what the listed_at column expects.
        release_ms = int(data.get('releaseDate') or 0)

        app_info = AppInfo(
            # Basic info
            app_id=data['appId'],
            name=data['name'],
            pkg_name=data['pkgName'],
            # Developer info
            developer_name=data['developerName'],
            dev_id=data.get('devId', ''),
            supplier=data.get('supplier', ''),
            # Category info
            kind_name=data['kindName'],
            kind_id=data.get('kindId', ''),
            tag_name=data.get('tagName', ''),
            # Display info
            icon_url=data['icon'],
            brief_desc=data.get('briefDes', ''),
            description=data.get('description', ''),
            # Privacy and policy
            privacy_url=data.get('privacyUrl', ''),
            # Price and payment; only the exact string '1' counts as paid
            is_pay=data.get('isPay') == '1',
            price=data.get('price', '0'),
            # Time info
            listed_at=datetime.fromtimestamp(release_ms / 1000),
            # Device support
            main_device_codes=data.get('mainDeviceCodes', []),
            # SDK info
            target_sdk=data.get('targetSdk', ''),
            min_sdk=data.get('minsdk', ''),
            compile_sdk_version=data.get('compileSdkVersion', 0),
            min_hmos_api_level=data.get('minHmosApiLevel', 0),
            api_release_type=data.get('apiReleaseType', 'Release'),
            # Miscellaneous
            ctype=data.get('ctype', 0),
            app_level=data.get('appLevel', 0),
            packing_type=data.get('packingType', 0)
        )
        self.db.add(app_info)

    async def _save_app_metric(self, data: Dict[str, Any]):
        """Build an ``AppMetrics`` snapshot from ``data`` and add it.

        ``'appId'`` and ``'pkgName'`` are required; ``'version'``,
        ``'size'``, ``'downCount'`` and ``'releaseDate'`` are optional.
        """
        # Normalise the download count to an int.
        download_count = self._parse_download_count(data.get('downCount', '0'))

        metric = AppMetrics(
            app_id=data['appId'],
            pkg_name=data['pkgName'],
            version=data.get('version', ''),
            size_bytes=int(data.get('size', 0)),
            download_count=download_count,
            # 'or 0' also covers a key that is present but None.
            release_date=int(data.get('releaseDate') or 0)
        )
        self.db.add(metric)

    async def _save_app_rating(self, app_id: str, pkg_name: str, data: Dict[str, Any]):
        """Build an ``AppRating`` snapshot from ``data`` and add it.

        Every rating key read below is required; a missing key raises
        ``KeyError`` and a non-numeric value raises ``ValueError``.
        """
        rating = AppRating(
            app_id=app_id,
            pkg_name=pkg_name,
            average_rating=float(data['averageRating']),
            star_1_count=int(data['oneStarRatingCount']),
            star_2_count=int(data['twoStarRatingCount']),
            star_3_count=int(data['threeStarRatingCount']),
            star_4_count=int(data['fourStarRatingCount']),
            star_5_count=int(data['fiveStarRatingCount']),
            total_rating_count=int(data['totalStarRatingCount'])
        )
        self.db.add(rating)

    def _parse_download_count(self, count_str: Any) -> int:
        """Convert a download-count value to an ``int``.

        ``'+'`` and ``','`` are removed from string input before parsing.
        ``None`` and anything that still cannot be parsed as an integer
        yield 0; integer input is returned as an ``int`` unchanged in
        value.

        Args:
            count_str: Raw value, e.g. ``'1,000+'``, ``'42'``, ``42`` or
                ``None``.

        Returns:
            The parsed count, or 0 when it cannot be parsed.
        """
        if count_str is None:
            return 0
        if isinstance(count_str, int):
            # Already numeric; the payload is not guaranteed to be a str.
            return int(count_str)

        cleaned = str(count_str).replace('+', '').replace(',', '')
        try:
            return int(cleaned)
        except ValueError:
            return 0

    async def _should_save_metric(self, app_id: str, data: Dict) -> bool:
        """Return True when a new metric snapshot should be stored.

        That is the case when the app has no ``AppMetrics`` row yet, or
        when the incoming version or parsed download count differs from
        the most recently created row.
        """
        # Fetch the latest snapshot for this app.
        result = await self.db.execute(
            select(AppMetrics)
            .where(AppMetrics.app_id == app_id)
            .order_by(AppMetrics.created_at.desc())
            .limit(1)
        )
        latest_metric = result.scalar_one_or_none()

        if not latest_metric:
            return True

        # Compare the key fields only.
        return (
            latest_metric.version != data.get('version', '') or
            latest_metric.download_count != self._parse_download_count(data.get('downCount', '0'))
        )

    async def _should_save_rating(self, app_id: str, data: Dict) -> bool:
        """Return True when a new rating snapshot should be stored.

        That is the case when the app has no ``AppRating`` row yet, or
        when the incoming average rating or total rating count differs
        from the most recently created row.
        """
        result = await self.db.execute(
            select(AppRating)
            .where(AppRating.app_id == app_id)
            .order_by(AppRating.created_at.desc())
            .limit(1)
        )
        latest_rating = result.scalar_one_or_none()

        if not latest_rating:
            return True

        return (
            float(latest_rating.average_rating) != float(data['averageRating']) or
            latest_rating.total_rating_count != int(data['totalStarRatingCount'])
        )