#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
DOI提取功能测试脚本
"""

import re

def extract_doi(url):
    """Extract a DOI from a URL that contains a ``/doi/`` path segment.

    The DOI must follow ``/doi/`` either directly or after a single
    alphabetic segment such as ``full``, ``abs``, ``pdf`` or ``epdf``
    (for example ``/doi/full/10.1126/...``). Matching is case-insensitive
    and stops at the first whitespace, ``?`` or ``#``, so query strings and
    fragments are never part of the result.

    Args:
        url: The URL to inspect. ``None``, non-string and empty values are
            treated as containing no DOI.

    Returns:
        The DOI string (for example ``"10.1126/sciadv.adr9635"``), or
        ``None`` when no DOI is found.
    """
    print("开始提取DOI信息...")

    # Only strings can be searched; anything else (None, bytes, ...) falls
    # through to the "not found" path instead of raising TypeError.
    if isinstance(url, str) and url:
        # "(?:[a-z]+/)?" tolerates one view prefix between /doi/ and the DOI
        # (/doi/full/, /doi/abs/, /doi/pdf/, ...). The DOI itself starts with
        # "10.<digits>/", which the alphabetic prefix can never swallow.
        url_doi_pattern = r'/doi/(?:[a-z]+/)?(10\.\d+/[^\s?#]+)'
        url_match = re.search(url_doi_pattern, url, re.IGNORECASE)

        if url_match:
            doi = url_match.group(1)
            print(f"从URL中提取到DOI: {doi}")
            return doi

    print("未检测到DOI信息")
    return None

def test_doi_extraction():
    """Run the URL-based DOI extraction checks and print a report.

    Each scenario's URL is passed to ``extract_doi`` and the result is
    compared with the expected value. A per-scenario verdict, an overall
    summary and a final stand-alone check of the Science.org URL are written
    to stdout. Nothing is returned.
    """
    print("开始测试DOI提取功能...\n")

    # Scenario table: a label, the input URL, and the DOI that should come
    # back (None when no DOI is expected).
    scenarios = [
        {
            "name": "Science.org DOI URL",
            "url": "https://www.science.org/doi/10.1126/sciadv.adr9635",
            "expected": "10.1126/sciadv.adr9635"
        },
        {
            "name": "带查询参数的DOI URL",
            "url": "https://example.com/doi/10.1234/abc.def?param=value",
            "expected": "10.1234/abc.def"
        },
        {
            "name": "带锚点的DOI URL",
            "url": "https://example.com/doi/10.1234/abc.def#section",
            "expected": "10.1234/abc.def"
        },
        {
            "name": "普通网页URL",
            "url": "https://example.com/page/123",
            "expected": None
        },
        {
            "name": "Nature文章URL",
            "url": "https://www.nature.com/articles/10.1038/s41586-023-06447-0",
            "expected": None  # this URL format has no /doi/ path segment
        }
    ]

    print("=== 测试URL中的DOI提取 ===")
    scenario_count = len(scenarios)
    pass_count = 0

    for number, scenario in enumerate(scenarios, start=1):
        label = scenario['name']
        address = scenario['url']
        wanted = scenario['expected']

        print(f"\n测试用例 {number}: {label}")
        print(f"URL: {address}")

        actual = extract_doi(address)

        # Report mismatches first so the success path stays un-nested.
        if actual != wanted:
            print(f"❌ 失败 - 期望: {wanted}, 实际: {actual}")
            continue

        print(f"✅ 通过 - 期望: {wanted}, 实际: {actual}")
        pass_count += 1

    # Summary figures are computed up front, then printed.
    fail_count = scenario_count - pass_count
    success_rate = (pass_count / scenario_count) * 100

    print(f"\n=== 测试完成 ===")
    print(f"总测试数: {scenario_count}")
    print(f"通过测试: {pass_count}")
    print(f"失败测试: {fail_count}")
    print(f"成功率: {success_rate:.1f}%")

    # Dedicated check of the specific URL the user asked about.
    print(f"\n=== 特别测试你提到的URL ===")
    your_url = "https://www.science.org/doi/10.1126/sciadv.adr9635"
    your_doi = extract_doi(your_url)

    if your_doi == '10.1126/sciadv.adr9635':
        verdict = '✅ 成功'
    else:
        verdict = '❌ 失败'

    print(f"你的URL: {your_url}")
    print(f"提取的DOI: {your_doi}")
    print(f"测试结果: {verdict}")

# Run the DOI extraction checks only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_doi_extraction()
