# -*- coding: utf-8 -*-
"""
从 需要匹配的表.xlsx 读取脱敏手机号(业务号码列),
在 被匹配的表.xls 中匹配电话号码,取最后一个匹配的创建时间,
写入需要匹配的表的最后一列。
"""
import csv
import re
from pathlib import Path
import openpyxl
# ==================== 配置 ====================
BASE_DIR = Path(__file__).parent
COMMISSION_FILE = BASE_DIR / "需要匹配的表.xlsx"
VPS_FILE = BASE_DIR / "被匹配的表.xls"
# ==============================================
def masked_phone_to_pattern(masked: str) -> str | None:
"""
将脱敏手机号转为匹配用的正则模式。
例如: 138****1234 -> 138\\d{4}1234
"""
if not masked or not isinstance(masked, str):
return None
s = str(masked).strip()
if "*" not in s:
return None
pattern = ""
i = 0
while i < len(s):
if s[i] == "*":
n = 0
while i < len(s) and s[i] == "*":
n += 1
i += 1
pattern += r"\d{" + str(n) + "}"
else:
start = i
while i < len(s) and s[i] != "*":
i += 1
pattern += re.escape(s[start:i])
return pattern if pattern else None
def normalize_phone(phone) -> str:
"""将电话号码转为字符串,去除空格等"""
if phone is None:
return ""
# Excel 可能将数字读成 13812341234.0
if isinstance(phone, float) and phone == int(phone):
phone = int(phone)
s = str(phone).strip().replace(" ", "").replace("-", "")
return s
def load_vps_data(vps_path: Path) -> list[dict]:
"""从 被匹配的表.xls 加载数据(支持 .xls 实为 CSV 的情况)"""
if not vps_path.exists():
raise FileNotFoundError(f"文件不存在: {vps_path}")
# 尝试 CSV 编码
for enc in ("utf-8-sig", "utf-8", "gbk", "gb2312"):
try:
with open(vps_path, "r", encoding=enc, newline="") as f:
reader = csv.reader(f)
header = next(reader)
try:
phone_col = header.index("电话号码")
time_col = header.index("创建时间")
except ValueError:
continue
rows = []
for row in reader:
if len(row) > max(phone_col, time_col):
phone = normalize_phone(row[phone_col])
create_time = row[time_col].strip() if row[time_col] else ""
rows.append({"phone": phone, "create_time": create_time})
return rows
except (UnicodeDecodeError, UnicodeError):
continue
raise ValueError("无法解析 vps 文件,请确认编码(utf-8/gbk)")
def find_last_match(phone_pattern: str, vps_rows: list[dict]) -> str | None:
"""
在 vps 数据中按正则匹配电话号码,返回最后一个匹配的创建时间。
"""
try:
pat = re.compile(phone_pattern)
except re.error:
return None
last_create_time = None
for row in vps_rows:
if pat.fullmatch(row["phone"]):
last_create_time = row["create_time"]
return last_create_time
def format_cell_value(val):
"""将创建时间转为可写入 Excel 的格式"""
if val is None:
return ""
# vps 中创建时间通常已是字符串如 "2025-11-06 16:56:50"
s = str(val).strip()
return s if s else ""
def main():
if not COMMISSION_FILE.exists():
print(f"错误:文件不存在 {COMMISSION_FILE}")
return 1
if not VPS_FILE.exists():
print(f"错误:文件不存在 {VPS_FILE}")
return 1
print("正在加载 被匹配的表.xls ...")
vps_rows = load_vps_data(VPS_FILE)
print(f" vps 共 {len(vps_rows)} 行")
print("正在加载 需要匹配的表.xlsx ...")
wb = openpyxl.load_workbook(COMMISSION_FILE)
ws = wb.active
# 找业务号码列
header = [ws.cell(1, c).value for c in range(1, ws.max_column + 1)]
try:
biz_col = header.index("业务号码") + 1
except ValueError:
print(f"错误:未找到列「业务号码」,表头: {header}")
return 1
last_col = ws.max_column
new_col = last_col + 1
ws.cell(1, new_col, "创建时间") # 表头
matched = 0
not_matched = 0
for row in range(2, ws.max_row + 1):
masked = ws.cell(row, biz_col).value
pattern = masked_phone_to_pattern(masked)
if not pattern:
ws.cell(row, new_col, "")
not_matched += 1
continue
create_time = find_last_match(pattern, vps_rows)
if create_time is not None:
val = format_cell_value(create_time)
ws.cell(row, new_col, val)
matched += 1
else:
ws.cell(row, new_col, "")
not_matched += 1
out_path = COMMISSION_FILE
wb.save(out_path)
print(f"完成:匹配 {matched} 行,未匹配 {not_matched} 行")
print(f"已保存到 {out_path},新列「创建时间」在第 {new_col} 列")
return 0
if __name__ == "__main__":
exit(main())
py匹配excel的手机号,然后将匹配到的创建时间放到匹配的表中
我爱海鲸 2026-02-28 17:51:20 暂无标签
简介excel、csv、xls、xlsx
你好:我的2025