"""Extract an HTML table saved from the China Tower energy-IoT portal to CSV.

Reads a locally saved HTML page, parses its <table> element with
BeautifulSoup, and writes the table contents to a CSV file via pandas.
The request constants (API_URL / headers / body) document the original
API call the page came from; this script does not perform the request.
"""
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup
# History-performance endpoint the saved page was fetched from.
# Not called in this script; kept for reference together with `headers`/`body`.
API_URL = "https://energy-iot.chinatowercom.cn/api/device/device/historyPerformance"

# Request headers captured from the browser session.
# NOTE(review): "Content-Length" and "Host" are normally computed by the HTTP
# client — the hard-coded 211 would be wrong for any other payload. The Cookie
# and the Bearer token below are session-bound and expire; refresh them before
# actually issuing this request.
headers = {
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Connection": "keep-alive",
    "Content-Length": "211",
    "Cookie": "HWWAFSESTIME=1732173820506; HWWAFSESID=1739685743c73769ff; dc04ed2361044be8a9355f6efb378cf2=WyIzNTI0NjE3OTgzIl0",
    "Host": "energy-iot.chinatowercom.cn",
    "Origin": "https://energy-iot.chinatowercom.cn",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "authorization": "Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiIl0sInVzZXJfbmFtZSI6IndlYl9tYW5hZ2V8d2FuZ2xlaTQiLCJzY29wZSI6WyJhbGwiXSwiZXhwIjoxNzMyMjc4NDA5LCJ1c2VySWQiOjI0Mjg1LCJqdGkiOiJkODE0YTZhYy05YmJmLTQ0ZjQtYWRhYi0wMzAzNjUzNmNhNWIiLCJjbGllbnRfaWQiOiJ3ZWJfbWFuYWdlIn0.VhJaDKwzjekwOCsw_jOF_jvg7sX45okFcxkLyWtbfFVGWVWANhKNhVqj5Dn0Qb3wUXH3e-w74sDN1RI9QADngMOGP_H7aTwI_nukj6VmjpFA7kEtOBwa6ouvPZQMa1qa3UWl21Ac6GoLu14T4TIf4kQAMTdaYAMFrwDAXAkqvIDmKKjZbnDFUjUIcj-J_Y-LfHCEBjtcz7Rp_wMO-PMA5wII6kbcNoSFiYb0djcFQyeBcIUSUTRPixPcTYBkS-IhNrsOePIWlpNYMHbPxZdrZkV4M65BmBn4A9MUjWYHm7iIut8WVMdCXR4Sxp9m0mJHXR_IPWES4O7aBcuMkOmjyw",
    "content-type": "application/json;charset=UTF-8",
    "sec-ch-ua": "\"Microsoft Edge\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "Windows",
}
# JSON payload for the history-performance query the page was produced from.
body = {
    "startTimestamp": 1732032000000,  # query window start (epoch milliseconds)
    "endTimestamp": 1732291199000,    # query window end (epoch milliseconds)
    "deviceCode": "TTE0102DX2406240497",
    "mid": "0305120001",              # metric/measurement id — meaning defined by the API
    "businessType": "7",
    "pageNum": 2,                     # page index (presumably 1-based — confirm against API docs)
    "pageSize": 5,
    "total": 0,
}
# 1. Read the locally saved HTML file.
file_path = Path(r'D:\WorkingProject\LightStackAdapter\Log\设备测试数据记录-铁塔主站\南和县牧村\Untitled-1.html')
# Decode explicitly as UTF-8 instead of relying on the locale default
# (read_text() without an encoding uses the platform encoding, which mangles
# Chinese text on a non-UTF-8 Windows locale).
# NOTE(review): the API served charset=UTF-8; confirm the saved file kept it.
html_content = file_path.read_text(encoding='utf-8')

# 2. Parse the HTML document.
soup = BeautifulSoup(html_content, 'html.parser')
# 3. Locate the table element.
# BUG FIX: the original used soup.find_all('table'), which returns a ResultSet;
# calling .find('thead') on a ResultSet raises an AttributeError. find()
# returns the first <table> (or None if the page has no table).
table = soup.find('table')
if table is None:
    raise ValueError('No <table> element found in the HTML document.')

# 4. Extract the table contents.
# Column names come from the <th> cells of the <thead> row. Named
# `column_names` (not `headers`) to avoid shadowing the HTTP request headers
# defined above.
header_row = table.find('thead').find('tr')
column_names = [th.text.strip() for th in header_row.find_all('th')]

# Data rows: accept both <td> and <th> cells, since some tables mark the
# first column of each row with <th>.
data = [
    [cell.text.strip() for cell in row.find_all(['td', 'th'])]
    for row in table.find('tbody').find_all('tr')
]

# 5. Load the extracted rows into a DataFrame.
df = pd.DataFrame(data, columns=column_names)

# 6. Write the DataFrame out as CSV.
output_file = 'extracted_table.csv'
df.to_csv(output_file, index=False, encoding='utf-8')

print(f'表格数据已成功提取并保存到 {output_file}')