https://f.ws28.cn/f/eorbtcohs31 复制链接到浏览器打开
1 个赞
可以尝试将PDF转换为Word文档
https://tmp.link/f/66a35e845eaeb
https://tmp.link/f/66a35e1ad11cf
备用链接
https://pan.huang1111.cn/s/y558Gh6
https://pan.huang1111.cn/s/jRRo7Uy
1 个赞
怎么做到的
他这个 PDF 每一页都是图片,而且每一页都带水印,没法直接去除,所以写了个 Python 脚本,专门针对这类文档处理一下。
pip install PyMuPDF Pillow numpy
import fitz
import os
from PIL import Image
import numpy as np
import concurrent.futures
def is_green(color, threshold=180):
    """Return a boolean mask of the pixels that count as "green".

    A pixel is flagged when its green channel is strictly greater than both
    its red and blue channels and also strictly greater than ``threshold``.

    Args:
        color: Array-like whose last axis holds the channels in R, G, B
            order. Only channel indices 0, 1 and 2 are read, so any further
            channels are ignored.
        threshold: Minimum (exclusive) green value for a pixel to be
            flagged. Defaults to 180.

    Returns:
        Boolean array with the shape of ``color`` minus its last axis.
    """
    pixels = np.asarray(color)
    # Ellipsis indexing addresses the channel axis whatever the number of
    # leading dimensions (single pixel, image, or batch of images).
    red = pixels[..., 0]
    green = pixels[..., 1]
    blue = pixels[..., 2]
    return (green > np.maximum(red, blue)) & (green > threshold)
def process_image(pix):
    """Crop the bottom 2% of an image and paint its green pixels near-white.

    Args:
        pix: Image to clean; anything ``np.array`` can convert to a
            height x width x channel array (``process_page`` passes a PIL
            image). Assumes exactly three channels (RGB), because a
            three-value colour is assigned to every masked pixel.

    Returns:
        A new PIL image holding the cropped, cleaned pixels.
    """
    image_np = np.array(pix)
    height = image_np.shape[0]
    # Number of rows to drop from the bottom (2% of the height, rounded down).
    crop_rows = int(height * 0.02)
    # Use an explicit end index: for images shorter than 50 px crop_rows is
    # 0, and a negative-index slice ``[:-0]`` would select no rows at all.
    cropped_image_np = image_np[: height - crop_rows]
    mask = is_green(cropped_image_np)
    # 254 rather than 255: kept exactly as the script originally chose it.
    cropped_image_np[mask] = [254, 254, 254]
    return Image.fromarray(cropped_image_np)
def process_pdf(input_file, output_file):
    """Rebuild a PDF from cleaned page images and save it.

    Every page of ``input_file`` is rendered and cleaned by
    ``process_page``; the resulting image is placed on a new page of the
    output document, which is then written to ``output_file``.

    Pages are handled one at a time, in document order. This keeps the
    output pages in the same order as the input, and avoids touching one
    PyMuPDF document from several threads, which PyMuPDF documents as
    unsupported.

    Args:
        input_file: Path of the PDF to read.
        output_file: Path the cleaned PDF is written to.
    """
    import tempfile  # local import: only this function needs it

    # A private temporary directory is removed even when a page fails, and
    # never deletes unrelated files from a pre-existing folder.
    with tempfile.TemporaryDirectory(prefix="temp_images_") as temp_dir:
        with fitz.open(input_file) as doc, fitz.open() as output_doc:
            for page_num in range(len(doc)):
                _, img_path = process_page(page_num, doc, temp_dir)
                source_rect = doc[page_num].rect
                # 0.98 mirrors the 2% that process_image crops off the bottom.
                output_page = output_doc.new_page(
                    width=source_rect.width,
                    height=source_rect.height * 0.98,
                )
                output_pix = fitz.Pixmap(img_path)
                output_page.insert_image(
                    output_page.rect, pixmap=output_pix, overlay=False
                )
                # Drop the pixmap reference before loading the next page.
                output_pix = None
            # garbage/deflate/clean ask PyMuPDF to compact the saved file.
            output_doc.save(output_file, garbage=4, deflate=True, clean=True)
def process_page(page_num, doc, temp_dir):
    """Render one page, clean it, and store the result as a JPEG.

    Args:
        page_num: Zero-based index of the page inside ``doc``.
        doc: Open PyMuPDF document the page is read from.
        temp_dir: Existing directory that receives the JPEG file.

    Returns:
        Tuple ``(page_num, jpeg_path)`` where ``jpeg_path`` is
        ``<temp_dir>/page_<page_num + 1>.jpg``.
    """
    page = doc[page_num]
    # Render at 150 dpi (PDF units are 1/72 inch) to limit the image size.
    pix = page.get_pixmap(matrix=fitz.Matrix(150 / 72, 150 / 72))
    # Build the PIL image straight from the pixmap samples instead of
    # writing a PNG and reading it back; get_pixmap defaults to RGB without
    # alpha, which is the layout "RGB" expects here.
    img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
    processed_img = process_image(img)
    temp_jpg = os.path.join(temp_dir, f"page_{page_num + 1}.jpg")
    processed_img.save(temp_jpg, "JPEG", quality=90)
    return page_num, temp_jpg
if __name__ == "__main__":
    import sys

    # Usage: python script.py [input.pdf [output.pdf]]
    # Without arguments the paths fall back to "input.pdf" / "output.pdf".
    cli_args = sys.argv[1:]
    input_path = cli_args[0] if len(cli_args) > 0 else "input.pdf"
    output_path = cli_args[1] if len(cli_args) > 1 else "output.pdf"
    process_pdf(input_path, output_path)
随便糊的,能力有限
8 个赞
牛逼 啊佬
多谢大佬
马老师助人为乐
牛啊 佬
这个怎么用啊 大佬
mark一下,感谢分享
1 个赞
@pengzhile 可以帮忙在线去下水印嘛,大佬