Merge pull request #60 from gmbrax/fix/Importing-duplicate-images

Fix the bug that collides the hash if identical images
This commit is contained in:
Gustavo Henrique Miranda 2025-07-19 03:23:54 -03:00 committed by GitHub
commit 8919e158bc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 22 additions and 5 deletions

View File

@ -4,6 +4,7 @@ from pathlib import Path
from sqlalchemy import Column, Integer, String, ForeignKey, DateTime
from sqlalchemy.orm import relationship
from sqlalchemy.sql.schema import Index
from pilgrim.models.photo_in_entry import photo_entry_association
from ..database import Base
@ -24,6 +25,9 @@ class Photo(Base):
)
fk_travel_diary_id = Column(Integer, ForeignKey("travel_diaries.id"),nullable=False)
__table_args__ = (
Index('idx_photo_hash_diary', 'hash', 'fk_travel_diary_id'),
)
def __init__(self, filepath, name, photo_hash, addition_date=None, caption=None, entries=None, fk_travel_diary_id=None, **kw: Any):
super().__init__(**kw)

View File

@ -14,8 +14,9 @@ class PhotoService:
def __init__(self, session):
self.session = session
def _hash_file(self, filepath: Path) -> str:
"""Calculate hash of a file using SHA3-384."""
@staticmethod
def hash_file(filepath: Path) -> str:
"""Calculate the hash of a file using SHA3-384."""
hash_func = hashlib.new('sha3_384')
with open(filepath, 'rb') as f:
while chunk := f.read(8192):
@ -64,10 +65,18 @@ class PhotoService:
return dest_path
def check_photo_by_hash(self, photohash:str, traveldiaryid:int):
photo = (self.session.query(Photo).filter(Photo.photo_hash == photohash,Photo.fk_travel_diary_id == traveldiaryid)
.first())
return photo
def create(self, filepath: Path, name: str, travel_diary_id: int, caption=None, addition_date=None) -> Photo | None:
travel_diary = self.session.query(TravelDiary).filter(TravelDiary.id == travel_diary_id).first()
if not travel_diary:
return None
photo_hash = self.hash_file(filepath)
if self.check_photo_by_hash(photo_hash, travel_diary_id):
return None
# Copy photo to diary's images directory
copied_path = self._copy_photo_to_diary(filepath, travel_diary)
@ -79,8 +88,6 @@ class PhotoService:
except ValueError:
addition_date = None
# Calculate hash from the copied file
photo_hash = self._hash_file(copied_path)
new_photo = Photo(
filepath=str(copied_path), # Store the path to the copied file
@ -118,7 +125,7 @@ class PhotoService:
old_path.unlink()
original.filepath = str(new_path)
# Update hash based on the new copied file
original.photo_hash = self._hash_file(new_path)
original.photo_hash = self.hash_file(new_path)
original.name = photo_dst.name
original.addition_date = photo_dst.addition_date

View File

@ -63,10 +63,16 @@ class AddPhotoModal(Screen):
async def _async_create_photo(self, photo_data: dict):
"""Creates a new photo asynchronously using PhotoService"""
try:
service_manager = self.app.service_manager
photo_service = service_manager.get_photo_service()
if photo_service.check_photo_by_hash(photo_service.hash_file(photo_data["filepath"]),self.diary_id):
self.notify("Photo already exists in database", severity="error")
return
new_photo = photo_service.create(
filepath=Path(photo_data["filepath"]),
name=photo_data["name"],