diff --git a/src/main/java/org/wikitolearn/dao/GenericDAO.java b/src/main/java/org/wikitolearn/dao/GenericDAO.java index a5d72a1..f24f054 100644 --- a/src/main/java/org/wikitolearn/dao/GenericDAO.java +++ b/src/main/java/org/wikitolearn/dao/GenericDAO.java @@ -1,28 +1,33 @@ /** * */ package org.wikitolearn.dao; +import java.util.List; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; import org.wikitolearn.utils.DbConnection; /** * @author aletundo * */ public abstract class GenericDAO { protected final Logger LOG = LoggerFactory.getLogger(getClass()); @Autowired protected DbConnection connection; + @Value("#{'${mediawiki.langs}'.split(',')}") + protected List langs; /** * This method is used to create the class on the DB. * It creates an unique index on the id to avoid duplicated. * @return void */ public abstract void createDatabaseClass(); } \ No newline at end of file diff --git a/src/main/java/org/wikitolearn/dao/MetadataDAO.java b/src/main/java/org/wikitolearn/dao/MetadataDAO.java index 2ee5f46..0f1626b 100644 --- a/src/main/java/org/wikitolearn/dao/MetadataDAO.java +++ b/src/main/java/org/wikitolearn/dao/MetadataDAO.java @@ -1,101 +1,97 @@ package org.wikitolearn.dao; -import com.orientechnologies.orient.core.id.ORecordId; -import com.orientechnologies.orient.core.metadata.schema.OClass; import com.orientechnologies.orient.core.metadata.schema.OType; -import com.orientechnologies.orient.core.sql.OCommandSQL; -import com.sun.org.apache.xpath.internal.operations.Bool; import com.tinkerpop.blueprints.Direction; import com.tinkerpop.blueprints.Edge; import com.tinkerpop.blueprints.Vertex; import com.tinkerpop.blueprints.impls.orient.OrientGraph; import com.tinkerpop.blueprints.impls.orient.OrientGraphNoTx; import com.tinkerpop.blueprints.impls.orient.OrientVertex; import com.tinkerpop.blueprints.impls.orient.OrientVertexType; import org.springframework.stereotype.Repository; import org.wikitolearn.models.Process; import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.Map; /** * This class represents Metadata nodes in the DB. There is a unique Metadata node, * as a entrypoint, and a chain of Process nodes, saving some useful information at every * process in the rating engine. For example we can save the number of fetched pages or * saved user votes. - * Created by valsdav on 21/03/17. + * @author valsdav, aletundo */ @Repository public class MetadataDAO extends GenericDAO { /** * This method creates the classes Metadata and Process in the DB. * The Metadata node is the entrypoint for the chain of Processes, via the * edges LastProcess and PreviousProcess */ @Override public void createDatabaseClass() { LOG.info("Creating DB classes for MetadataDAO..."); OrientGraphNoTx graph = connection.getGraphNT(); try{ graph.createVertexType("Metadata",1); OrientVertexType processVertex = graph.createVertexType("Process",1); processVertex.createProperty("timestamp", OType.DATETIME).setMandatory(true); graph.createEdgeType("LastProcess"); graph.createEdgeType("PreviousProcess"); graph.createEdgeType("SubProcess"); - //We want also to create the singleton node for Metadata. + // Create Metadata vertex OrientVertex metadata_main = graph.addVertex("class:Metadata"); metadata_main.setProperty("creation_date", new Date()); } catch( Exception e ) { LOG.error("Something went wrong during class creation. {}.", e.getMessage()); } finally { graph.shutdown(); } } /** * This method insert a new Process on the top of the chain in the db. * It creates the new link between the Metadata node and the Process. * @param process Process to be inserted * @return */ public void addProcess(Process process){ LOG.info("Inserting process..."); OrientGraph graph = connection.getGraph(); try { - //getting last Process + // Getting latest Process Vertex metadataNode = graph.getVerticesOfClass("Metadata").iterator().next(); Iterator it = metadataNode.getEdges(Direction.OUT, "LastProcess").iterator(); Edge lastProcessEdge = null; Vertex lastProcess = null; if (it.hasNext()) { lastProcessEdge = it.next(); lastProcess = lastProcessEdge.getVertex(Direction.OUT); graph.removeEdge(lastProcessEdge); } - //adding new Process + // Adding a new Process vertex Map props = new HashMap<>(); props.put("timestamp", process.getTimestamp()); props.put("processType", process.getProcessType()); props.put("processResult", process.getProcessResult()); Vertex newProcess = graph.addVertex("class:Process", props); - //linking the node + // Linking the node graph.addEdge("class:LastProcess", metadataNode, newProcess, "LastProcess"); if (lastProcess != null){ graph.addEdge("class:PreviousProcess", newProcess, lastProcess, "PreviousProcess"); } graph.commit(); } catch (Exception e){ LOG.error("Something went wrong during the insertion of the process. {}.", e.getMessage()); graph.rollback(); } finally { graph.shutdown(); } } } diff --git a/src/main/java/org/wikitolearn/dao/PageDAO.java b/src/main/java/org/wikitolearn/dao/PageDAO.java index 9ca5bbd..a2ed4a3 100644 --- a/src/main/java/org/wikitolearn/dao/PageDAO.java +++ b/src/main/java/org/wikitolearn/dao/PageDAO.java @@ -1,107 +1,107 @@ /** * */ package org.wikitolearn.dao; import com.orientechnologies.orient.core.metadata.schema.OClass; import com.orientechnologies.orient.core.metadata.schema.OType; import com.orientechnologies.orient.core.sql.OCommandSQL; import com.orientechnologies.orient.core.storage.ORecordDuplicatedException; import com.tinkerpop.blueprints.impls.orient.OrientGraph; import java.util.HashMap; import java.util.List; import java.util.Map; import com.tinkerpop.blueprints.impls.orient.OrientGraphNoTx; import com.tinkerpop.blueprints.impls.orient.OrientVertex; import com.tinkerpop.blueprints.impls.orient.OrientVertexType; import org.springframework.stereotype.Repository; import org.wikitolearn.models.Page; /** * * @author aletundo, valsdav * */ @Repository public class PageDAO extends GenericDAO{ /** * This method is used to create the class on the DB. * It creates an unique index on pageid to avoid duplicated. * @return void */ @Override public void createDatabaseClass() { - LOG.info("Creating DB classes for PageDAO..."); + LOG.info("Creating database Page class..."); OrientGraphNoTx graph = connection.getGraphNT(); try{ OrientVertexType vertex = graph.createVertexType("Page",1); vertex.createProperty("pageid", OType.INTEGER).setMandatory(true); vertex.createProperty("lang", OType.STRING).setMandatory(true); vertex.createIndex("page_lang", OClass.INDEX_TYPE.UNIQUE, "pageid", "lang"); - //now we want to add clusters - graph.command(new OCommandSQL("ALTER CLASS Page ADDCLUSTER Pages_it")).execute(); - graph.command(new OCommandSQL("ALTER CLASS Page ADDCLUSTER Pages_en")).execute(); - //adding the clusters to the class Page - //graph.getRawGraph().getMetadata().getSchema().reload(); + // Add a cluster for each language + for(String lang : langs){ + graph.command(new OCommandSQL("ALTER CLASS Page ADDCLUSTER Pages_" + lang)).execute(); + } + //graph.getRawGraph().getMetadata().getSchema().reload(); } catch( Exception e ) { LOG.error("Something went wrong during class creation. {}.", e.getMessage()); } finally { graph.shutdown(); } } /** * Insert all the given pages in the database as vertexes. * If there are duplicates all the insertion is rolled back. * @param pages List The pages to be inserted * @param lang String * @return boolean True if insertion was committed, false otherwise */ public Boolean insertPages(List pages, String lang){ OrientGraphNoTx graph = connection.getGraphNT(); LOG.info("Starting to insert pages..."); try{ for(Page p : pages){ Map props = new HashMap<>(); props.put("pageid", p.getPageid()); props.put( "title", p.getTitle()); props.put("lang", lang); props.put("pageRank", p.getPageRank()); OrientVertex pageNode = graph.addVertex("class:Page,cluster:Pages_"+lang, props); LOG.info("Page inserted " + pageNode.toString()); } LOG.info("Pages insertion committed"); return true; } catch (ORecordDuplicatedException or) { LOG.error("Page not inserted because it's duplicated. {}", or.getMessage()); } catch( Exception e ) { LOG.error("Something went wrong during page insertion. {}", e.getMessage()); }finally { graph.shutdown(); } return false; } /** * This methods returns an Iterable over all the pages belonging to a certain cluster, * so coming from the same language domain. * @param graph OrientGraph An OrientGraph instance * @param lang String The language of the cluster * @return result Iterable with all the pages of the cluster */ public Iterable getPagesIteratorFromCluster(OrientGraph graph, String lang){ Iterable result = null; try { result = (Iterable) graph.command(new OCommandSQL( - "SELECT FROM cluster:Pages_"+ lang)).execute(); + "SELECT * FROM cluster:Pages_"+ lang)).execute(); } catch (Exception e){ LOG.error("Something went wrong during quering for pages. {}", e.getMessage()); } return result; } } diff --git a/src/main/java/org/wikitolearn/dao/RevisionDAO.java b/src/main/java/org/wikitolearn/dao/RevisionDAO.java index b82fd55..cc52f5c 100644 --- a/src/main/java/org/wikitolearn/dao/RevisionDAO.java +++ b/src/main/java/org/wikitolearn/dao/RevisionDAO.java @@ -1,156 +1,161 @@ package org.wikitolearn.dao; import com.orientechnologies.orient.core.metadata.schema.OClass; import com.orientechnologies.orient.core.metadata.schema.OType; import com.orientechnologies.orient.core.sql.OCommandSQL; import com.orientechnologies.orient.core.storage.ORecordDuplicatedException; import com.tinkerpop.blueprints.Vertex; import com.tinkerpop.blueprints.impls.orient.OrientGraph; import com.tinkerpop.blueprints.impls.orient.OrientGraphNoTx; import com.tinkerpop.blueprints.impls.orient.OrientVertex; import com.tinkerpop.blueprints.impls.orient.OrientVertexType; import org.springframework.stereotype.Repository; import org.wikitolearn.models.Revision; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; /** * * @author aletundo, valsdav * */ @Repository -public class RevisionDAO extends GenericDAO{ +public class RevisionDAO extends GenericDAO { - /** - * This method is used to create the classes on the DB. - * Moreover it creates a unique index on the userid property to avoid duplication. - * @return void - */ - @Override - public void createDatabaseClass() { - LOG.info("Creating DB classes for RevisionDAO..."); - OrientGraphNoTx graph = connection.getGraphNT(); - try{ - // Vertex type for the revision - OrientVertexType vertex = graph.createVertexType("Revision", 1); - vertex.createProperty("revid", OType.INTEGER).setMandatory(true); - vertex.createProperty("lang", OType.STRING).setMandatory(true); - vertex.createIndex("revid", OClass.INDEX_TYPE.UNIQUE, "revid", "lang"); - // Creating clusters for Revision class - graph.command(new OCommandSQL("ALTER CLASS Revision ADDCLUSTER Revs_it")).execute(); - graph.command(new OCommandSQL("ALTER CLASS Revision ADDCLUSTER Revs_en")).execute(); - // Edge type for the created edge from User to Revision - graph.createEdgeType("Author"); - // Edge type to connect revision to parent revision - graph.createEdgeType("ParentRevision"); - // Edge type to connect last revision to page vertex - graph.createEdgeType("LastRevision"); - // Edge type to connect the first revision of a page - graph.createEdgeType("FirstRevision"); - } catch( Exception e ) { - LOG.error("Something went wrong during class creation. {}.", e.getMessage()); - } finally { - graph.shutdown(); - } - } + /** + * This method is used to create the Revision class on the database. Moreover it creates + * a unique index on the revid property to avoid duplication. + */ + @Override + public void createDatabaseClass() { + LOG.info("Creating database Revision class..."); + OrientGraphNoTx graph = connection.getGraphNT(); + try { + // Vertex type for the revision + OrientVertexType vertex = graph.createVertexType("Revision", 1); + vertex.createProperty("revid", OType.INTEGER).setMandatory(true); + vertex.createProperty("lang", OType.STRING).setMandatory(true); + vertex.createIndex("revid", OClass.INDEX_TYPE.UNIQUE, "revid", "lang"); + // Add a cluster for each language + for (String lang : langs) { + graph.command(new OCommandSQL("ALTER CLASS Revision ADDCLUSTER Revisions_" + lang)).execute(); + } + // Edge type for the created edge from User to Revision + graph.createEdgeType("Author"); + // Edge type to connect revision to parent revision + graph.createEdgeType("ParentRevision"); + // Edge type to connect last revision to page vertex + graph.createEdgeType("LastRevision"); + // Edge type to connect the first revision of a page + graph.createEdgeType("FirstRevision"); + } catch (Exception e) { + LOG.error("Something went wrong during class creation. {}.", e.getMessage()); + } finally { + graph.shutdown(); + } + } + /** + * This method will insert the revisions of one page, creating the link + * ParentRevision between them and the link FirstRevision and LastRevision + * with the Page vertex. Moreover it connects the Users to the revisions + * they have created. This method must be used only for the first INIT + * import, NOT for incremental insertion. + * + * @param pageId + * @param revs + * @return + */ + public Boolean insertRevisions(int pageId, List revs, String lang) { + OrientGraphNoTx graph = connection.getGraphNT(); + LOG.info("Starting to insert revisions..."); + HashMap revsNodes = new HashMap(); + Vertex firstRev = null; + Vertex lastRev = null; + try { + for (Revision rev : revs) { + Map props = new HashMap<>(); + props.put("revid", rev.getRevid()); + props.put("lang", lang); + props.put("length", rev.getLength()); + props.put("changeCoefficient", rev.getChangeCoefficient()); + props.put("currentMeanVote", rev.getCurrentMeanVote()); + props.put("currentVotesReliability", rev.getCurrentVotesReliability()); + props.put("currentNormalizedVotesReliability", rev.getCurrentNormalisesVotesReliability()); + props.put("totalMeanVote", rev.getTotalMeanVote()); + props.put("totalVotesReliability", rev.getTotalVotesReliability()); + props.put("totalNormalizedVotesReliability", rev.getTotalNormalisesVotesReliability()); + props.put("validated", rev.isValidated()); - /** - * This method will insert the revisions of one page, creating the link ParentRevision between them and - * the link FirstRevision and LastRevision with the Page vertex. Moreover it connects the Users to - * the revisions they have created. - * This method must be used only for the first INIT import, NOT for incremental insertion. - * @param pageId - * @param revs - * @return - */ - public Boolean insertRevisions(int pageId, List revs, String lang){ - OrientGraphNoTx graph = connection.getGraphNT(); - LOG.info("Starting to insert revisions..."); - HashMap revsNodes = new HashMap(); - Vertex firstRev = null; - Vertex lastRev = null; - try{ - for(Revision rev : revs){ - Map props = new HashMap<>(); - props.put("revid", rev.getRevid()); - props.put("lang", lang); - props.put("length", rev.getLength()); - props.put("changeCoefficient", rev.getChangeCoefficient()); - props.put( "currentMeanVote", rev.getCurrentMeanVote()); - props.put( "currentVotesReliability", rev.getCurrentVotesReliability()); - props.put( "currentNormalizedVotesReliability", rev.getCurrentNormalisesVotesReliability()); - props.put( "totalMeanVote", rev.getTotalMeanVote()); - props.put( "totalVotesReliability", rev.getTotalVotesReliability()); - props.put( "totalNormalizedVotesReliability", rev.getTotalNormalisesVotesReliability()); - props.put("validated", rev.isValidated()); + Vertex revNode = graph.addVertex("class:Revision,cluster:Revisions_" + lang, props); + // LOG.info("Revision inserted {}.", revNode.toString()); + revsNodes.put(Integer.toString(rev.getRevid()), revNode); - Vertex revNode = graph.addVertex("class:Revision,cluster:Revs_"+lang, props); - //LOG.info("Revision inserted {}.", revNode.toString()); - revsNodes.put(Integer.toString(rev.getRevid()), revNode); + if (rev.getParentid() == 0) { + firstRev = revNode; + } + if (lastRev == null || rev.getRevid() > (int) lastRev.getProperty("revid")) { + lastRev = revNode; + } - if (rev.getParentid() == 0){ - firstRev = revNode; - } - if (lastRev==null || rev.getRevid() > (int) lastRev.getProperty("revid")){ - lastRev = revNode; - } + // Connecting the creator of the revisions + Vertex userCreator = null; + try { + userCreator = graph.getVertices("User.userid", rev.getUserid()).iterator().next(); + } catch (NoSuchElementException e) { + // if the user is not found we link it to the Anonymous + // user. + userCreator = graph.getVertices("User.userid", "0").iterator().next(); + } + graph.addEdge("class:Author", userCreator, revNode, "Author"); + } - // Connecting the creator of the revisions - Vertex userCreator = null; - try{ - userCreator = graph.getVertices("User.userid", rev.getUserid()).iterator().next(); - } catch (NoSuchElementException e){ - //if the user is not found we link it to the Anonymous user. - userCreator = graph.getVertices("User.userid", "0" ).iterator().next(); - } - graph.addEdge("class:Author", userCreator, revNode, "Author"); - } + // Now we have to create the the links between revisions + for (Revision r : revs) { + if (r.getParentid() != 0) { + graph.addEdge("class:ParentRevision", revsNodes.get(Integer.toString(r.getRevid())), + revsNodes.get(Integer.toString(r.getParentid())), "ParentRevision"); + } + } - // Now we have to create the the links between revisions - for (Revision r : revs){ - if (r.getParentid() != 0){ - graph.addEdge("class:ParentRevision", revsNodes.get(Integer.toString(r.getRevid())), - revsNodes.get(Integer.toString(r.getParentid())), "ParentRevision"); - } - } + // Now let's create the LastRevision and FirstRevision edges + Vertex page = graph.getVertices("Page.pageid", pageId).iterator().next(); + graph.addEdge("class:LastRevision", page, lastRev, "LastRevision"); + graph.addEdge("class:FirstRevision", page, firstRev, "FirstRevision"); - // Now let's create the LastRevision and FirstRevision edges - Vertex page = graph.getVertices("Page.pageid", pageId).iterator().next(); - graph.addEdge("class:LastRevision", page, lastRev, "LastRevision"); - graph.addEdge("class:FirstRevision", page, firstRev, "FirstRevision"); + LOG.info("Revisions of page {} insertion committed", pageId); + return true; + } catch (ORecordDuplicatedException or) { + LOG.error("Some of the pages are duplicates. {}", or.getMessage()); + } catch (Exception e) { + LOG.error("Something went wrong during user insertion. {}", e.getMessage()); + } finally { + graph.shutdown(); + } + return false; + } - LOG.info("Revisions of page {} insertion committed", pageId); - return true; - } catch (ORecordDuplicatedException or) { - LOG.error("Some of the pages are duplicates. {}", or.getMessage()); - } catch( Exception e ) { - LOG.error("Something went wrong during user insertion. {}", e.getMessage()); - } finally { - graph.shutdown(); - } - return false; - } - - /** - * This methods returns an Iterable over all the Revisions belonging to a certain cluster, - * so coming from the same language domain. - * @param lang String The language of the cluster - * @return result Iterable with all the revisions of the cluster - */ - public Iterable getRevisionsIteratorFromCluster(OrientGraph graph, String lang){ - Iterable result = null; - try { - result = (Iterable) graph.command(new OCommandSQL( - "SELECT FROM cluster:Revs_"+ lang)).execute(); - } catch (Exception e){ - LOG.error("Something went wrong during quering for revisions. {}", e.getMessage()); - } - return result; - } + /** + * This methods returns an Iterable over all the Revisions belonging to a + * certain cluster, so coming from the same language domain. + * + * @param lang + * String The language of the cluster + * @return result Iterable with all the revisions of the + * cluster + */ + public Iterable getRevisionsIteratorFromCluster(OrientGraph graph, String lang) { + Iterable result = null; + try { + result = (Iterable) graph.command(new OCommandSQL("SELECT * FROM cluster:Revisions_" + lang)) + .execute(); + } catch (Exception e) { + LOG.error("Something went wrong during quering for revisions. {}", e.getMessage()); + } + return result; + } } diff --git a/src/main/java/org/wikitolearn/dao/UserDAO.java b/src/main/java/org/wikitolearn/dao/UserDAO.java index d099093..5b58bba 100644 --- a/src/main/java/org/wikitolearn/dao/UserDAO.java +++ b/src/main/java/org/wikitolearn/dao/UserDAO.java @@ -1,87 +1,87 @@ /** * */ package org.wikitolearn.dao; import com.orientechnologies.orient.core.metadata.schema.OClass; import com.orientechnologies.orient.core.metadata.schema.OType; import com.orientechnologies.orient.core.storage.ORecordDuplicatedException; import com.tinkerpop.blueprints.Vertex; import com.tinkerpop.blueprints.impls.orient.OrientGraph; import com.tinkerpop.blueprints.impls.orient.OrientGraphNoTx; import com.tinkerpop.blueprints.impls.orient.OrientVertexType; import org.springframework.stereotype.Repository; import org.wikitolearn.models.User; import java.util.HashMap; import java.util.List; import java.util.Map; /** * * @author aletundo, valsdav * */ @Repository public class UserDAO extends GenericDAO{ /** - * This method is used to create the class on the DB. + * This method is used to create the User class on the database. * Moreover it creates a unique index on the userid property to avoid duplication. * @return void */ @Override public void createDatabaseClass() { - LOG.info("Creating DB classes for UserDAO..."); + LOG.info("Creating database User class..."); OrientGraphNoTx graph = connection.getGraphNT(); try{ OrientVertexType vertex = graph.createVertexType("User"); vertex.createProperty("userid", OType.INTEGER).setMandatory(true); vertex.createIndex("userid", OClass.INDEX_TYPE.UNIQUE, "userid"); } catch( Exception e ) { LOG.error("Something went wrong during class creation. {}", e.getMessage()); } finally { graph.shutdown(); } } /** * Insert all the given users in the database as vertexes. * If there are duplicates all the insertion is rolled back. * @param users List The pages to be inserted * @return boolean True if insertion was committed, false otherwise */ public Boolean insertUsers(List users){ OrientGraph graph = connection.getGraph(); LOG.info("Starting to insert users..."); try{ for(User p : users) { try { Map props = new HashMap<>(); props.put("userid", p.getUserid()); props.put("username", p.getUsername()); props.put("votesReliability", p.getVotesReliability()); props.put("contributesReliability", p.getContributesReliability()); props.put("totalReliability", p.getTotalReliability()); Vertex userNode = graph.addVertex("class:User", props); graph.commit(); LOG.info("User inserted {}", userNode.toString()); } catch (ORecordDuplicatedException or) { LOG.error("The user is already in the DB. {}. Operation will be rollbacked.", or.getMessage()); graph.rollback(); } } LOG.info("Users insertion ended"); graph.shutdown(); return true; } catch( Exception e ) { LOG.error("Something went wrong during user insertion. {}. Operation will be rollbacked.", e.getMessage()); graph.rollback(); graph.shutdown(); } return false; } }